├── .coveragerc
├── .github
    └── workflows
    │   ├── Github-PR-Review-Comment.yml
    │   ├── Github-PR-thread-comment.yml
    │   ├── Github-PR.yml
    │   ├── Github-Push.yml
    │   └── python-package.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CHANGELOG
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── Makefile
├── NOTICE
├── README.md
├── VERSION
├── common
    ├── __init__.py
    ├── aws_service.py
    ├── config.py
    ├── log.py
    ├── tests
    │   ├── __init__.py
    │   ├── test_aws_service.py
    │   ├── test_config.py
    │   ├── test_util.py
    │   └── testdata
    │   │   ├── config.yaml
    │   │   └── config_ext.yaml
    └── util.py
├── config
    ├── external_object_replicator.yaml
    ├── extract.yaml
    ├── replay.yaml
    └── user_config.json
├── core
    ├── README.md
    ├── __init__.py
    ├── extract
    │   ├── __init__.py
    │   ├── cloudwatch_extractor.py
    │   ├── extract.py
    │   ├── extract_parser.py
    │   ├── extractor.py
    │   ├── local_extractor.py
    │   └── s3_extractor.py
    ├── replay
    │   ├── __init__.py
    │   ├── connection_thread.py
    │   ├── connections_parser.py
    │   ├── copy_replacements_parser.py
    │   ├── logo.png
    │   ├── prep.py
    │   ├── replay.py
    │   ├── replayer.py
    │   ├── report_content.yaml
    │   ├── report_gen.py
    │   ├── report_util.py
    │   ├── stats.py
    │   ├── summarizer.py
    │   ├── transactions_parser.py
    │   ├── unload_sys_table.py
    │   ├── unload_system_tables.sql
    │   └── worker.py
    ├── sql
    │   ├── aborted_queries.sql
    │   ├── cluster_level_metrics.sql
    │   ├── latency_distribution.sql
    │   ├── query_distribution.sql
    │   ├── query_metrics.sql
    │   ├── statement_types.sql
    │   ├── sys_external_query_data.sql
    │   ├── sys_load_history.sql
    │   └── sys_query_history.sql
    ├── tests
    │   ├── __init__.py
    │   ├── support_files
    │   │   └── audit_objects.json
    │   ├── test_cloudwatch_extractor.py
    │   ├── test_connection_thread.py
    │   ├── test_connections_parser.py
    │   ├── test_copy_replacements_parser.py
    │   ├── test_extract_parser.py
    │   ├── test_extractor.py
    │   ├── test_filters.py
    │   ├── test_local_extractor.py
    │   ├── test_log_validation.py
    │   ├── test_prep.py
    │   ├── test_report_gen.py
    │   ├── test_s3_extractor.py
    │   ├── test_stats.py
    │   ├── test_summarizer.py
    │   ├── test_transactions_parser.py
    │   ├── test_unload_sys_table.py
    │   └── test_worker.py
    └── util
    │   ├── __init__.py
    │   └── log_validation.py
├── requirements.txt
└── tools
    ├── ExternalObjectReplicator
        ├── README.md
        ├── __init__.py
        ├── external_object_replicator.py
        ├── sql
        │   ├── external_table_query.sql
        │   ├── stl_load_query.sql
        │   └── svl_s3_list.sql
        ├── tests
        │   ├── __init__.py
        │   ├── test_copy_util.py
        │   ├── test_external_object_replicator.py
        │   └── test_glue_util.py
        └── util
        │   ├── copy_util.py
        │   └── glue_util.py
    ├── NodeConfigCompare
        ├── IAM_Permissions.pdf
        ├── README.md
        ├── __init__.py
        ├── bootstrap_scripts
        │   ├── extract_bootstrap.sh
        │   ├── performance_test_bootstrap.sh
        │   └── replay_bootstrap.sh
        ├── configuration
        │   ├── RedshiftConfigTestingStepFunction.json
        │   ├── cloud_formation_template.yaml
        │   ├── parameter_group_config.json
        │   ├── source-wlm.json
        │   └── wlm-concurrency-scaling.json
        ├── images
        │   ├── architecure-serverless.png
        │   ├── batch-cw-log-group.png
        │   ├── redshift-clusters-provisioned.png
        │   ├── redshift-clusters-serverless.png
        │   ├── redshift-clusters.png
        │   ├── statemachine-log.png
        │   └── statemachine.png
        ├── python_scripts
        │   ├── RedshiftConfigTestingLambda.py
        │   ├── RedshiftConfigTestingLambda.py.zip
        │   ├── StartUpLambda.py.zip
        │   ├── boto3-redshift-serverless.zip
        │   ├── create_external_schema.py
        │   ├── python.zip
        │   └── redshift-performance-test.py
        └── sql
        │   ├── ddl.sql
        │   ├── gather_comparison_stats.sql
        │   ├── gather_comparison_stats_serverless.sql
        │   ├── populate_comparison_results.sql
        │   └── test_queries.sql
    ├── ReplayAnalysis
        ├── README.md
        ├── api
        │   ├── __init__.py
        │   ├── app.py
        │   └── utils.py
        ├── gui
        │   ├── package-lock.json
        │   ├── package.json
        │   ├── public
        │   │   ├── index.html
        │   │   ├── manifest.json
        │   │   └── robots.txt
        │   └── src
        │   │   ├── App.js
        │   │   ├── App.test.js
        │   │   ├── components
        │   │       ├── AccessControl.js
        │   │       ├── ReplayAnalysis
        │   │       │   ├── AggregateMetrics.js
        │   │       │   ├── CompareThroughput.js
        │   │       │   ├── QueryLatency.js
        │   │       │   ├── ThroughputBreakdown.js
        │   │       │   ├── TopQueryDeltas.js
        │   │       │   └── TopRunningQueries.js
        │   │       ├── ReplayList.js
        │   │       ├── ReplayOverview.js
        │   │       ├── ReplayValidation
        │   │       │   ├── CopyAgg.js
        │   │       │   ├── CopyDiff.js
        │   │       │   ├── ErrorDistribution.js
        │   │       │   ├── ErrorTable.js
        │   │       │   └── SpectrumDiff.js
        │   │       └── navigation
        │   │       │   ├── GlobalFilters.js
        │   │       │   ├── NavDrawer.js
        │   │       │   └── ToolBar.js
        │   │   ├── helpers
        │   │       ├── PrepareOptions.js
        │   │       └── msFormatter.js
        │   │   ├── index.js
        │   │   ├── pages
        │   │       ├── analysis.js
        │   │       └── home.js
        │   │   ├── reportWebVitals.js
        │   │   └── setupTests.js
        ├── replay_analysis.py
        ├── tests
        │   ├── __init__.py
        │   └── test_replay_analysis.py
        └── util
        │   └── report_gen.py
    └── __init__.py


/.coveragerc:
--------------------------------------------------------------------------------
 1 | # .coveragerc to control coverage.py
 2 | [run]
 3 | branch = True
 4 | omit =
 5 |     core/tests/*
 6 |     common/tests/*
 7 |     tools/ExternalObjectReplicator/tests/*
 8 |     tools/ReplayAnalysis/tests/*
 9 | 
10 | command_line = -m unittest discover
11 | 
12 | [html]
13 | directory = coverage_html_report


--------------------------------------------------------------------------------
/.github/workflows/Github-PR-Review-Comment.yml:
--------------------------------------------------------------------------------
 1 | name: Github-PR-Review-Comment
 2 | run-name: ${{ github.triggering_actor }} has left a comment in the PR's Diff
 3 | on: 
 4 |   pull_request_review_comment:
 5 |     types: [created,edited,deleted]
 6 | jobs:
 7 |   notify-pull-request-review-comment:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - name: Send GitHub Action trigger data to Slack workflow
11 |         id: slack
12 |         uses: slackapi/slack-github-action@v1.23.0
13 |         env:
14 |           SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_PR_REVIEW_COMMENT }}
15 | 
16 |   
17 | 


--------------------------------------------------------------------------------
/.github/workflows/Github-PR-thread-comment.yml:
--------------------------------------------------------------------------------
 1 | name: Github-PR-Thread-Comment
 2 | run-name: ${{ github.triggering_actor }} has left a comment in the PR's thread
 3 | on: 
 4 |   issue_comment:
 5 |     types: [created,edited,deleted]
 6 | jobs:
 7 |   notify-pull-request-thread-comment:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - name: Send GitHub Action trigger data to Slack workflow
11 |         id: slack
12 |         uses: slackapi/slack-github-action@v1.23.0
13 |         env:
14 |           SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_PR_THREAD_COMMENT }}
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
22 | 
23 | 
24 | 
25 | 


--------------------------------------------------------------------------------
/.github/workflows/Github-PR.yml:
--------------------------------------------------------------------------------
 1 | name: GitHub-PR
 2 | run-name: ${{ github.triggering_actor }} has opened a pull request
 3 | on: 
 4 |   pull_request:
 5 |     types: [opened,reopened,closed,converted_to_draft]
 6 | jobs:
 7 |   notify-pull-request:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - name: Send GitHub Action trigger data to Slack workflow
11 |         id: slack
12 |         uses: slackapi/slack-github-action@v1.23.0
13 |         env:
14 |           SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
15 | 


--------------------------------------------------------------------------------
/.github/workflows/Github-Push.yml:
--------------------------------------------------------------------------------
 1 | name: GitHub-Push
 2 | run-name: ${{ github.triggering_actor }} has pushed a commit
 3 | on: 
 4 |   push:
 5 | jobs:
 6 |   notify-pull-request:
 7 |     runs-on: ubuntu-latest
 8 |     steps:
 9 |       - name: Send GitHub Action trigger data to Slack workflow
10 |         id: slack
11 |         uses: slackapi/slack-github-action@v1.23.0
12 |         env:
13 |           SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL_PUSH }}
14 | 


--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
 3 | 
 4 | name: Python package
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [ "main" ]
 9 |   pull_request:
10 |     branches: [ "main" ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       fail-fast: false
18 |       matrix:
19 |         python-version: ["3.10"]
20 | 
21 |     steps:
22 |     - uses: actions/checkout@v3
23 |     - name: Set up Python ${{ matrix.python-version }}
24 |       uses: actions/setup-python@v3
25 |       with:
26 |         python-version: ${{ matrix.python-version }}
27 |     - name: Install dependencies
28 |       run: |
29 |         python -m pip install --upgrade pip
30 |         make setup
31 |     - name: Unit tests for redshift-test-drive
32 |       run: |
33 |         make test


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | .DS_Store
  2 | .json
  3 | .sql
  4 | .idea
  5 | *.pdf
  6 | *.csv
  7 | 
  8 | # Byte-compiled / optimized / DLL files
  9 | __pycache__/
 10 | *.py[cod]
 11 | *$py.class
 12 | 
 13 | # C extensions
 14 | *.so
 15 | 
 16 | # Distribution / packaging
 17 | .Python
 18 | build/
 19 | develop-eggs/
 20 | dist/
 21 | downloads/
 22 | eggs/
 23 | .eggs/
 24 | lib/
 25 | lib64/
 26 | parts/
 27 | sdist/
 28 | var/
 29 | wheels/
 30 | share/python-wheels/
 31 | *.egg-info/
 32 | .installed.cfg
 33 | *.egg
 34 | MANIFEST
 35 | 
 36 | # PyInstaller
 37 | #  Usually these files are written by a python script from a template
 38 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 39 | *.manifest
 40 | *.spec
 41 | 
 42 | # Installer logs
 43 | pip-log.txt
 44 | pip-delete-this-directory.txt
 45 | 
 46 | # Unit test / coverage reports
 47 | htmlcov/
 48 | .tox/
 49 | .nox/
 50 | .coverage
 51 | .coverage.*
 52 | .cache
 53 | nosetests.xml
 54 | coverage.xml
 55 | *.cover
 56 | *.py,cover
 57 | .hypothesis/
 58 | .pytest_cache/
 59 | cover/
 60 | 
 61 | # Translations
 62 | *.mo
 63 | *.pot
 64 | 
 65 | # Django stuff:
 66 | *.log
 67 | local_settings.py
 68 | db.sqlite3
 69 | db.sqlite3-journal
 70 | 
 71 | # Flask stuff:
 72 | instance/
 73 | .webassets-cache
 74 | 
 75 | # Scrapy stuff:
 76 | .scrapy
 77 | 
 78 | # Sphinx documentation
 79 | docs/_build/
 80 | 
 81 | # PyBuilder
 82 | .pybuilder/
 83 | target/
 84 | 
 85 | # Jupyter Notebook
 86 | .ipynb_checkpoints
 87 | 
 88 | # IPython
 89 | profile_default/
 90 | ipython_config.py
 91 | 
 92 | # pyenv
 93 | #   For a library or package, you might want to ignore these files since the code is
 94 | #   intended to run in multiple environments; otherwise, check them in:
 95 | # .python-version
 96 | 
 97 | # pipenv
 98 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 99 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
100 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
101 | #   install all needed dependencies.
102 | #Pipfile.lock
103 | 
104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105 | __pypackages__/
106 | 
107 | # Celery stuff
108 | celerybeat-schedule
109 | celerybeat.pid
110 | 
111 | # SageMath parsed files
112 | *.sage.py
113 | 
114 | # Environments
115 | .env
116 | .venv
117 | env/
118 | venv/
119 | ENV/
120 | env.bak/
121 | venv.bak/
122 | 
123 | # Spyder project settings
124 | .spyderproject
125 | .spyproject
126 | 
127 | # Rope project settings
128 | .ropeproject
129 | 
130 | # mkdocs documentation
131 | /site
132 | 
133 | # mypy
134 | .mypy_cache/
135 | .dmypy.json
136 | dmypy.json
137 | 
138 | # Pyre type checker
139 | .pyre/
140 | 
141 | # pytype static type analyzer
142 | .pytype/
143 | 
144 | # Cython debug symbols
145 | cython_debug/
146 | 
147 | 
148 | # gui
149 | # dependencies
150 | tools/ReplayAnalysis/gui/node_modules/
151 | /.pnp
152 | .pnp.js
153 | 
154 | # testing
155 | /coverage
156 | 
157 | # production
158 | /build
159 | 
160 | # misc
161 | .env.local
162 | .env.development.local
163 | .env.test.local
164 | .env.production.local
165 | 
166 | npm-debug.log*
167 | yarn-debug.log*
168 | yarn-error.log*
169 | 
170 | api/*.csv
171 | 
172 | statements_to_be_avoided.txt
173 | 
174 | core/*/logs/*
175 | tools/*/logs/*
176 | tools/*/tests/logs/*


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | exclude: 'tools/NodeConfigCompare/gui/.*'
 2 | repos:
 3 | - repo: https://github.com/pre-commit/pre-commit-hooks
 4 |   rev: v2.3.0
 5 |   hooks:
 6 |   -   id: check-yaml
 7 |       types: [yaml]
 8 |       exclude: cloud_formation_template.yaml
 9 |   -   id: end-of-file-fixer
10 |       types: [python]
11 |   -   id: trailing-whitespace
12 |       types: [python]
13 | - repo: https://github.com/psf/black
14 |   rev: 22.10.0
15 |   hooks:
16 |   -   id: black
17 |       types: [python]
18 |       args:
19 |         - "--line-length=99"
20 | - repo: https://github.com/pycqa/flake8
21 |   rev: 4.0.1
22 |   hooks:
23 |   - id: flake8
24 |     alias: flake8-check
25 |     stages: [manual]
26 |     types: [python]
27 |     args:
28 |       - "--max-line-length=99"
29 | - repo: local
30 |   hooks:
31 |   - id: unittest
32 |     name: unittest
33 |     verbose: true
34 |     entry: make test_with_coverage
35 |     language: system
36 |     types: [python]
37 |     additional_dependencies: []
38 |     pass_filenames: false
39 | 


--------------------------------------------------------------------------------
/CHANGELOG:
--------------------------------------------------------------------------------
1 | ## RedshiftTestDrive 0.0.1
2 | Migrating RedshiftReplay and NodeConfig


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of Conduct
2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
4 | opensource-codeofconduct@amazon.com with any additional questions or comments.
5 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing Guidelines
 2 | 
 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
 4 | documentation, we greatly value feedback and contributions from our community.
 5 | 
 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
 7 | information to effectively respond to your bug report or contribution.
 8 | 
 9 | 
10 | ## Reporting Bugs/Feature Requests
11 | 
12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features.
13 | 
14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
16 | 
17 | * A reproducible test case or series of steps
18 | * The version of our code being used
19 | * Any modifications you've made relevant to the bug
20 | * Anything unusual about your environment or deployment
21 | 
22 | 
23 | ## Contributing via Pull Requests
24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
25 | 
26 | 1. You are working against the latest source on the *main* branch.
27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
29 | 
30 | To send us a pull request, please:
31 | 
32 | 1. Fork the repository.
33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
34 | 3. Ensure local tests pass.
35 | 4. Commit to your fork using clear commit messages.
36 | 5. Send us a pull request, answering any default questions in the pull request interface.
37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
38 | 
39 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
41 | 
42 | 
43 | ## Finding contributions to work on
44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
45 | 
46 | 
47 | ## Code of Conduct
48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
50 | opensource-codeofconduct@amazon.com with any additional questions or comments.
51 | 
52 | 
53 | ## Security issue notifications
54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
55 | 
56 | 
57 | ## Licensing
58 | 
59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
60 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Variables
 2 | PYTHON = python3
 3 | TEST_DIR = tests
 4 | EXTRACT_DIR =  $(PWD)/core/extract
 5 | CORE_FOLDER = $(PWD)/core
 6 | COMMON_FOLDER = $(PWD)/common
 7 | CONFIG_FOLDER = $(PWD)/config
 8 | REPLAY_DIR = $(PWD)/core/replay
 9 | REPLAY_ANALYSIS_DIR = $(PWD)/tools/ReplayAnalysis
10 | EXTERNAL_OBJECT_REPLICATOR_DIR = $(PWD)/tools/ExternalObjectReplicator
11 | 
12 | 
13 | 
14 | 
15 | # Targets
16 | .PHONY: all run setup clean test
17 | 
18 | extract:
19 | 	export PYTHONPATH=$(PYTHONPATH):$(CORE_FOLDER):$(CORE_FOLDER)/util:$(COMMON_FOLDER) && $(PYTHON) $(EXTRACT_DIR)/extract.py $(CONFIG_FOLDER)/extract.yaml
20 | 
21 | replay:
22 | 	export PYTHONPATH=$(PYTHONPATH):$(CORE_FOLDER):$(CORE_FOLDER)/util:$(COMMON_FOLDER) && export PYTHONPATH=$(PYTHONPATH):$(CORE_FOLDER):$(REPLAY_ANALYSIS_DIR)/ && $(PYTHON) $(REPLAY_DIR)/replay.py $(CONFIG_FOLDER)/replay.yaml
23 | 
24 | replay_analysis:
25 | 	export PYTHONPATH=$(PYTHONPATH):$(CORE_FOLDER):$(REPLAY_ANALYSIS_DIR)/util && $(PYTHON) $(REPLAY_ANALYSIS_DIR)/replay_analysis.py 
26 | 
27 | external_object_replicator:
28 | 	export PYTHONPATH=$(PYTHONPATH):$(EXTERNAL_OBJECT_REPLICATOR_DIR):$(EXTERNAL_OBJECT_REPLICATOR_DIR)/util && $(PYTHON) $(EXTERNAL_OBJECT_REPLICATOR_DIR)/external_object_replicator.py $(CONFIG_FOLDER)/external_object_replicator.yaml
29 | 
30 | 
31 | setup: requirements.txt
32 | 	pip3 install -r requirements.txt
33 | 
34 | test:
35 | 	export PYTHONPATH=$(PYTHONPATH):$(CORE_FOLDER):$(CORE_FOLDER) && export PYTHONPATH=$(PYTHONPATH):$(CORE_FOLDER):$(COMMON_FOLDER) && export PYTHONPATH=$(PYTHONPATH):$(CORE_FOLDER):$(EXTERNAL_OBJECT_REPLICATOR_DIR)/ && export PYTHONPATH=$(PYTHONPATH):$(CORE_FOLDER):$(REPLAY_ANALYSIS_DIR)/ && pytest ${EXTERNAL_OBJECT_REPLICATOR_DIR} ${CORE_FOLDER} ${COMMON_FOLDER} ${REPLAY_ANALYSIS_DIR}
36 | 
37 | test_with_coverage:
38 | 	export PYTHONPATH=$(PYTHONPATH):$(CORE_FOLDER):$(CORE_FOLDER) && export PYTHONPATH=$(PYTHONPATH):$(CORE_FOLDER):$(COMMON_FOLDER) && export PYTHONPATH=$(PYTHONPATH):$(CORE_FOLDER):$(EXTERNAL_OBJECT_REPLICATOR_DIR)/ && export PYTHONPATH=$(PYTHONPATH):$(CORE_FOLDER):$(REPLAY_ANALYSIS_DIR)/ && coverage run && coverage html
39 | 
40 | clean:
41 | 	rm -rf __pycache__
42 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Redshift Test Drive
 2 | 
 3 | ## Introduction
 4 | Redshift Test Drive is an amalgamation of Redshift Replay and Node Config. The Redshift Replay consists of Workload Replicator, Replay Analysis and External Object Replicator.
 5 | 
 6 | ## Prerequisites
 7 | Install the following packages before cloning the repository:
 8 | <br>1. Install git
 9 | <br> 
10 | ```
11 | yum install git
12 | ```
13 |  <br>2. Install pip3:
14 |  <br>
15 |  ```
16 |  yum install pip3
17 |  ```
18 |  <br>3. Install make:
19 |  <br>
20 |  ```
21 |  yum install make
22 |  ```
23 | 
24 |  ## Preparations
25 |  01. Clone the git repository using the following command:
26 |  
27 |  ```
28 |  git clone https://github.com/aws/redshift-test-drive
29 |  cd redshift-test-drive/
30 |  export REDSHIFT_TEST_DRIVE_ROOT=$(pwd)
31 |  ```
32 |  02. Create a virtual environment inside the redshift-test-drive directory
33 |  ```
34 |  python3 -m venv testDriveEnv
35 |  source testDriveEnv/bin/activate
36 |  ```
37 |  03. Execute the following command from the root directory to install all the required packages:
38 |  ```
39 |  cd $REDSHIFT_TEST_DRIVE_ROOT && make setup
40 |  ```
41 |  04. Refer to the Table of Content which will point out the different tools and README links of your interest.
42 |  05. Finally, once you are done using the utility to run different benchmarks, deactivate the virtual environment by running the following:
43 |  ```
44 |  deactivate
45 |  ```
46 | 
47 | <br>
48 | 
49 | ### Table of Content
50 | The following table provides links to all tools, locations & READMEs in the repository
51 | 
52 | 
53 | 
54 | | Index |                             Tool                              | Description | README links|
55 | | ----- |:-------------------------------------------------------------:|-------| :-------: |
56 | | 01|                 [Workload Replicator](/core)                  |Workload Replicator is an open source tool which helps customers to mimic their workloads on clusters |[README](/core/README.md)|
57 | | 02|           [Replay Analysis](/tools/ReplayAnalysis)            |Replay Analysis utility enhances auditing in the Workload Replicator process to extract information about the errors that occurred, the validity of the run, and the performance of the replay. It also provides a user interface in which customers can choose multiple replays to analyze, validate, and compare using the extracted audit logs.|[README](/tools/ReplayAnalysis/README.md)|
58 | |03 | [External Object Replicator](/tools/ExternalObjectReplicator) |External Object Replicator replicates COPY manifest objects and Spectrum objects in the customer cluster|[README](/tools/ExternalObjectReplicator/README.md)|
59 | |04|            [Node Config](/tools/NodeConfigCompare)            | Node Configuration Comparison utility answers a very common question: which instance type and number of nodes should you choose for your workload on Amazon Redshift?|[README](/tools/NodeConfigCompare/README.md)|
60 | 
61 | ## FAQs
62 | Q. I'm experiencing issues with boto3 appearing as `ValueError: Invalid endpoint: https://s3..amazonaws.com` or something to that effect, how do I fix this?
63 | 
64 | A. `aws configure` command is a pre-requisite step for most tools within Test drive. Make sure you run `aws configure` and configure the default region.
65 | 
66 | ----
67 | Q. My make commands are failing with `make: *** No rule to make target `, how do I fix this?
68 | 
69 | A. Make sure you are in the right directory for execution. Make commands are made possible through the Makefile found in the root directory. If you followed the setup instructions, this is aliased to `REDSHIFT_TEST_DRIVE_ROOT` in your shell.
70 | 
71 | ----
72 | 
73 | ## Security
74 | 
75 | See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
76 | 
77 | ## License
78 | 
79 | This project is licensed under the Apache-2.0 License.
80 | 
81 | 


--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 0.0.1
2 | 


--------------------------------------------------------------------------------
/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/common/__init__.py


--------------------------------------------------------------------------------
/common/log.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import logging.handlers
 3 | import time
 4 | import os
 5 | 
# strftime pattern passed as ``datefmt`` to the logging.Formatter built in get_log_formatter().
log_date_format = "%Y-%m-%d %H:%M:%S"
 7 | 
 8 | 
 9 | def log_version():
10 |     """Read the VERSION file and log it"""
11 |     logger = logging.getLogger("WorkloadReplicatorLogger")
12 |     try:
13 |         with open("VERSION", "r") as fp:
14 |             logger.info(f"Version {fp.read().strip()}")
15 |     except:
16 |         logger.warning(f"Version unknown")
17 | 
18 | 
def init_logging(
    filename,
    dir="",
    level=logging.DEBUG,
    backup_count=5,
    preamble="",
    script_type="",
    logger_name="WorkloadReplicatorLogger",
    log_id="",
):
    """Attach a console handler and a rotating file handler to a named logger.

    Args:
        filename: Name of the log file, created inside ``dir``.
        dir: Directory that holds the log file; created if it does not exist.
            When empty, ``filename`` is used as given (relative to the current
            working directory) instead of failing on ``os.makedirs("")``.
        level: Level applied to the logger and to both handlers.
        backup_count: ``backupCount`` handed to the RotatingFileHandler.
        preamble: Optional text written at the top of the fresh log file,
            followed by a dashed separator line.
        script_type: Label used in the "Starting the ..." message; the values
            "extract" and "replay" additionally cause ``log_id`` to be logged.
        logger_name: Name of the logger to configure.
        log_id: Identifier logged as "Extract ID" / "Replay ID" when non-empty.
    """
    logger = logging.getLogger(logger_name)
    logger.setLevel(level)
    # Class-level assignment: this switches every logging.Formatter to UTC timestamps.
    logging.Formatter.converter = time.gmtime

    console_handler = logging.StreamHandler()
    console_handler.setLevel(level)
    console_handler.setFormatter(get_log_formatter())
    logger.addHandler(console_handler)

    # Additionally log to a logfile
    if dir:
        os.makedirs(dir, exist_ok=True)
        filename = f"{dir}/{filename}"
    file_exists = os.path.isfile(filename)
    file_handler = logging.handlers.RotatingFileHandler(filename, backupCount=backup_count)

    # if the file exists from a previous run, rotate it
    if file_exists:
        file_handler.doRollover()

    # dump the preamble to the file first
    if preamble:
        with open(filename, "w") as fp:
            fp.write(preamble.rstrip() + "\n\n" + "-" * 40 + "\n")

    file_handler.setLevel(level)
    file_handler.setFormatter(get_log_formatter())

    # These two messages are emitted before the file handler is attached, so
    # they reach the console only.
    logger.info(f"Starting the {script_type}")
    logger.info(f"Logging to {filename}")
    logger.addHandler(file_handler)
    logger.info("== Initializing logfile ==")

    id_labels = {"extract": "Extract ID", "replay": "Replay ID"}
    if log_id and script_type in id_labels:
        logger.info(f"{id_labels[script_type]}: {log_id}")
70 | 
71 | 
def get_log_formatter():
    """Build the formatter shared by the console and file handlers.

    Each record is rendered as
    ``([LEVEL] timestamp threadName processName): message`` with the timestamp
    formatted by ``log_date_format`` and converted through ``time.gmtime``.
    """
    log_formatter = logging.Formatter(
        "([%(levelname)s] %(asctime)s %(threadName)s %(processName)s): %(message)s",
        datefmt=log_date_format,
    )
    log_formatter.converter = time.gmtime
    return log_formatter
78 | 


--------------------------------------------------------------------------------
/common/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/common/tests/__init__.py


--------------------------------------------------------------------------------
/common/tests/test_util.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from unittest import TestCase
  3 | from unittest.mock import patch, Mock
  4 | 
  5 | import redshift_connector
  6 | 
  7 | import common.util
  8 | 
  9 | 
 10 | class TestDbConnect(TestCase):
 11 |     @patch("redshift_connector.connect")
 12 |     def test_db_connect_psql_drop_return_true(self, mock_connect):
 13 |         mock_connection = Mock()
 14 |         mock_connection.message_types = {}
 15 |         mock_connect.return_value = mock_connection
 16 | 
 17 |         connection = common.util.db_connect(drop_return=True)
 18 |         self.assertEqual(connection, mock_connection)
 19 |         self.assertIsNotNone(connection.message_types[redshift_connector.core.DATA_ROW])
 20 | 
 21 |     @patch("redshift_connector.connect")
 22 |     def test_db_connect_psql_drop_return_false(self, mock_connect):
 23 |         mock_connection = Mock()
 24 |         mock_connection.message_types = {}
 25 |         mock_connect.return_value = mock_connection
 26 | 
 27 |         connection = common.util.db_connect()
 28 |         self.assertEqual(connection, mock_connection)
 29 |         self.assertIsNone(connection.message_types.get(redshift_connector.core.DATA_ROW, None))
 30 | 
 31 |     def test_db_connect_unsupported_interface(self):
 32 |         with self.assertRaises(ValueError) as _:
 33 |             common.util.db_connect("test")
 34 | 
 35 | 
 36 | class TestClusterDict(TestCase):
 37 |     @patch("common.aws_service.redshift_describe_clusters")
 38 |     def test_cluster_dict_provisioned_success(self, patched_redshift_describe_clusters):
 39 |         patched_redshift_describe_clusters.return_value = {
 40 |             "Clusters": [{"NumberOfNodes": "1", "NodeType": "ra3"}]
 41 |         }
 42 |         cluster = common.util.cluster_dict(
 43 |             endpoint="test.test.us-east-1.redshift.amazonaws.com:5439/dev",
 44 |             start_time="2021-08-15T15:50",
 45 |             end_time="2021-08-15T18:55",
 46 |         )
 47 |         self.assertEqual(cluster.get("num_nodes", None), "1")
 48 |         self.assertEqual(cluster.get("instance", None), "ra3")
 49 | 
 50 |     @patch("common.aws_service.redshift_describe_clusters")
 51 |     def test_cluster_dict_provisioned_success(self, patched_redshift_describe_clusters):
 52 |         patched_redshift_describe_clusters.side_effect = [Exception("Failed to describe clusters")]
 53 |         cluster = common.util.cluster_dict(
 54 |             endpoint="test.test.us-east-1.redshift.amazonaws.com:5439/dev",
 55 |             start_time="2021-08-15T15:50",
 56 |             end_time="2021-08-15T18:55",
 57 |         )
 58 |         self.assertEqual(cluster.get("num_nodes", None), "N/A")
 59 |         self.assertEqual(cluster.get("instance", None), "N/A")
 60 | 
 61 |     @patch("common.aws_service.redshift_get_serverless_workgroup")
 62 |     def test_cluster_dict_serverless_success(self, patched_get_serverless_workgroup):
 63 |         patched_get_serverless_workgroup.return_value = {"workgroup": {"baseCapacity": 10}}
 64 |         cluster = common.util.cluster_dict(
 65 |             endpoint="test.test.us-east-1.redshift-serverless.amazonaws.com:5439/dev",
 66 |             start_time="2021-08-15T15:50",
 67 |             end_time="2021-08-15T18:55",
 68 |             is_serverless=True,
 69 |         )
 70 |         self.assertEqual(cluster.get("num_nodes", None), "N/A")
 71 |         self.assertEqual(cluster.get("instance", None), "Serverless")
 72 |         self.assertEqual(cluster.get("base_rpu", 0), 10)
 73 | 
 74 |     @patch("common.aws_service.redshift_get_serverless_workgroup")
 75 |     def test_cluster_dict_serverless_resource_not_found(self, patched_get_serverless_workgroup):
 76 |         e = Exception("Failed..")
 77 |         e.response = {"Error": {"Code": "ResourceNotFoundException"}}
 78 |         patched_get_serverless_workgroup.side_effect = [e]
 79 |         with self.assertRaises(Exception):
 80 |             common.util.cluster_dict(
 81 |                 endpoint="test.test.us-east-1.redshift-serverless.amazonaws.com:5439/dev",
 82 |                 start_time="2021-08-15T15:50",
 83 |                 end_time="2021-08-15T18:55",
 84 |                 is_serverless=True,
 85 |             )
 86 | 
 87 |     @patch("common.aws_service.redshift_get_serverless_workgroup")
 88 |     def test_cluster_dict_serverless_internal_exception(self, patched_get_serverless_workgroup):
 89 |         e = Exception("Failed..")
 90 |         e.response = {"Error": {"Code": "InternalFailure"}}
 91 |         patched_get_serverless_workgroup.side_effect = [e]
 92 |         cluster = common.util.cluster_dict(
 93 |             endpoint="test.test.us-east-1.redshift-serverless.amazonaws.com:5439/dev",
 94 |             start_time="2021-08-15T15:50",
 95 |             end_time="2021-08-15T18:55",
 96 |             is_serverless=True,
 97 |         )
 98 |         self.assertEqual(cluster.get("num_nodes", None), "N/A")
 99 |         self.assertEqual(cluster.get("instance", None), "Serverless")
100 |         self.assertEqual(cluster.get("base_rpu", 0), "N/A")
101 | 
102 | 
class TestBucketDict(unittest.TestCase):
    """Checks ``common.util.bucket_dict`` on an ``s3://`` URL."""

    def test_bucket_dict(self):
        """The URL is echoed back and split into a bucket name and a slash-terminated prefix."""
        parsed = common.util.bucket_dict("s3://test-bucket/test-key")
        expected_fields = (
            ("url", "s3://test-bucket/test-key"),
            ("bucket_name", "test-bucket"),
            ("prefix", "test-key/"),
        )
        for field, expected in expected_fields:
            self.assertEqual(parsed.get(field, ""), expected)

    @patch("common.util.urlparse")
    def test_bucket_dict_urlparse_failure(self, patched_url_parse):
        """A ``ValueError`` from ``urlparse`` is expected to end in ``SystemExit``."""
        patched_url_parse.side_effect = [ValueError("Failed")]
        self.assertRaises(SystemExit, common.util.bucket_dict, "s3://test-bucket/test-key")
115 | 


--------------------------------------------------------------------------------
/common/tests/testdata/config.yaml:
--------------------------------------------------------------------------------
 1 | # Optional - Custom identifier for this replay run
 2 | tag: ""
 3 | 
 4 | # Directory location of extracted workload, relative to current directory
 5 | workload_location: "test-location"
 6 | 
 7 | # Endpoint and username of target cluster to replay queries on
 8 | target_cluster_endpoint: "test.111222333222.us-east-1.redshift-serverless.amazonaws.com:5439/dev"
 9 | target_cluster_region: "us-east-1"
10 | master_username: "awsuser"
11 | 
12 | # NLB or NAT endpoint for Simple Replay to connect to. This NLB or NAT should have connectivity to target_cluster_endpoint
13 | nlb_nat_dns: ""
14 | 
15 | # Required only for playback using ODBC (pyodbc)
16 | odbc_driver: ""
17 | 
18 | # If original driver isn't supported (e.g. JDBC), use this driver. "psql" or
19 | # "odbc" are the only valid values.
20 | default_interface: "psql"
21 | 
22 | # Optional - Leaving it empty defers to connections.json. "all on" preserves
23 | # time between transactions. "all off" disregards time between transactions,
24 | # executing them as a batch.
25 | time_interval_between_transactions: "all on"
26 | 
27 | # Optional - Leaving it empty defers to connections.json. "all on" preserves
28 | # time between queries. "all off" disregards time between queries, executing
29 | # them as a batch.
30 | time_interval_between_queries: "all on"
31 | 
32 | # Should COPY statements be executed?
33 | execute_copy_statements: "false"
34 | 
35 | # Should UNLOAD statements be executed?
36 | execute_unload_statements: "false"
37 | 
38 | # Optional - Where the UNLOADs and system table unload goes.
39 | replay_output: ""
40 | 
41 | # Optional - Where the analysis data and summary report will be uploaded.  Example:  s3://bucket_name/path
42 | analysis_output: ""
43 | 
44 | # Optional - Leaving this blank means UNLOADs will not be replayed. IAM role for UNLOADs to be performed with.
45 | unload_iam_role: ""
46 | 
47 | # Optional - Leaving this blank means analysis will not be run. IAM role for analysis needs UNLOAD access.
48 | analysis_iam_role: ""
49 | 
50 | # Location of the SQL file containing queries to unload system tables
51 | unload_system_table_queries: "unload_system_tables.sql"
52 | 
53 | # IAM role to UNLOAD system tables from source cluster to S3 location for later
54 | # analysis
55 | target_cluster_system_table_unload_iam_role: ""
56 | 
57 | # Include filters will work as "db AND user AND pid". Exclude filters will work as "db OR user OR pid".
58 | # In case of multiple values for any specific filter, please enclose each in single quotes
59 | filters:
60 |   include:
61 |     database_name: ['*']
62 |     username: ['*']
63 |     pid: ['*']
64 |   exclude:
65 |     database_name: []
66 |     username: []
67 |     pid: []
68 | 
69 | ##
70 | ## The settings below probably don't need to be modified for a typical run
71 | ##
72 | 
73 | # Set the amount of logging
74 | log_level: "info"
75 | 
76 | # number of processes to use to parallelize the work. If omitted or null, uses
77 | # one process per cpu - 1 
78 | num_workers: ~
79 | 
80 | # output warnings if connections are not within this number of seconds from
81 | # their expected time.
82 | connection_tolerance_sec: 300
83 | 
84 | # Number of TestDrive logfiles to maintain
85 | backup_count: 1
86 | 
87 | # Should we discard the returned data 
88 | drop_return: true
89 | 
90 | # Should connections in the replay be throttled
91 | limit_concurrent_connections: ~
92 | 
93 | # Should multistatement SQL be split
94 | split_multi: true
95 | 
96 | # In case of Serverless, set up a secret to store admin username and password. Specify the name of the secret below
97 | # Note: This admin username maps to the username specified as `master_username` in this file.  This will be updated to `admin_username` in a future release.
98 | secret_name: ""


--------------------------------------------------------------------------------
/common/tests/testdata/config_ext.yaml:
--------------------------------------------------------------------------------
 1 | # Required. Where to save the extracted workload. Either S3 location or local directory.
 2 | workload_location: "s3://mybucketname/myworkload"
 3 | 
 4 | # Optional. Providing this enables automatic log retrieval from S3 and system
 5 | # table information retrieval (which allows query start and end times to be
 6 | # extracted, rather than just record times)
 7 | source_cluster_endpoint: "test.111222333222.us-east-1.redshift-serverless.amazonaws.com:5439/dev"
 8 | 
 9 | # Required only if source_cluster_endpoint is given.
10 | master_username: "awsuser"
11 | 
12 | # Required. Start and end time of the workload to be extracted, e.g. 2020-06-14T21:41:16+00:00
13 | start_time: "2023-01-09T15:48:10+00:00"
14 | end_time: "2023-01-09T15:55:57+00:00"
15 | 
16 | # Required only if extraction using ODBC is preferred and installed. Otherwise, Python driver is used.
17 | odbc_driver: ""
18 | 
19 | # Leave blank to automatically retrieve audit logs from the source cluster.
20 | # You can specify a local location or S3 location to load the audit logs from
21 | # another location.
22 | log_location: ""
23 | 
24 | #Required only if log location for cloudwatch logs is specified
25 | region: ""
26 | 
27 | # Location of the SQL file containing queries to unload system tables
28 | unload_system_table_queries: "unload_system_tables.sql"
29 | 
30 | # Should be a S3 location. If unspecified, system tables will not be unloaded
31 | source_cluster_system_table_unload_location: ""
32 | 
33 | # If an IAM role is provided, UNLOAD will occur. If this is blank, UNLOAD of system tables will not occur.
34 | source_cluster_system_table_unload_iam_role: ""
35 | 
36 | #Provide the schemas list for spectrum to avoid modification during Replay in format ['schema_name']
37 | external_schemas: ""
38 | 
39 | ##
40 | ## The settings below probably don't need to be modified for a typical run
41 | ##
42 | 
43 | # Set the amount of logging
44 | log_level: info
45 | 
46 | # Number of TestDrive logfiles to maintain
47 | backup_count: 1


--------------------------------------------------------------------------------
/config/external_object_replicator.yaml:
--------------------------------------------------------------------------------
 1 | #Provide a provisioned source cluster endpoint. e.g.: "<region>.redshift.amazonaws.com:<port>/<databasename>"
 2 | source_cluster_endpoint:
 3 | #Provide region. eg: "us-east-2"
 4 | region:
 5 | #Provide redshift username. eg: "awsuser"
 6 | redshift_user :
 7 | 
 8 | #Provide the start time and end time of the workload to be replicated. e.g.: "2020-07-24T09:31:00+00:00"
 9 | start_time :
10 | end_time  :
11 | 
12 | #Provide the S3 bucket location where you want the replicator to store cloned objects. eg: "s3://mybucket/myworkload"
13 | target_s3_location:
14 | 
15 | # Set the amount of logging - either INFO or DEBUG
16 | log_level:


--------------------------------------------------------------------------------
/config/extract.yaml:
--------------------------------------------------------------------------------
 1 | # Required. Where to save the extracted workload. Either S3 location or local directory.
 2 | workload_location: "s3://mybucketname/myworkload"
 3 | 
 4 | # Optional. Providing this enables automatic log retrieval from S3 and system
 5 | # table information retrieval (which allows query start and end times to be
 6 | # extracted, rather than just record times)
 7 | source_cluster_endpoint: ""
 8 | 
 9 | # Required only if source_cluster_endpoint is given.
10 | master_username: "awsuser"
11 | 
12 | #Required for generating copy_replacements, and if a log location for cloudwatch logs is specified
13 | region: ""
14 | 
15 | 
16 | # Required. Start and end time of the workload to be extracted, e.g. 2020-06-14T21:41:16+00:00
17 | start_time: ""
18 | end_time: ""
19 | 
20 | #Replacement s3 location and IAM roles for copy files 
21 | replacement_copy_location: ""
22 | replacement_iam_location: ""
23 | 
24 | 
25 | # Required only if extraction using ODBC is preferred and installed. Otherwise, Python driver is used.
26 | odbc_driver: ""
27 | 
28 | # Leave blank to automatically retrieve audit logs from the source cluster.
29 | # You can specify a local location or S3 location to load the audit logs from
30 | # another location.
31 | log_location: ""
32 | 
33 | # Location of the SQL file containing queries to unload system tables
34 | unload_system_table_queries: "core/replay/unload_system_tables.sql"
35 | 
36 | # Should be a S3 location. If unspecified, system tables will not be unloaded
37 | source_cluster_system_table_unload_location: ""
38 | 
39 | # If an IAM role is provided, UNLOAD will occur. If this is blank, UNLOAD of system tables will not occur.
40 | source_cluster_system_table_unload_iam_role: ""
41 | 
42 | ##
43 | ## The settings below probably don't need to be modified for a typical run
44 | ##
45 | 
46 | # Set the amount of logging
47 | log_level: info
48 | 
49 | # Number of logfiles to maintain
50 | backup_count: 1
51 | 
52 | #Provide the schemas list for spectrum to avoid modification during Replay in format ['schema_name']
53 | external_schemas:


--------------------------------------------------------------------------------
/config/replay.yaml:
--------------------------------------------------------------------------------
 1 | # Optional - Custom identifier for this replay run
 2 | tag: ""
 3 | 
 4 | # Directory location of extracted workload, relative to current directory
 5 | workload_location: ""
 6 | 
 7 | # Endpoint and username of target cluster to replay queries on
 8 | target_cluster_endpoint: ""
 9 | target_cluster_region: ""
10 | master_username: ""
11 | 
12 | # NLB or NAT endpoint for Simple Replay to connect to. This NLB or NAT should have connectivity to target_cluster_endpoint
13 | nlb_nat_dns: ""
14 | 
15 | # Required only for playback using ODBC (pyodbc)
16 | odbc_driver: ""
17 | 
18 | # If original driver isn't supported (e.g. JDBC), use this driver. "psql" or
19 | # "odbc" are the only valid values.
20 | default_interface: "psql"
21 | 
22 | # Optional - Leaving it empty defers to connections.json. "all on" preserves
23 | # time between transactions. "all off" disregards time between transactions,
24 | # executing them as a batch.
25 | time_interval_between_transactions: ""
26 | 
27 | # Optional - Leaving it empty defers to connections.json. "all on" preserves
28 | # time between queries. "all off" disregards time between queries, executing
29 | # them as a batch.
30 | time_interval_between_queries: ""
31 | 
32 | # Should COPY statements be executed?
33 | execute_copy_statements: "false"
34 | 
35 | # Should UNLOAD statements be executed?
36 | execute_unload_statements: "false"
37 | 
38 | # Optional - Where the UNLOADs and system table unload goes.
39 | replay_output: ""
40 | 
41 | # Optional - Where the analysis data and summary report will be uploaded.  Example:  s3://bucket_name/path
42 | analysis_output: ""
43 | 
44 | # Optional - Leaving this blank means UNLOADs will not be replayed. IAM role for UNLOADs to be performed with.
45 | unload_iam_role: ""
46 | 
47 | # Optional - Leaving this blank means analysis will not be run. IAM role for analysis needs UNLOAD access.
48 | analysis_iam_role: ""
49 | 
50 | # Location of the SQL file containing queries to unload system tables
51 | unload_system_table_queries: "core/replay/unload_system_tables.sql"
52 | 
53 | # IAM role to UNLOAD system tables from source cluster to S3 location for later
54 | # analysis
55 | target_cluster_system_table_unload_iam_role: ""
56 | 
57 | # Include filters will work as "db AND user AND pid". Exclude filters will work as "db OR user OR pid".
58 | # In case of multiple values for any specific filter, please enclose each in single quotes
59 | filters:
60 |   include:
61 |     database_name: ['*']
62 |     username: ['*']
63 |     pid: ['*']
64 |   exclude:
65 |     database_name: []
66 |     username: []
67 |     pid: []
68 | 
69 | ##
70 | ## The settings below probably don't need to be modified for a typical run
71 | ##
72 | 
73 | # Set the amount of logging
74 | log_level: "DEBUG"
75 | 
76 | # number of processes to use to parallelize the work. If omitted or null, uses
77 | # one process per cpu - 1 
78 | num_workers: ~
79 | 
80 | # output warnings if connections are not within this number of seconds from
81 | # their expected time.
82 | connection_tolerance_sec: 300
83 | 
84 | # Number of TestDrive logfiles to maintain
85 | backup_count: 1
86 | 
87 | # Should we discard the returned data 
88 | drop_return: true
89 | 
90 | # Should connections in the replay be throttled
91 | limit_concurrent_connections: ~
92 | 
93 | # Should multistatement SQL be split
94 | split_multi: true
95 | 
96 | # In case of Serverless, set up a secret to store admin username and password. Specify the name of the secret below
97 | # Note: This admin username maps to the username specified as `master_username` in this file.  This will be updated to `admin_username` in a future release.
98 | secret_name: ""
99 | 


--------------------------------------------------------------------------------
/config/user_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "SNAPSHOT_ID": "redshift-cluster-manual-snapshot",
 3 |   "SNAPSHOT_ACCOUNT_ID": "123456789012",
 4 |   "PARAMETER_GROUP_CONFIG_S3_PATH": "s3://node-config-compare-bucket/pg_config.json",
 5 |   "DDL_AND_COPY_SCRIPT_S3_PATH": "s3://node-config-compare-bucket/ddl.sql",
 6 |   "SQL_SCRIPT_S3_PATH": "s3://node-config-compare-bucket/test_queries.sql",
 7 |   "NUMBER_OF_PARALLEL_SESSIONS_LIST": "1",
 8 |   "SIMPLE_REPLAY_LOG_LOCATION": "s3://redshift-logging-xxxxxxxx/RSLogs/",
 9 |   "SIMPLE_REPLAY_EXTRACT_START_TIME": "2021-08-28T11:15:00+00:00",
10 |   "SIMPLE_REPLAY_EXTRACT_END_TIME": "2021-08-28T12:00:00+00:00",
11 |   "SIMPLE_REPLAY_UNLOAD_STATEMENTS": "false",
12 |   "SIMPLE_REPLAY_EXTRACT_OVERWRITE_S3_PATH": "N/A",
13 |   "SIMPLE_REPLAY_OVERWRITE_S3_PATH": "N/A",
14 |   "AUTO_PAUSE": true,
15 |   "DATABASE_NAME": "database_name",
16 |   "CONFIGURATIONS": [
17 |     {
18 |       "TYPE": "Provisioned",
19 |       "NODE_TYPE": "dc2.8xlarge",
20 |       "NUMBER_OF_NODES": "2",
21 |       "WLM_CONFIG_S3_PATH": "N/A"
22 |     },
23 |     {
24 |       "TYPE": "Provisioned",
25 |       "NODE_TYPE": "ra3.4xlarge",
26 |       "NUMBER_OF_NODES": "4",
27 |       "WLM_CONFIG_S3_PATH": "N/A"
28 |     },
29 |     {
30 |       "TYPE": "Provisioned",
31 |       "NODE_TYPE": "ra3.4xlarge",
32 |       "NUMBER_OF_NODES": "4",
33 |       "WLM_CONFIG_S3_PATH": "s3://node-config-compare-bucket/wlmconfig.json"
34 |     },
35 |     {
36 |       "TYPE": "Serverless",
37 |       "BASE_RPU": "64"
38 |     },
39 |     {
40 |       "TYPE": "Serverless",
41 |       "BASE_RPU": "128"
42 |     }
43 |   ]
44 | }


--------------------------------------------------------------------------------
/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/core/__init__.py


--------------------------------------------------------------------------------
/core/extract/__init__.py:
--------------------------------------------------------------------------------
1 | from pkgutil import extend_path
2 | 
# Extend this package's search path via pkgutil so that same-named package
# directories elsewhere on sys.path can contribute modules to it.
__path__ = extend_path(__path__, __name__)
4 | 


--------------------------------------------------------------------------------
/core/extract/cloudwatch_extractor.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import tempfile
  3 | import gzip
  4 | import sys
  5 | 
  6 | import common.aws_service as aws_service_helper
  7 | from core.extract.extract_parser import parse_log
  8 | 
  9 | logger = logging.getLogger("WorkloadReplicatorLogger")
 10 | 
 11 | 
 12 | class CloudwatchExtractor:
 13 |     config = None
 14 | 
 15 |     def __init__(self, config):
 16 |         self.config = config
 17 | 
 18 |     def get_extract_from_cloudwatch(self, start_time, end_time):
 19 |         cloudwatch_logs = []
 20 |         if self.config.get("source_cluster_endpoint"):
 21 |             logger.info(
 22 |                 f"Extracting logs from source cluster endpoint: {self.config['source_cluster_endpoint']}"
 23 |             )
 24 |             source_cluster_endpoint = self.config.get("source_cluster_endpoint")
 25 |             region = source_cluster_endpoint.split(".")[2]
 26 |             endpoint = source_cluster_endpoint.split(".")[0]
 27 |             response = aws_service_helper.cw_describe_log_groups(region=region)
 28 |             cloudwatch_logs = self._read_cloudwatch_logs(
 29 |                 response, endpoint, start_time, end_time, region
 30 |             )
 31 |         elif self.config.get("log_location"):
 32 |             logger.info(f"Extracting logs for {self.config['log_location']}")
 33 |             response = aws_service_helper.cw_describe_log_groups(
 34 |                 log_group_name=self.config.get("log_location"),
 35 |                 region=self.config.get("region"),
 36 |             )
 37 |             for log_group in response["logGroups"]:
 38 |                 log_group_name = log_group["logGroupName"]
 39 |                 response_stream = aws_service_helper.cw_describe_log_streams(
 40 |                     log_group_name, self.config.get("region")
 41 |                 )
 42 |                 endpoint = response_stream["logStreams"][0]["logStreamName"]
 43 |                 cloudwatch_logs = self._read_cloudwatch_logs(
 44 |                     response, endpoint, start_time, end_time, self.config.get("region")
 45 |                 )
 46 |         else:
 47 |             logger.error(
 48 |                 "For Cloudwatch Log Extraction, one of source_cluster_endpoint or log_location must be provided"
 49 |             )
 50 |             sys.exit(-1)
 51 |         return cloudwatch_logs
 52 | 
 53 |     def _read_cloudwatch_logs(self, response, endpoint, start_time, end_time, region):
 54 |         connections = {}
 55 |         last_connections = {}
 56 |         logs = {}
 57 |         databases = set()
 58 |         for log_group in response["logGroups"]:
 59 |             log_group_name = log_group["logGroupName"]
 60 |             stream_batch = aws_service_helper.cw_describe_log_streams(
 61 |                 log_group_name=log_group_name, region=region
 62 |             )["logStreams"]
 63 |             for stream in stream_batch:
 64 |                 stream_name = stream["logStreamName"]
 65 |                 if endpoint == stream_name:
 66 |                     logger.info(
 67 |                         f"Extracting for log group: {log_group_name} between time {start_time} and {end_time}"
 68 |                     )
 69 | 
 70 |                     log_list = aws_service_helper.cw_get_paginated_logs(
 71 |                         log_group_name,
 72 |                         stream["logStreamName"],
 73 |                         start_time,
 74 |                         end_time,
 75 |                         region,
 76 |                     )
 77 |                     if "useractivitylog" in log_group_name:
 78 |                         log_type = "useractivitylog"
 79 |                     elif "connectionlog" in log_group_name:
 80 |                         log_type = "connectionlog"
 81 |                     else:
 82 |                         logger.warning(
 83 |                             f"Unsupported log file {log_group_name}, cannot determine type"
 84 |                         )
 85 |                         continue
 86 | 
 87 |                     with tempfile.TemporaryDirectory(suffix="TestDrive") as tempdir:
 88 |                         with gzip.open(f"{tempdir}/{log_type}.gz", "wt") as gzip_file:
 89 |                             gzip_file.write("\n".join(log_list))
 90 | 
 91 |                         if log_type == "connectionlog":
 92 |                             logger.info("Parsing connection logs...")
 93 |                             with gzip.open(f"{tempdir}/connectionlog.gz", "r") as gzip_file:
 94 |                                 parse_log(
 95 |                                     gzip_file,
 96 |                                     "connectionlog.gz",
 97 |                                     connections,
 98 |                                     last_connections,
 99 |                                     logs,
100 |                                     databases,
101 |                                     start_time,
102 |                                     end_time,
103 |                                 )
104 |                         if log_type == "useractivitylog":
105 |                             logger.info("Parsing user activity logs...")
106 |                             with gzip.open(f"{tempdir}/useractivitylog.gz", "r") as gzip_file:
107 |                                 parse_log(
108 |                                     gzip_file,
109 |                                     "useractivitylog.gz",
110 |                                     connections,
111 |                                     last_connections,
112 |                                     logs,
113 |                                     databases,
114 |                                     start_time,
115 |                                     end_time,
116 |                                 )
117 | 
118 |         return connections, logs, databases, last_connections
119 | 


--------------------------------------------------------------------------------
/core/extract/extract.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import sys
  3 | import hashlib
  4 | import datetime
  5 | import yaml
  6 | import os
  7 | import zipfile
  8 | import time
  9 | import re
 10 | import common.config as config_helper
 11 | import common.log as log_helper
 12 | from common import aws_service as aws_service_helper
 13 | from common.util import cluster_dict, db_connect
 14 | import core.extract.extractor as extractor
 15 | 
 16 | logger = logging.getLogger("WorkloadReplicatorLogger")
 17 | 
 18 | serverless_cluster_endpoint_pattern = (
 19 |     r"(.+)\.(.+)\.(.+).redshift-serverless(-dev)?\.amazonaws\.com:[0-9]{4,5}\/(.)+"
 20 | )
 21 | 
 22 | 
 23 | def is_serverless(config):
 24 |     return bool(
 25 |             re.fullmatch(serverless_cluster_endpoint_pattern, config["source_cluster_endpoint"])
 26 |         )
 27 | 
 28 | 
 29 | def main():
 30 | 
 31 |     extract_start_time = time.time()
 32 | 
 33 |     # Parse config file
 34 |     config = config_helper.get_config_file_from_args()
 35 |     config_helper.validate_config_file_for_extract(config)
 36 | 
 37 |     # UID for extract logs
 38 |     extract_start_timestamp = datetime.datetime.now(tz=datetime.timezone.utc)
 39 |     id_hash = hashlib.sha1(
 40 |         extract_start_timestamp.isoformat().encode("UTF-8")
 41 |     ).hexdigest()[:5]
 42 |     if config.get("source_cluster_endpoint", "") != "":
 43 |         cluster = cluster_dict(config["source_cluster_endpoint"])
 44 |         if config.get("tag", "") != "":
 45 |             extract_id = f'{extract_start_timestamp.isoformat()}_{cluster.get("id")}_{config["tag"]}_{id_hash}'
 46 |         else:
 47 |             extract_id = (
 48 |                 f'{extract_start_timestamp.isoformat()}_{cluster.get("id")}_{id_hash}'
 49 |             )
 50 |     else:
 51 |         log_location = config.get("log_location")
 52 |         if config.get("tag", "") != "":
 53 |             extract_id = f'{extract_start_timestamp.isoformat()}_{log_location}_{config["tag"]}_{id_hash}'
 54 |         else:
 55 |             extract_id = (
 56 |                 f"{extract_start_timestamp.isoformat()}_{log_location}_{id_hash}"
 57 |             )
 58 | 
 59 |     # Setup Logging
 60 |     level = logging.getLevelName(config.get("log_level", "INFO").upper())
 61 |     log_helper.init_logging(
 62 |         "extract.log",
 63 |         dir=f"core/logs/extract/extract_log-{extract_id}",
 64 |         level=level,
 65 |         preamble=yaml.dump(config),
 66 |         backup_count=config.get("backup_count", 2),
 67 |         script_type="extract",
 68 |         log_id=extract_id,
 69 |     )
 70 |     log_helper.log_version()
 71 | 
 72 |     e = extractor.Extractor(config)
 73 |     if not e.load_driver():
 74 |         sys.exit("Failed to load driver")
 75 | 
 76 |     # setting application name for tracking
 77 |     if config.get("source_cluster_endpoint"):
 78 |         application = "WorkloadReplicator-Extract"
 79 | 
 80 |         if is_serverless(config):
 81 |             host = f'redshift-serverless-{config.get("source_cluster_endpoint").split(".")[0]}' 
 82 |         else:
 83 |             host = config.get("source_cluster_endpoint").split(".")[0]
 84 |         port = int(config.get("source_cluster_endpoint").split(":")[-1]
 85 |                    .split("/")[0])
 86 |         DbUser = config.get("master_username")
 87 |         DbName = config.get("source_cluster_endpoint").split("/")[-1]
 88 |         region = config.get("region")
 89 |         endpoint = config.get('source_cluster_endpoint').split(":")[0]
 90 | 
 91 |         response = aws_service_helper.redshift_get_cluster_credentials(
 92 |             user=DbUser,
 93 |             database_name=DbName,
 94 |             cluster_id=host,
 95 |             region=region)
 96 |         db_connect(host=endpoint,
 97 |                    port=port,
 98 |                    database=DbName,
 99 |                    password=response['DbPassword'],
100 |                    username=response['DbUser'], app_name=application)
101 | 
102 |     # Run extract job
103 |     (
104 |         extraction_name,
105 |         start_time,
106 |         end_time,
107 |         log_location,
108 |     ) = e.get_parameters_for_log_extraction()
109 |     (connections, audit_logs, databases, last_connections) = e.get_extract(
110 |         log_location, start_time, end_time
111 |     )
112 | 
113 |     e.validate_log_result(connections, audit_logs)
114 |     e.retrieve_cluster_endpoint_info(extraction_name)
115 | 
116 |     e.save_logs(
117 |         audit_logs,
118 |         last_connections,
119 |         config["workload_location"] + "/" + extraction_name,
120 |         connections,
121 |         start_time,
122 |         end_time,
123 |     )
124 | 
125 |     # save the extract logs to S3
126 |     output_directory = f'{config["workload_location"]+ "/" + extraction_name}'
127 |     if output_directory.startswith("s3://"):
128 |         output_s3_location = output_directory[5:].partition("/")
129 |         bucket_name = output_s3_location[0]
130 |         output_prefix = output_s3_location[2]
131 |         object_key = "extract_logs.zip"
132 |         zip_file_name = f"extract_logs.zip"
133 |         logger.info(f"Uploading extract logs to {bucket_name}/{output_prefix}")
134 |         dir = f"core/logs/extract/extract_log-{extract_id}"
135 |         with zipfile.ZipFile(zip_file_name, "w", zipfile.ZIP_DEFLATED) as zip_object:
136 |             for folder_name, sub_folders, file_names in os.walk(dir):
137 |                 for filename in file_names:
138 |                     file_path = os.path.join(folder_name, filename)
139 |                     zip_object.write(file_path)
140 |         with open(zip_file_name, "rb") as f:
141 |             aws_service_helper.s3_put_object(
142 |                 f, bucket_name, f"{output_prefix}/{object_key}"
143 |             )
144 | 
145 |     total_extract_time = str(datetime.timedelta(seconds=(time.time() - extract_start_time)))
146 |     logger.info(f"Extract completed in {total_extract_time}")
147 | 
148 | if __name__ == "__main__":
149 |     main()
150 | 


--------------------------------------------------------------------------------
/core/extract/local_extractor.py:
--------------------------------------------------------------------------------
 1 | import gzip
 2 | import logging
 3 | import os
 4 | from tqdm import tqdm
 5 | from core.extract import extract_parser
 6 | 
 7 | logger = logging.getLogger("WorkloadReplicatorLogger")
 8 | 
 9 | 
class LocalExtractor:
    """Extracts audit logs from gzip-compressed files stored in a local directory."""

    # Passed straight to tqdm's ``disable`` argument; when truthy, each file name
    # is logged instead of being shown in the progress bar.
    disable_progress_bar = None
    bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}{postfix}]"

    def __init__(self, config):
        """
        :param config: extract configuration mapping; ``disable_progress_bar`` is
            read from it when the mapping is non-empty
        """
        self.config = config
        # Read the same switch S3Extractor reads. Previously the flag was never
        # taken from the config, so the per-file log line below could not fire.
        if config:
            self.disable_progress_bar = config.get("disable_progress_bar")

    def get_extract_locally(self, log_directory_path, start_time, end_time):
        """
        Parse every file found in ``log_directory_path`` (in sorted name order).

        :param log_directory_path: directory containing the gzip log files
        :param start_time: passed through to ``extract_parser.parse_log``
        :param end_time: passed through to ``extract_parser.parse_log``
        :return: tuple ``(connections, logs, databases, last_connections)`` as
            populated by ``extract_parser.parse_log``
        """
        connections = {}
        last_connections = {}
        logs = {}
        databases = set()

        log_directory = sorted(os.listdir(log_directory_path))

        for filename in tqdm(
            log_directory,
            disable=self.disable_progress_bar,
            unit="files",
            desc="Files processed",
            bar_format=self.bar_format,
        ):
            if self.disable_progress_bar:
                logger.info(f"Processing {filename}")

            log_path = log_directory_path + "/" + filename
            if "start_node" in filename:
                # These files are read as text with an explicit encoding.
                log_file = gzip.open(log_path, "rt", encoding="ISO-8859-1")
            else:
                log_file = gzip.open(log_path, "r")

            # The context manager closes the handle even if parsing raises.
            with log_file:
                extract_parser.parse_log(
                    log_file,
                    filename,
                    connections,
                    last_connections,
                    logs,
                    databases,
                    start_time,
                    end_time,
                )

        return connections, logs, databases, last_connections
62 | 


--------------------------------------------------------------------------------
/core/extract/s3_extractor.py:
--------------------------------------------------------------------------------
  1 | import gzip
  2 | import logging
  3 | import common.aws_service as aws_service_helper
  4 | from tqdm import tqdm
  5 | from core.util.log_validation import get_logs_in_range
  6 | from core.extract.extract_parser import parse_log
  7 | 
  8 | logger = logging.getLogger("WorkloadReplicatorLogger")
  9 | 
 10 | 
 11 | class S3Extractor:
 12 |     disable_progress_bar = None
 13 |     bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}{postfix}]"
 14 | 
 15 |     def __init__(self, config):
 16 |         self.disable_progress_bar = config.get("disable_progress_bar")
 17 | 
 18 |     def get_extract_from_s3(self, log_bucket, log_prefix, start_time, end_time):
 19 |         """
 20 |         getting logs from s3 and passing it to get_s3_audit_logs()
 21 |         :param log_bucket:
 22 |         :param log_prefix:
 23 |         :param start_time:
 24 |         :param end_time:
 25 |         :return:
 26 |         """
 27 |         connections = {}
 28 |         logs = {}
 29 |         last_connections = {}
 30 |         databases = set()
 31 |         bucket_objects = aws_service_helper.sync_s3_get_bucket_contents(log_bucket, log_prefix)
 32 | 
 33 |         s3_connection_logs = []
 34 |         s3_user_activity_logs = []
 35 | 
 36 |         for log in bucket_objects:
 37 |             filename = log["Key"].split("/")[-1]
 38 |             if "connectionlog" in filename:
 39 |                 s3_connection_logs.append(log)
 40 |             elif "useractivitylog" in filename:
 41 |                 s3_user_activity_logs.append(log)
 42 | 
 43 |         logger.info("Parsing connection logs")
 44 |         self._get_s3_audit_logs(
 45 |             log_bucket,
 46 |             log_prefix,
 47 |             start_time,
 48 |             end_time,
 49 |             s3_connection_logs,
 50 |             connections,
 51 |             logs,
 52 |             databases,
 53 |             last_connections,
 54 |         )
 55 |         logger.info("Parsing user activity logs")
 56 |         self._get_s3_audit_logs(
 57 |             log_bucket,
 58 |             log_prefix,
 59 |             start_time,
 60 |             end_time,
 61 |             s3_user_activity_logs,
 62 |             connections,
 63 |             logs,
 64 |             databases,
 65 |             last_connections,
 66 |         )
 67 |         return connections, logs, databases, last_connections
 68 | 
 69 |     def _get_s3_audit_logs(
 70 |         self,
 71 |         log_bucket,
 72 |         log_prefix,
 73 |         start_time,
 74 |         end_time,
 75 |         audit_objects,
 76 |         connections,
 77 |         logs,
 78 |         databases,
 79 |         last_connections,
 80 |     ):
 81 |         """
 82 |         Getting  audit logs from S3 for the cluster from get_s3_logs  and calling the pasrse_log()
 83 | 
 84 |         :param log_bucket:
 85 |         :param log_prefix:
 86 |         :param start_time:
 87 |         :param end_time:
 88 |         :param audit_objects:
 89 |         :param connections:
 90 |         :param logs:
 91 |         :param databases:
 92 |         :param last_connections:
 93 |         :return:
 94 |         """
 95 | 
 96 |         index_of_last_valid_log = len(audit_objects) - 1
 97 | 
 98 |         log_filenames = get_logs_in_range(audit_objects, start_time, end_time)
 99 | 
100 |         logger.info(f"Processing {len(log_filenames)} files")
101 | 
102 |         curr_index = index_of_last_valid_log
103 |         for filename in tqdm(
104 |             log_filenames,
105 |             disable=self.disable_progress_bar,
106 |             unit="files",
107 |             desc="Files processed",
108 |             bar_format=self.bar_format,
109 |         ):
110 |             log_object = aws_service_helper.s3_get_object(log_bucket, filename)
111 |             log_file = gzip.GzipFile(fileobj=log_object.get()["Body"])
112 | 
113 |             parse_log(
114 |                 log_file,
115 |                 filename,
116 |                 connections,
117 |                 last_connections,
118 |                 logs,
119 |                 databases,
120 |                 start_time,
121 |                 end_time,
122 |             )
123 | 
124 |         logger.debug(
125 |             f'First audit log in start_time range: {audit_objects[curr_index]["Key"].split("/")[-1]}'
126 |         )
127 |         return connections, logs, databases, last_connections
128 | 


--------------------------------------------------------------------------------
/core/replay/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/core/replay/__init__.py


--------------------------------------------------------------------------------
/core/replay/copy_replacements_parser.py:
--------------------------------------------------------------------------------
 1 | import csv
 2 | import logging
 3 | import sys
 4 | 
 5 | import common.aws_service as aws_service_helper
 6 | 
 7 | logger = logging.getLogger("WorkloadReplicatorLogger")
 8 | 
 9 | 
def parse_copy_replacements(workload_directory):
    """
    Load ``copy_replacements.csv`` from the workload directory (local or S3).

    The first CSV row is treated as a header and skipped. Each remaining
    three-column row maps column 0 to the list ``[column 1, column 2]``.

    * S3 location: rows that do not have exactly three columns, or whose third
      column is empty, are silently ignored.
    * Local location: a row without exactly three columns is logged as an error
      and the process exits with a non-zero status.

    :param workload_directory: local path or ``s3://bucket/prefix`` location
    :return: dict of COPY replacements
    """
    copy_replacements = {}
    replacements_path = workload_directory.rstrip("/") + "/copy_replacements.csv"

    if replacements_path.startswith("s3://"):
        bucket_name, _, object_key = replacements_path[5:].partition("/")
        s3_object = aws_service_helper.s3_client_get_object(bucket_name, object_key)
        csv_string = s3_object["Body"].read().decode("utf-8")
        copy_replacements_reader = csv.reader(csv_string.splitlines())
        # A default avoids StopIteration when the file is completely empty.
        next(copy_replacements_reader, None)  # Skip header
        for row in copy_replacements_reader:
            if len(row) == 3 and row[2]:
                copy_replacements[row[0]] = [row[1], row[2]]
    else:
        # newline="" lets the csv module handle line endings itself.
        with open(replacements_path, "r", newline="") as csvfile:
            copy_replacements_reader = csv.reader(csvfile)
            next(copy_replacements_reader, None)  # Skip header
            for idx, row in enumerate(copy_replacements_reader):
                if len(row) != 3:
                    logger.error(
                        f"Replacements file {replacements_path} is malformed (row {idx}, line:\n{row}"
                    )
                    # Exit with a failure status; a bare sys.exit() reports success.
                    sys.exit(1)
                copy_replacements[row[0]] = [row[1], row[2]]

    logger.info(f"Loaded {len(copy_replacements)} COPY replacements from {replacements_path}")
    return copy_replacements
39 | 


--------------------------------------------------------------------------------
/core/replay/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/core/replay/logo.png


--------------------------------------------------------------------------------
/core/replay/report_content.yaml:
--------------------------------------------------------------------------------
 1 | # Replay Information and Report Details
 2 | title: "Simple Replay Workload Analysis"
 3 | subtitle: "<i>Replay ID: {REPLAY_ID}</i>"
 4 | 
 5 | report_paragraph: "This report summarizes the performance of the replayed workload shown above."
 6 | 
 7 | glossary_header: "Glossary"
 8 | glossary_paragraph: "The following terms are used in this report:"
 9 | glossary:
10 |   - "<b>Compile Time</b> is the total amount of time spent compiling a query."
11 |   - "<b>Queue Time</b> is the amount of time a query spends waiting before executing in a workload management (WLM) queue."
12 |   - "<b>Execution Time</b> is how long a query spends in the execution phase."
13 |   - "<b>Query Latency</b> is the total runtime of a query in Redshift."
14 |   - "<b>Commit Queue Time</b> is the time a transaction spent waiting before entering the commit phase."
15 |   - "<b>Commit Time</b> is the time a transaction spent being committed."
16 | 
17 | data_header: "Accessing the data"
18 | data_paragraph: >
19 |   All of the performance data collected for this report is available in S3 at the following location:
20 |   <br/><br/>s3://{S3_BUCKET}/replays/{REPLAY_ID}/
21 |   <br/><br/>The <font face='Courier'>raw_data</font> directory contains the following raw CSV files unloaded from the Redshift cluster:
22 | raw_data:
23 |   - "<font face='Courier'>statement_types000</font> Statement counts by type (e.g. SELECT, COPY, etc.)"
24 |   - "<font face='Courier'>query_metrics000</font> Query-level performance data."
25 |   - "<font face='Courier'>cluster_level_metrics000</font> Cluster-level summary of performance data. This is used to generate the Cluster Metrics table on page 2."
26 |   - "<font face='Courier'>query_distribution000</font> User-level summary of performance data and broken down by query execution phase. This is used to generate the latency, commit, queue, compile, and execution time tables that begin on page 3."
27 | 
28 | agg_data_paragraph: "The <font face='Courier'>aggregated_data</font> directory in S3 contains CSV files of the aggregated table data used to generate this report."
29 | 
30 | notes_header: "Workload Notes"
31 | notes_paragraph: >
32 |   Redshift Test Drive attempts to replay the source cluster workload as faithfully as possible on the target cluster.
33 |   However, the replayed workload may differ from the original workload in the following ways:
34 | notes:
35 |   - "The percentiles in this report exclude DDLs, Utility statements, and any leader node-only catalog queries."
36 |   - "The reports grouped by user show the top 100 users based on the count of queries executed per user during the replay. All additional users above the top 100 are rolled up as “Others.” Data for all users is available in S3."
37 |   - "Query compilation time is distributed evenly between queries that hop between service classes and can therefore occasionally result in execution or elapsed times that are less than zero for very short queries."
38 | 
39 | 
40 | # Query Breakdown and Cluster Level Performance
41 | query_breakdown:
42 |   table1:
43 |     title: "Query Breakdown"
44 |     paragraph: "The table below shows the total number of queries, number of aborted queries, and number of queries executed on concurrency scaling clusters broken down by statement type."
45 |     note: "* note that query counts are approximate and based on statement text"
46 | 
47 | graph:
48 |   title: "Query Latency"
49 |   paragraph: "The histogram shows a breakdown of query latency on a log scale. The distribution shows shorter running queries on the left and longer running queries on the right."
50 | 
51 | cluster_metrics:
52 |   table2:
53 |     title: "Cluster Metrics"
54 |     paragraph: "The table below shows performance statistics broken down by cluster-level workload metric."
55 |     note: "* note that query latency excludes compile time"
56 | 
57 | # Performance Breakdown
58 | measure_tables:
59 |   table3:
60 |     title: "Query Latency"
61 |     paragraph: "Query latency is the combined amount of time a query spends queued in WLM and executing. Note that this does not include query compilation time."
62 | 
63 |   table4:
64 |     title: "Compile Time"
65 |     paragraph: >
66 |       Redshift compiles queries before executing them, and then caches the compiled result.  This table shows how much
67 |       time is spent compiling queries, broken down by user.  Note that a workload run on a new cluster may have higher
68 |       compile time than the original source cluster workload since it may not benefit from prior caching.
69 | 
70 |   table5:
71 |     title: "Queue Time"
72 |     paragraph: >
73 |       Queue time shows how much time a query spends waiting to start executing.  It should usually be
74 |       considered together with Execution Time (since high queue time + low execution time is the same to the user as low queue time + high execution time).
75 | 
76 | 
77 |   table6:
78 |     title: "Execution Time"
79 |     paragraph: >
80 |       Execution time shows how much time a query spends executing. It should usually be considered together with
81 |       Queue Time (since high queue time + low execution time is the same to the user as low queue time + high execution time).
82 | 
83 |   table7:
84 |     title: "Commit Queue Time"
85 |     paragraph: "Transactions may be queued before the commit phase starts. This table summarizes how much time each user’s transactions spend waiting to start the commit."
86 | 
87 |   table8:
88 |     title: "Commit Time"
89 |     paragraph: "This table summarizes how much time is spent committing transactions, broken down by user."


--------------------------------------------------------------------------------
/core/replay/stats.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | logger = logging.getLogger("WorkloadReplicatorLogger")
 4 | 
 5 | 
 6 | def percent(num, den):
 7 |     if den == 0:
 8 |         return 0
 9 |     return float(num) / den * 100.0
10 | 
11 | 
def print_stats(stats):
    """
    Log, at debug level, the connection offset of every process in ``stats``
    followed by the offset with the largest absolute value.

    Warns and returns early when ``stats`` has no entry for key ``0``.
    """
    if 0 not in stats:
        logger.warning("No stats gathered.")
        return

    largest_offset = 0
    for process_idx, process_stats in stats.items():
        offset = process_stats.get("connection_diff_sec", 0)
        if abs(offset) > abs(largest_offset):
            largest_offset = process_stats["connection_diff_sec"]
        logger.debug(f"[{process_idx}] Max connection offset: {offset:+.3f} sec")
    logger.debug(f"Max connection offset: {largest_offset:+.3f} sec")
25 | 
26 | 
def display_stats(stats, total_queries, peak_connections):
    """
    Log a one-line progress summary: queries executed out of ``total_queries``,
    the success/failure split, and the peak connection count.

    :param stats: mapping with ``query_success`` and ``query_error`` counters
    :param total_queries: total number of queries expected
    :param peak_connections: object whose ``value`` attribute is reported
    """
    succeeded = stats["query_success"]
    failed = stats["query_error"]
    executed = succeeded + failed

    pieces = [
        f"Queries executed: {executed} of {total_queries} "
        f"({percent(executed, total_queries):.1f}%)",
        "  [",
        f"Success: {succeeded} ({percent(succeeded, executed):.1f}%), ",
        f"Failed: {failed} ({percent(failed, executed):.1f}%), ",
        f"Peak connections: {peak_connections.value}",
        "]",
    ]
    logger.info("".join(pieces))
40 | 
41 | 
def init_stats(stats_dict):
    """
    Reset every replay counter and error log in ``stats_dict`` and return it.

    Values are assigned key by key so that a Manager-backed dict, if that is
    what was passed in, is notified of each change.
    """
    initial_values = (
        ("connection_diff_sec", 0),
        ("transaction_success", 0),
        ("transaction_error", 0),
        ("query_success", 0),
        ("query_error", 0),
        ("connection_error_log", {}),  # map filename to array of connection errors
        ("transaction_error_log", {}),  # map filename to array of transaction errors
        ("multi_statements", 0),
        ("executed_queries", 0),  # includes multi-statement queries
    )
    for key, value in initial_values:
        stats_dict[key] = value
    return stats_dict
54 | 
55 | 
def collect_stats(aggregated_stats, stats):
    """Fold one thread's ``stats`` into ``aggregated_stats`` (modified in place)."""
    if not stats:
        return

    # Keep whichever connection offset has the larger magnitude (ties go to the new one).
    thread_diff = stats["connection_diff_sec"]
    if abs(thread_diff) >= abs(aggregated_stats.get("connection_diff_sec", 0)):
        aggregated_stats["connection_diff_sec"] = thread_diff

    # Plain counters are summed across threads.
    counters = ("transaction_success", "transaction_error", "query_success", "query_error")
    for counter in counters:
        aggregated_stats[counter] += stats[counter]

    # Error logs are merged. Per the Manager python docs the merged dict has to be
    # assigned back for the manager to notice the update.
    for log_name in ("transaction_error_log", "connection_error_log"):
        merged_log = aggregated_stats[log_name]
        merged_log.update(stats[log_name])
        aggregated_stats[log_name] = merged_log
82 | 


--------------------------------------------------------------------------------
/core/replay/summarizer.py:
--------------------------------------------------------------------------------
  1 | import datetime
  2 | import logging
  3 | import os
  4 | from tqdm import tqdm
  5 | 
  6 | from boto3 import client
  7 | 
  8 | import common.aws_service as aws_service_helper
  9 | 
 10 | logger = logging.getLogger("WorkloadReplicatorLogger")
 11 | 
 12 | 
 13 | def summarize(
 14 |     connection_logs,
 15 |     config,
 16 |     replay_start_timestamp,
 17 |     aggregated_stats,
 18 |     query_count,
 19 |     replay_id,
 20 |     transaction_count,
 21 |     replay_end_time,
 22 | ):
 23 |     replay_summary = []
 24 |     logger.info("Replay summary:")
 25 |     replay_summary.append(
 26 |         f"Attempted to replay {query_count} queries, {transaction_count} transactions, "
 27 |         f"{len(connection_logs)} connections."
 28 |     )
 29 |     try:
 30 |         replay_summary.append(
 31 |             f"Successfully replayed {aggregated_stats.get('transaction_success', 0)} out of {transaction_count} "
 32 |             f"({round((aggregated_stats.get('transaction_success', 0) / transaction_count) * 100)}%) transactions."
 33 |         )
 34 |         replay_summary.append(
 35 |             f"Successfully replayed {aggregated_stats.get('query_success', 0)} out of {query_count} "
 36 |             f"({round((aggregated_stats.get('query_success', 0) / query_count) * 100)}%) queries."
 37 |         )
 38 |     except ZeroDivisionError:
 39 |         pass
 40 |     error_location = config.get("error_location", config["workload_location"])
 41 |     replay_summary.append(
 42 |         f"Encountered {len(aggregated_stats['connection_error_log'])} "
 43 |         f"connection errors and {len(aggregated_stats['transaction_error_log'])} transaction errors"
 44 |     )
 45 |     # and save them
 46 |     export_errors(
 47 |         aggregated_stats["connection_error_log"],
 48 |         aggregated_stats["transaction_error_log"],
 49 |         error_location,
 50 |         replay_id,
 51 |     )
 52 |     replay_summary.append(f"Replay finished in {replay_end_time - replay_start_timestamp}.")
 53 |     for line in replay_summary:
 54 |         logger.info(line)
 55 |     logger.info(
 56 |         f"Replay finished in {datetime.datetime.now(tz=datetime.timezone.utc) - replay_start_timestamp}."
 57 |     )
 58 |     return replay_summary
 59 | 
 60 | 
 61 | def export_errors(connection_errors, transaction_errors, workload_location, replay_name):
 62 |     """Save any errors that occurred during replay to a local directory or s3"""
 63 | 
 64 |     if len(connection_errors) == len(transaction_errors) == 0:
 65 |         logger.info("No errors, nothing to save")
 66 |         return
 67 | 
 68 |     logger.info(
 69 |         f"Saving {len(connection_errors)} connection errors, {len(transaction_errors)} transaction_errors"
 70 |     )
 71 | 
 72 |     connection_error_location = workload_location + "/" + replay_name + "/connection_errors"
 73 |     transaction_error_location = workload_location + "/" + replay_name + "/transaction_errors"
 74 | 
 75 | 
 76 | 
 77 |     if workload_location.startswith("s3://"):
 78 |         workload_s3_location = workload_location[5:].partition("/")
 79 |         bucket_name = workload_s3_location[0]
 80 |         prefix = workload_s3_location[2]
 81 |         s3_client = client("s3")
 82 |     else:
 83 |         os.makedirs(connection_error_location)
 84 |         os.makedirs(transaction_error_location)
 85 | 
 86 |     logger.info(f"Exporting connection errors to {connection_error_location}/")
 87 |     bar_format = "{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} [{elapsed}{postfix}]"
 88 |     for filename, connection_error_text in tqdm(
 89 |             connection_errors.items(),
 90 |             disable=False,
 91 |             unit="files",
 92 |             desc="Files processed",
 93 |             bar_format=bar_format,
 94 |         ):
 95 |     
 96 |         if workload_location.startswith("s3://"):
 97 |             if prefix:
 98 |                 key_loc = "%s/%s/connection_errors/%s.txt" % (
 99 |                     prefix,
100 |                     replay_name,
101 |                     filename,
102 |                 )
103 |             else:
104 |                 key_loc = "%s/connection_errors/%s.txt" % (replay_name, filename)
105 |             aws_service_helper.s3_put_object(connection_error_text,bucket_name, key_loc)
106 |         else:
107 |             error_file = open(connection_error_location + "/" + filename + ".txt", "w")
108 |             error_file.write(connection_error_text)
109 |             error_file.close()
110 | 
111 |     logger.info(f"Exporting transaction errors to {transaction_error_location}/")
112 |     for filename, transaction_errors in tqdm(
113 |             transaction_errors.items(),
114 |             disable=False,
115 |             unit="files",
116 |             desc="Files processed",
117 |             bar_format=bar_format,
118 |         ):
119 |         error_file_text = ""
120 |         for transaction_error in transaction_errors:
121 |             error_file_text += f"{transaction_error[0]}\n{transaction_error[1]}\n\n"
122 | 
123 |         if workload_location.startswith("s3://"):
124 |             if prefix:
125 |                 key_loc = "%s/%s/transaction_errors/%s.txt" % (
126 |                     prefix,
127 |                     replay_name,
128 |                     filename,
129 |                 )
130 |             else:
131 |                 key_loc = "%s/transaction_errors/%s.txt" % (replay_name, filename)
132 |             s3_client.put_object(
133 |                 Body=error_file_text,
134 |                 Bucket=bucket_name,
135 |                 Key=key_loc,
136 |             )
137 |         else:
138 |             error_file = open(transaction_error_location + "/" + filename + ".txt", "w")
139 |             error_file.write(error_file_text)
140 |             error_file.close()
141 | 


--------------------------------------------------------------------------------
/core/replay/unload_sys_table.py:
--------------------------------------------------------------------------------
 1 | from core.replay.prep import ReplayPrep
 2 | from common.util import db_connect
 3 | import re
 4 | import logging
 5 | 
 6 | logger = logging.getLogger("WorkloadReplicatorLogger")
 7 | 
 8 | 
 9 | class UnloadSysTable:
10 |     def __init__(self, config, replay_id):
11 |         self.config = config
12 |         self.default_interface = config["default_interface"]
13 |         self.unload_system_table_queries_file = config["unload_system_table_queries"]
14 |         self.unload_location = config["replay_output"] + "/" + replay_id
15 |         self.unload_iam_role = config["target_cluster_system_table_unload_iam_role"]
16 | 
17 |     def unload_system_table(self):
18 |         # TODO: wrap this in retries and proper logging
19 |         prep = ReplayPrep(self.config)
20 |         credentials = prep.get_connection_credentials(self.config["master_username"])
21 |         try:
22 |             conn = db_connect(
23 |                 self.default_interface,
24 |                 host=credentials["host"],
25 |                 port=int(credentials["port"]),
26 |                 username=credentials["username"],
27 |                 password=credentials["password"],
28 |                 database=credentials["database"],
29 |                 odbc_driver=credentials["odbc_driver"],
30 |             )
31 |         except Exception as e:
32 |             logger.debug(f"Unable to connect: {e}", exc_info=True)
33 |         unload_queries = {}
34 |         table_name = ""
35 |         query_text = ""
36 |         for line in open(self.unload_system_table_queries_file, "r"):
37 |             if line.startswith("--"):
38 |                 unload_queries[table_name] = query_text.strip("\n")
39 |                 table_name = line[2:].strip("\n")
40 |                 query_text = ""
41 |             else:
42 |                 query_text += line
43 | 
44 |         unload_queries[table_name] = query_text.strip("\n")
45 |         del unload_queries[""]
46 | 
47 |         cursor = conn.cursor()
48 |         for table_name, unload_query in unload_queries.items():
49 |             if table_name and unload_query:
50 |                 unload_query = re.sub(
51 |                     r"to ''",
52 |                     f"TO '{self.unload_location}/system_tables/{table_name}/'",
53 |                     unload_query,
54 |                     flags=re.IGNORECASE,
55 |                 )
56 |                 unload_query = re.sub(
57 |                     r"credentials ''",
58 |                     f"CREDENTIALS 'aws_iam_role={self.unload_iam_role}'",
59 |                     unload_query,
60 |                     flags=re.IGNORECASE,
61 |                 )
62 |                 try:
63 |                     cursor.execute(unload_query)
64 |                 except Exception as e:
65 |                     logger.error(f"Failed to unload query. {e}")
66 |                 logger.debug(f"Executed unload query: {table_name}")
67 | 


--------------------------------------------------------------------------------
/core/replay/unload_system_tables.sql:
--------------------------------------------------------------------------------
 1 | --SVL_STATEMENTTEXT
 2 | UNLOAD ('SELECT * FROM SVL_STATEMENTTEXT WHERE userid>1') TO '' CREDENTIALS '';
 3 | --STL_Query
 4 | UNLOAD ('SELECT * FROM STL_QUERY WHERE userid>1') TO '' CREDENTIALS '';
 5 | --STL_WLM_QUERY
 6 | UNLOAD ('SELECT * FROM STL_WLM_QUERY WHERE userid>1') TO '' CREDENTIALS '';
 7 | 
 8 | --stl_wlm_service_class_config
 9 | --UNLOAD ('SELECT * FROM stl_wlm_service_class_config') TO '' CREDENTIALS '';
10 | 
11 | --stv_wlm_qmr_config
12 | UNLOAD ('SELECT * FROM stv_wlm_qmr_config') TO '' CREDENTIALS '';
13 | 
14 | --stv_wlm_query_queue_state
15 | UNLOAD ('SELECT * FROM stv_wlm_query_queue_state') TO '' CREDENTIALS '';
16 | --stv_wlm_query_state
17 | UNLOAD ('SELECT * FROM stv_wlm_query_state') TO '' CREDENTIALS '';
18 | --stl_connection_log
19 | UNLOAD ('SELECT * FROM stl_connection_log') TO '' CREDENTIALS '';
20 | --stl_compile_info
21 | --UNLOAD ('SELECT * FROM stl_compile_info WHERE userid>1') TO '' CREDENTIALS '';
22 | --stl_catalog_bloat
23 | --UNLOAD ('SELECT * FROM stl_catalog_bloat WHERE userid>1') TO '' CREDENTIALS '';
24 | --stl_catalog_rebuild_info
25 | --UNLOAD ('SELECT * FROM stl_catalog_rebuild_info') TO '' CREDENTIALS '';
26 | --stl_query_metrics
27 | UNLOAD ('SELECT * FROM stl_query_metrics WHERE userid>1') TO '' CREDENTIALS '';
28 | --svl_query_summary
29 | UNLOAD ('SELECT * FROM svl_query_summary WHERE userid>1') TO '' CREDENTIALS '';
30 | --svl_query_report
31 | UNLOAD ('SELECT * FROM svl_query_report WHERE userid>1') TO '' CREDENTIALS '';
32 | --stl_vacuum
33 | UNLOAD ('SELECT * FROM stl_vacuum WHERE userid>1') TO '' CREDENTIALS '';
34 | 
35 | --stl_s3client
36 | 
37 | UNLOAD ('SELECT * FROM stl_s3client WHERE userid>1') TO '' CREDENTIALS '';
38 | 
39 | --stl_tiered_storage_s3_blocks
40 | --UNLOAD ('SELECT * FROM stl_tiered_storage_s3_blocks') TO '' CREDENTIALS '';
41 | 
42 | 
43 | --stl_commit_stats
44 | UNLOAD ('SELECT * FROM stl_commit_stats') TO '' CREDENTIALS '';
45 | 
46 | --svl_query_metrics_summary
47 | UNLOAD ('SELECT * FROM svl_query_metrics_summary') TO '' CREDENTIALS '';
48 | 


--------------------------------------------------------------------------------
/core/sql/aborted_queries.sql:
--------------------------------------------------------------------------------
/*AbortedQueries*/
-- Report of failed statements found in sys_query_history for the requested window.
-- The START_TIME and END_TIME tokens in double braces are template placeholders
-- (presumably substituted by the caller before execution - confirm against the loader).
--
-- Step 1: stage the matching rows in a temp table. Only rows with user_id > 1,
-- a start_time inside the window, query text containing the replay_start marker
-- and status = failed are kept, so the "aborted" flag is 1 for every staged row.
-- queue_time, execution_time and elapsed_time are divided by 1000000.00 and exposed
-- under *_s names (presumably microseconds converted to seconds - confirm).
CREATE TEMP TABLE aborted_queries AS (
    SELECT q.user_id                                                                       as "userid"
         , case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue"
         , date_trunc('hour', q.start_time)                                                as "period"
         , q.transaction_id                                                                as "xid"
         , q.query_id                                                                      as "query"
         , q.query_text::char(50)                                                          as "querytxt"
         , q.queue_time / 1000000.00                                                       as "queue_s"
         , q.execution_time / 1000000.00                                                   as "exec_time_s"     -- This includes compile time. Differs in behavior from provisioned metric
         , case when q.status = 'failed' then 1 else 0 end                                    "aborted"
         , q.elapsed_time / 1000000.00                                                     as "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric
    FROM sys_query_history q
    WHERE q.user_id > 1
      AND q.start_time >={{START_TIME}}
      AND q.start_time <={{END_TIME}}
      AND q.query_text LIKE '%replay_start%'
      AND q.status = 'failed'
);

-- Step 2: return the staged rows with the user name looked up in pg_user.
-- The LEFT JOIN keeps rows whose user id has no pg_user entry (usename is then NULL).
-- Rows are ordered by total elapsed time, longest first.
SELECT a.userid,
        b.usename,
        a.queue,
        a.period,
        a.xid,
        a.query,
        a.querytxt,
        a.queue_s,
        a.exec_time_s,
        a.aborted,
        a.total_elapsed_s
FROM aborted_queries a
  LEFT JOIN pg_user b ON a.userid = b.usesysid
ORDER BY a.total_elapsed_s DESC;
35 | 


--------------------------------------------------------------------------------
/core/sql/cluster_level_metrics.sql:
--------------------------------------------------------------------------------
/*ClusterLevelMetrics*/
-- Summary statistics over the non-failed statements in sys_query_history for the
-- requested window. The START_TIME and END_TIME tokens in double braces are template
-- placeholders (presumably substituted by the caller before execution - confirm).
-- The result has one row per measure (execution time, queue time, query latency),
-- each with the statement count, the 25/50/75/90/95/99th percentiles rounded to two
-- decimals, and the max, average and standard deviation.
--
-- queries: base row set. Keeps user_id > 1, start_time inside the window, query text
-- containing the replay_start marker and status other than failed. Times are divided
-- by 1000000.00 (presumably microseconds to seconds - confirm).
WITH queries AS
         (
             select q.user_id                        as             "userid"
                  , date_trunc('hour', q.start_time) as             "period"
                  , q.transaction_id                 as             "xid"
                  , q.query_id                       as             "query"
                  , q.query_text::char(50)           as             "querytxt"
                  , q.queue_time / 1000000.00        as             "queue_s"
                  , q.execution_time / 1000000.00    as             "exec_time_s"     -- This includes compile time. Differs in behavior from provisioned metric
                  , case when q.status = 'failed' then 1 else 0 end "aborted"
                  , q.elapsed_time / 1000000.00      as             "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric
             FROM sys_query_history q
             WHERE q.user_id > 1
               AND q.start_time >= {{START_TIME}}
               AND q.start_time <= {{END_TIME}}
               AND q.query_text LIKE '%replay_start%'
               AND q.status != 'failed'
         ),
     -- elapsed_time: statistics over total_elapsed_s, labelled Query Latency.
     -- GROUP BY 1 groups on the constant label, so a single row is produced.
     elapsed_time AS
         (
             SELECT 'Query Latency'                                                         AS measure_type,
                    COUNT(*)                                                                AS query_count,
                    ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p25_s,
                    ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p50_s,
                    ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p75_s,
                    ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p90_s,
                    ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p95_s,
                    ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p99_s,
                    MAX(total_elapsed_s)                                                    AS max_s,
                    AVG(total_elapsed_s)                                                    AS avg_s,
                    stddev(total_elapsed_s)                                                 AS std_s
             FROM queries
             GROUP BY 1
         ),
     -- exec_time: the same statistics over exec_time_s, labelled Execution Time.
     exec_time AS
         (
             SELECT 'Execution Time'                                                    AS measure_type,
                    COUNT(*)                                                            AS query_count,
                    ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p25_s,
                    ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p50_s,
                    ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p75_s,
                    ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p90_s,
                    ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p95_s,
                    ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p99_s,
                    MAX(exec_time_s)                                                    AS max_s,
                    AVG(exec_time_s)                                                    AS avg_s,
                    stddev(exec_time_s)                                                 AS std_s
             FROM queries
             GROUP BY 1
         ),
     -- queue_time: the same statistics over queue_s, labelled Queue Time.
     queue_time AS
         (
             SELECT 'Queue Time'                                                    AS measure_type,
                    COUNT(*)                                                        AS query_count,
                    ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY queue_s), 2) AS p25_s,
                    ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY queue_s), 2) AS p50_s,
                    ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY queue_s), 2) AS p75_s,
                    ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY queue_s), 2) AS p90_s,
                    ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY queue_s), 2) AS p95_s,
                    ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY queue_s), 2) AS p99_s,
                    MAX(queue_s)                                                    AS max_s,
                    AVG(queue_s)                                                    AS avg_s,
                    stddev(queue_s)                                                 AS std_s
             FROM queries
             GROUP BY 1
         )
-- Stack the three single-row summaries. The trailing ORDER BY 1 sorts the combined
-- result by measure_type.
SELECT measure_type,
       query_count,
       p25_s,
       p50_s,
       p75_s,
       p90_s,
       p95_s,
       p99_s,
       max_s,
       avg_s,
       std_s
FROM exec_time
UNION ALL
SELECT measure_type,
       query_count,
       p25_s,
       p50_s,
       p75_s,
       p90_s,
       p95_s,
       p99_s,
       max_s,
       avg_s,
       std_s
FROM queue_time
UNION ALL
SELECT measure_type,
       query_count,
       p25_s,
       p50_s,
       p75_s,
       p90_s,
       p95_s,
       p99_s,
       max_s,
       avg_s,
       std_s
FROM elapsed_time
ORDER BY 1;


--------------------------------------------------------------------------------
/core/sql/latency_distribution.sql:
--------------------------------------------------------------------------------
/*LatencyDistribution*/
/*
 * Histogram of elapsed time for the non-failed statements in sys_query_history
 * within the requested window. The START_TIME and END_TIME tokens in double braces
 * are template placeholders (presumably substituted by the caller - confirm).
 * Output columns: bucket upper bound (sec_end), bucket number (n), bucket lower
 * bound (sec_start) and the number of statements falling in the bucket.
 */
WITH queries AS
         /* Base rows: user_id > 1, start_time in the window, text containing the
            replay_start marker, status other than failed. elapsed_time is divided
            by 1000000.00 (presumably microseconds to seconds - confirm). */
         (
             SELECT q.query_id
                  , q.elapsed_time / 1000000.00 as total_elapsed_s
             FROM sys_query_history q
             WHERE q.user_id > 1
               AND q.start_time >= {{START_TIME}}
               AND q.start_time <= {{END_TIME}}
               AND q.query_text LIKE '%replay_start%'
               AND status != 'failed'
         )
        ,
     pct AS
         /* One-row summary: 98th percentile (rounded to 2 decimals), row count,
            max, min, and min_2s which is the smallest elapsed value that is not
            equal to 0.00 (zero values are mapped to NULL and ignored by MIN). */
         (
             SELECT ROUND(PERCENTILE_CONT(0.98) WITHIN GROUP (ORDER BY q1.total_elapsed_s), 2) AS  p98_s,
                    COUNT(*)                                                                   AS  query_count,
                    MAX(q1.total_elapsed_s)                                                        max_s,
                    MIN(q1.total_elapsed_s)                                                        min_s,
                    MIN(CASE WHEN q1.total_elapsed_s = 0.00 THEN NULL ELSE q1.total_elapsed_s END) min_2s
             FROM queries q1
         ),
     bucket_count AS
         /* 40 buckets when there are more than 100 statements, otherwise 5. */
         (
             SELECT CASE
                        WHEN query_count > 100 THEN 40
                        ELSE 5
                        END AS b_count
             FROM pct
         ),
     buckets AS
         /* Three kinds of bucket are unioned:
            1. Generated buckets: n is a row number taken from at most 39 rows of
               pg_class (pg_class is used only as a row source). Each bucket is
               p98_s / b_count wide, offset from min_2s, and kept only when its
               upper bound does not exceed p98_s.
            2. Bucket 0, covering 0.00 up to min_2s.
            3. A tail bucket numbered b_count, covering p98_s up to max_s + 0.01.
            NOTE(review): because generated buckets start at min_2s and are dropped
            once sec_end exceeds p98_s, values between the last kept sec_end and
            p98_s appear to fall in no bucket - confirm whether this is intended. */
         (
             SELECT (min_2s + ((n) * (p98_s / b_count)))     AS sec_end,
                    n,
                    (min_2s + ((n - 1) * (p98_s / b_count))) AS sec_start
             FROM (SELECT ROW_NUMBER() OVER () n FROM pg_class LIMIT 39),
                  bucket_count,
                  pct
             WHERE sec_end <= p98_s
             UNION ALL
             SELECT min_2s AS sec_end,
                    0      AS n,
                    0.00   AS sec_start
             FROM pct
             UNION ALL
             SELECT (max_s + 0.01) AS sec_end,
                    b_count        AS n,
                    p98_s          AS sec_start
             FROM pct,
                  bucket_count
         )
/* Count statements per bucket using a half-open interval (start inclusive, end
   exclusive). The LEFT JOIN keeps empty buckets, for which COUNT(query_id) is 0.
   Results are ordered by bucket number. */
SELECT sec_end,
       n,
       sec_start,
       COUNT(query_id)
FROM buckets
         LEFT JOIN queries
                   ON total_elapsed_s >= sec_start
                       AND total_elapsed_s < sec_end
GROUP BY 1,
         2,
         3
ORDER BY 2;


--------------------------------------------------------------------------------
/core/sql/query_metrics.sql:
--------------------------------------------------------------------------------
/*QueryMetrics*/
-- Per-statement metrics for the non-failed statements in sys_query_history within
-- the requested window. The START_TIME and END_TIME tokens in double braces are
-- template placeholders (presumably substituted by the caller - confirm).
--
-- Step 1: stage the matching rows in a temp table. Only rows with user_id > 1,
-- a start_time inside the window, query text containing the replay_start marker
-- and status other than failed are kept, so the "aborted" flag is 0 for every
-- staged row. queue_time, execution_time and elapsed_time are divided by 1000000.00
-- and exposed under *_s names (presumably microseconds to seconds - confirm).
CREATE TEMP TABLE query_metrics AS (
    select q.user_id                                                                       as "userid"
         , case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue"
         , date_trunc('hour', q.start_time)                                                as "period"
         , q.transaction_id                                                                as "xid"
         , q.query_id                                                                      as "query"
         , q.query_text::char(50)                                                          as "querytxt"
         , q.queue_time / 1000000.00                                                       as "queue_s"
         , q.execution_time / 1000000.00                                                   as "exec_time_s"     -- This includes compile time. Differs in behavior from provisioned metric
         , case when q.status = 'failed' then 1 else 0 end                                    "aborted"
         , q.elapsed_time / 1000000.00                                                     as "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric
    FROM sys_query_history q
    WHERE q.user_id > 1
      AND q.start_time >={{START_TIME}}
      AND q.start_time <={{END_TIME}}
      AND q.query_text LIKE '%replay_start%'
      AND q.status != 'failed'
);

-- Step 2: return the staged rows with the user name looked up in pg_user.
-- The LEFT JOIN keeps rows whose user id has no pg_user entry (usename is then NULL).
-- No ORDER BY is applied, so row order is unspecified.
SELECT a.userid,
       u.usename,
       a.queue,
       a.period,
       a.xid,
       a.query,
       a.querytxt,
       a.queue_s,
       a.exec_time_s,
       a.aborted,
       a.total_elapsed_s
FROM query_metrics a
     LEFT JOIN pg_user u on a.userid = u.usesysid;
34 | 


--------------------------------------------------------------------------------
/core/sql/statement_types.sql:
--------------------------------------------------------------------------------
/*StatementTypes*/
/*
 * Counts statements per statement type for the requested window, with the number
 * of failed ones per type. The START_TIME and END_TIME tokens in double braces are
 * template placeholders (presumably substituted by the caller - confirm).
 *
 * Classification, first match wins:
 *   - SYSTEM when query_text contains padb_ or pg_internal
 *   - otherwise the query_type itself for DELETE, COPY, UPDATE, INSERT, SELECT,
 *     UNLOAD and DDL
 *   - for query_type UTILITY the text is inspected further and mapped to VACUUM,
 *     ROLLBACK, FETCH or CURSOR (the character classes make the keyword match
 *     case-insensitive), falling back to UTILITY
 *   - OTHER for anything else
 * The REGEXP_INSTR result is used directly as the WHEN condition.
 */
SELECT CASE
           WHEN REGEXP_INSTR("query_text", '(padb_|pg_internal)')
               THEN 'SYSTEM'
           WHEN query_type = 'DELETE'
               THEN 'DELETE'
           WHEN query_type = 'COPY'
               THEN 'COPY'
           WHEN query_type = 'UPDATE'
               THEN 'UPDATE'
           WHEN query_type = 'INSERT'
               THEN 'INSERT'
           WHEN query_type = 'SELECT'
               THEN 'SELECT'
           WHEN query_type = 'UNLOAD'
               THEN 'UNLOAD'
           WHEN query_type = 'DDL'
               THEN 'DDL'
           WHEN query_type = 'UTILITY'
               THEN CASE
                        WHEN REGEXP_INSTR("query_text", '[vV][aA][cC][uU][uU][mM][ :]')
                            THEN 'VACUUM'
                        WHEN REGEXP_INSTR("query_text", '[rR][oO][lL][lL][bB][aA][cC][kK] ')
                            THEN 'ROLLBACK'
                        WHEN REGEXP_INSTR("query_text", '[fF][eE][tT][cC][hH] ')
                            THEN 'FETCH'
                        WHEN REGEXP_INSTR("query_text", '[cC][uU][rR][sS][oO][rR] ')
                            THEN 'CURSOR'
                        ELSE 'UTILITY'
               END
           ELSE 'OTHER'
    END            statement_type
     /* COUNT skips NULLs, so only rows with status = failed are counted here. */
     , COUNT(CASE
                 WHEN status = 'failed'
                     THEN 1
    END)        AS aborted
     , COUNT(*) AS total_count
FROM sys_query_history
WHERE user_id > 1
  AND query_text LIKE '%replay_start%'
  AND start_time >= {{START_TIME}}
  AND start_time <= {{END_TIME}}
/* Group on the computed statement_type (column 1) and list the types with the
   most failed statements (column 2) first. */
GROUP BY
    1
ORDER BY
    2 DESC;


--------------------------------------------------------------------------------
/core/sql/sys_external_query_data.sql:
--------------------------------------------------------------------------------
/*SysExternalQueryData*/
/*
 * Selects an explicit column list from SYS_EXTERNAL_QUERY_DETAIL for rows with
 * user_id > 1 whose start_time lies inside the requested window (both bounds
 * inclusive). The START_TIME and END_TIME tokens in double braces are template
 * placeholders (presumably substituted by the caller - confirm).
 */
SELECT user_id,
       query_id,
       child_query_sequence,
       transaction_id,
       segment_id,
       source_type,
       start_time,
       end_time,
       duration,
       total_partitions,
       qualified_partitions,
       scanned_files,
       returned_rows,
       returned_bytes,
       file_format,
       file_location,
       external_query_text
from SYS_EXTERNAL_QUERY_DETAIL
WHERE user_id > 1
  AND start_time >= {{START_TIME}}
  AND start_time <= {{END_TIME}};


--------------------------------------------------------------------------------
/core/sql/sys_load_history.sql:
--------------------------------------------------------------------------------
/*SysLoadHistory*/
/*
 * Selects an explicit column list from SYS_LOAD_HISTORY for rows with user_id > 1
 * whose start_time lies inside the requested window (both bounds inclusive).
 * The START_TIME and END_TIME tokens in double braces are template placeholders
 * (presumably substituted by the caller - confirm).
 */
SELECT user_id,
       query_id,
       status,
       session_id,
       transaction_id,
       database_name,
       table_name,
       start_time,
       end_time,
       duration,
       data_source,
       loaded_rows,
       loaded_bytes,
       source_file_count,
       source_file_bytes,
       error_count
from SYS_LOAD_HISTORY
WHERE user_id > 1
  AND start_time >= {{START_TIME}}
  AND start_time <= {{END_TIME}};
22 | 


--------------------------------------------------------------------------------
/core/sql/sys_query_history.sql:
--------------------------------------------------------------------------------
/*SysQueryHistory*/
/*
 * Selects rows from sys_query_history with user_id > 1 whose start_time lies inside
 * the requested window (both bounds inclusive), adding the user name from pg_user.
 * The LEFT JOIN keeps history rows that have no matching pg_user entry (user_name
 * is then NULL). The START_TIME and END_TIME tokens in double braces are template
 * placeholders (presumably substituted by the caller - confirm).
 */
SELECT h.user_id,
       u.usename as user_name,
       query_id,
       transaction_id,
       session_id,
       database_name,
       start_time,
       end_time,
       elapsed_time,
       status,
       result_cache_hit,
       queue_time,
       execution_time,
       query_text,
       query_label,
       query_type,
       error_message,
       returned_rows,
       returned_bytes,
       redshift_version
from sys_query_history h
LEFT JOIN pg_user u on h.user_id = u.usesysid
WHERE user_id > 1
  AND start_time >= {{START_TIME}}
  AND start_time <= {{END_TIME}};


--------------------------------------------------------------------------------
/core/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/core/tests/__init__.py


--------------------------------------------------------------------------------
/core/tests/test_cloudwatch_extractor.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from unittest.mock import patch, mock_open
 3 | 
 4 | from core.extract.cloudwatch_extractor import CloudwatchExtractor
 5 | 
 6 | 
 7 | def mock_cw_describe_log_groups(log_group_name=None, region=""):
 8 |     return {"logGroups": [{"logGroupName": "useractivitylog"}]}
 9 | 
10 | 
def mock_cw_describe_log_streams(log_group_name, region):
    """Stand-in for ``cw_describe_log_streams``.

    Both arguments are ignored; the response always lists one stream named
    ``redshift-serverless.test.us-east-1``.
    """
    only_stream = {"logStreamName": "redshift-serverless.test.us-east-1"}
    return {"logStreams": [only_stream]}
17 | 
18 | 
def mock_cw_get_paginated_logs(log_group_name, stream_name, start_time, end_time, region):
    """Stand-in for ``cw_get_paginated_logs`` that always returns a new empty list."""
    no_log_events = list()
    return no_log_events
21 | 
22 | 
def mock_s3_upload():
    """Stand-in for an S3 upload helper; takes no arguments and returns an empty string."""
    uploaded_location = ""
    return uploaded_location
25 | 
26 | 
def mock_parse_log():
    """Stand-in for ``parse_log``; takes no arguments and returns ``None``."""
    return None
29 | 
30 | 
class CloudwatchExtractorTestCases(unittest.TestCase):
    """Unit tests for ``CloudwatchExtractor`` with the CloudWatch helpers patched out."""

    @patch("common.aws_service.cw_describe_log_groups", mock_cw_describe_log_groups)
    @patch("common.aws_service.cw_describe_log_streams", mock_cw_describe_log_streams)
    @patch.object(CloudwatchExtractor, "_read_cloudwatch_logs")
    def test_get_extract_from_cw_source_cluster_endpoint_specified(
        self, mock_read_cloudwatch_logs
    ):
        """Logs are read when the config names a source cluster endpoint."""
        config = {
            "source_cluster_endpoint": "redshift-serverless.test.us-east-1",
            "workload_location": "s3://test/t",
        }
        extractor = CloudwatchExtractor(config)

        extractor.get_extract_from_cloudwatch("2021-08-15T15:50", "2021-08-15T18:55")

        mock_read_cloudwatch_logs.assert_called()

    @patch("common.aws_service.cw_describe_log_groups", mock_cw_describe_log_groups)
    @patch("common.aws_service.cw_describe_log_streams", mock_cw_describe_log_streams)
    @patch.object(CloudwatchExtractor, "_read_cloudwatch_logs")
    def test_get_extract_from_cw_source_cluster_endpoint_not_specified(
        self, mock_read_cloudwatch_logs
    ):
        """Logs are read when only a log location is configured."""
        config = {"log_location": "/aws/logs/"}
        extractor = CloudwatchExtractor(config)

        extractor.get_extract_from_cloudwatch("2021-08-15T15:50", "2021-08-15T18:55")

        mock_read_cloudwatch_logs.assert_called()

    def test_get_extract_from_cw_error(self):
        """An empty config makes the extraction exit via ``SystemExit``."""
        extractor = CloudwatchExtractor({})

        with self.assertRaises(SystemExit):
            extractor.get_extract_from_cloudwatch("2021-08-15T15:50", "2021-08-15T18:55")

    # Only patches without an explicit replacement inject a mock argument, and they
    # are injected bottom-up: TemporaryDirectory first, then parse_log.
    @patch("core.extract.cloudwatch_extractor.parse_log")
    @patch("gzip.open", mock_open())
    @patch("tempfile.TemporaryDirectory")
    @patch("common.aws_service.cw_get_paginated_logs", mock_cw_get_paginated_logs)
    @patch("common.aws_service.cw_describe_log_streams", mock_cw_describe_log_streams)
    def test_read_cloudwatch_logs_success(self, mock_tmp_dir, mock_parse_log):
        """``parse_log`` is invoked once for each of the two log groups in the response."""
        log_groups_response = {
            "logGroups": [{"logGroupName": "useractivitylog"}, {"logGroupName": "connectionlog"}]
        }
        extractor = CloudwatchExtractor({})

        extractor._read_cloudwatch_logs(
            log_groups_response,
            "redshift-serverless.test.us-east-1",
            "2021-08-15T15:50",
            "2021-08-15T18:55",
            "us-east-1",
        )

        self.assertEqual(mock_parse_log.call_count, 2)
80 | 
81 | 
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
84 | 


--------------------------------------------------------------------------------
/core/tests/test_connections_parser.py:
--------------------------------------------------------------------------------
  1 | from unittest.mock import patch, mock_open
  2 | import unittest
  3 | from core.replay.connections_parser import parse_connections
  4 | 
  5 | time_interval_between_transactions = "all on"
  6 | time_interval_between_queries = "all on"
  7 | filters = {
  8 |     "include": {"database_name": ["*"], "username": ["*"], "pid": ["*"]},
  9 |     "exclude": {"database_name": [], "username": [], "pid": []},
 10 | }
 11 | 
# Fake ``open`` whose file content is a JSON array with one complete connection
# record (both session_initiation_time and disconnection_time are populated).
open_mock_1 = mock_open(
    read_data=(
        """[{
  "session_initiation_time": "2023-01-09 15:48:15.313000+00:00",
  "disconnection_time": "2023-01-09 15:48:15.872000+00:00",
  "database_name": "dev",
  "username": "awsuser",
  "pid": "1073815778",
   "application_name": "",
  "time_interval_between_transactions": "True",
  "time_interval_between_queries": "transaction"
}]"""
    )
)
 26 | 
# Fake ``open`` whose single connection record has an empty
# session_initiation_time; the other fields match open_mock_1.
open_mock_2 = mock_open(
    read_data=(
        """[{
                "session_initiation_time": "",
                "disconnection_time": "2023-01-09 15:48:15.872000+00:00",
                "database_name": "dev",
                "username": "awsuser",
                "pid": "1073815778",
                "application_name": "",
                "time_interval_between_transactions": "True",
                "time_interval_between_queries": "transaction"
            }]"""
    )
)
 41 | 
# Fake ``open`` whose single connection record has an empty disconnection_time;
# the other fields match open_mock_1.
open_mock_3 = mock_open(
    read_data=(
        """
        [
            {
                "session_initiation_time": "2023-01-09 15:48:15.313000+00:00",
                "disconnection_time": "",
                "database_name": "dev",
                "username": "awsuser",
                "pid": "1073815778",
                "application_name": "",
                "time_interval_between_transactions": "True",
                "time_interval_between_queries": "transaction"
            }
        ]
    """
    )
)
 60 | 
# Fake ``open`` whose single record uses the keys session_initiation_time_error
# and disconnection_time_error instead of the regular time keys, and a JSON
# boolean for time_interval_between_transactions. Used by the except-case test,
# which expects no connection to be counted.
open_mock_4 = mock_open(
    read_data=(
        """
        [
    {
       "session_initiation_time_error": "2023-01-09 15:48:15.313000+00:00",
       "disconnection_time_error": "2023-01-09 15:48:15.872000+00:00",
       "database_name": "dev",
       "username": "awsuser",
       "pid": "1073815778",
       "application_name": "",
       "time_interval_between_transactions": true,
       "time_interval_between_queries": "transaction"
    }
 ]
    """
    )
)
 79 | 
 80 | 
 81 | class TestConnectionsParser(unittest.TestCase):
 82 |     @patch("core.replay.connections_parser.client")
 83 |     @patch("core.replay.connections_parser.json")
 84 |     def test_parse_connections(self, mock_json, mock_client):
 85 |         workload_directory = (
 86 |             "s3://test/extracts/Edited_Extraction_2023-01-23T09:46:24.784062+00:00"
 87 |         )
 88 |         mock_json.loads.return_value = [
 89 |             {
 90 |                 "session_initiation_time": "2023-01-09 15:48:15.313000+00:00",
 91 |                 "disconnection_time": "2023-01-09 15:48:15.872000+00:00",
 92 |                 "database_name": "dev",
 93 |                 "username": "awsuser",
 94 |                 "pid": "1073815778",
 95 |                 "application_name": "",
 96 |                 "time_interval_between_transactions": True,
 97 |                 "time_interval_between_queries": "transaction",
 98 |             }
 99 |         ]
100 |         mock_client.get_object.return_value = mock_json
101 | 
102 |         connections, total_connections = parse_connections(
103 |             workload_directory,
104 |             time_interval_between_transactions,
105 |             time_interval_between_queries,
106 |             filters,
107 |         )
108 |         self.assertEqual(connections[0].pid, "1073815778")
109 |         self.assertEqual(total_connections, 1)
110 | 
111 |     @patch("core.replay.connections_parser.open", open_mock_1)
112 |     def test_parse_connections_s3_location(self):
113 |         workload_directory = "testdata/testlocation"
114 | 
115 |         connections, total_connections = parse_connections(
116 |             workload_directory,
117 |             time_interval_between_transactions,
118 |             time_interval_between_queries,
119 |             filters,
120 |         )
121 |         self.assertEqual(connections[0].pid, "1073815778")
122 |         self.assertEqual(total_connections, 1)
123 | 
124 |     @patch("core.replay.connections_parser.open", open_mock_2)
125 |     def test_parse_connections_initiation_time(self):
126 |         workload_directory = "testdata/testlocation"
127 | 
128 |         connections, total_connections = parse_connections(
129 |             workload_directory,
130 |             time_interval_between_transactions,
131 |             time_interval_between_queries,
132 |             filters,
133 |         )
134 |         self.assertEqual(connections[0].session_initiation_time, None)
135 |         self.assertEqual(total_connections, 1)
136 | 
137 |     @patch("core.replay.connections_parser.open", open_mock_3)
138 |     def test_parse_connections_disconnection_time(self):
139 |         workload_directory = "testdata/testlocation"
140 | 
141 |         connections, total_connections = parse_connections(
142 |             workload_directory,
143 |             time_interval_between_transactions,
144 |             time_interval_between_queries,
145 |             filters,
146 |         )
147 |         self.assertEqual(connections[0].disconnection_time, None)
148 |         self.assertEqual(total_connections, 1)
149 | 
150 |     @patch("core.replay.connections_parser.open", open_mock_4)
151 |     def test_parse_connections_except_case(self):
152 |         workload_directory = "testdata/testlocation"
153 | 
154 |         connections, total_connections = parse_connections(
155 |             workload_directory,
156 |             time_interval_between_transactions,
157 |             time_interval_between_queries,
158 |             filters,
159 |         )
160 |         self.assertEqual(total_connections, 0)
161 | 


--------------------------------------------------------------------------------
/core/tests/test_copy_replacements_parser.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from unittest.mock import patch, Mock, mock_open
 3 | 
 4 | from core.replay.copy_replacements_parser import parse_copy_replacements
 5 | 
 6 | 
 7 | class CopyReplacementsParserTests(unittest.TestCase):
 8 |     @patch("common.aws_service.s3_client_get_object")
 9 |     def test_parse_copy_replacements_s3(self, patched_get_object):
10 |         workload_directory = "s3://test-bucket/test-folder/prefix"
11 |         mock_body = Mock()
12 |         mock_body.read.return_value = "HeaderA,HeaderB,HeaderC\nA,B,C".encode("utf-8")
13 |         patched_get_object.return_value = {"Body": mock_body}
14 |         result = parse_copy_replacements(workload_directory)
15 | 
16 |         self.assertEqual(result, {"A": ["B", "C"]})
17 | 
18 |     def test_parse_copy_replacements_local(self):
19 |         with patch(
20 |             "builtins.open", mock_open(read_data="HeaderA,HeaderB,HeaderC\nA,B,C")
21 |         ) as patched_open:
22 |             result = parse_copy_replacements("/tmp")
23 | 
24 |         self.assertEqual(result, {"A": ["B", "C"]})
25 | 
26 | 
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
29 | 


--------------------------------------------------------------------------------
/core/tests/test_extract_parser.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime, timezone
 2 | from unittest import TestCase
 3 | from unittest.mock import Mock
 4 | 
 5 | from core.extract import extract_parser
 6 | 
 7 | from core.replay.connections_parser import ConnectionLog
 8 | 
 9 | pid = "12324"
10 | xid = "123142412"
11 | start_time = datetime.fromisoformat("2023-01-01T00:00:00").replace(tzinfo=timezone.utc)
12 | end_time = datetime.fromisoformat("2023-02-01T00:00:00").replace(tzinfo=timezone.utc)
13 | 
14 | 
class ExtractParserTestCases(TestCase):
    """Tests for ``extract_parser.parse_log`` driven by in-memory log lines."""

    def test_parse_log_useractivitylog(self):
        """Of the two user-activity lines, only the ``SELECT`` ends up in ``logs``."""
        mock_file = Mock()
        # The f-strings already interpolate pid and xid, so no further
        # ``str.format`` call is needed before encoding to bytes.
        mock_file.readlines.return_value = [
            # valid log line
            f"'2023-01-01T00:00:00Z UTC [ db=testdb user=testuser pid={pid} userid=4 xid={xid} ]' LOG: SELECT * FROM TEST_TABLE LIMIT 10;".encode(),
            # invalid log line
            f"'2023-01-01T01:00:00Z UTC [ db=testdb user=testuser pid={pid} userid=4 xid={xid} ]' LOG: call test.set($1, $2);".encode(),
        ]
        logs = {}
        extract_parser.parse_log(
            mock_file, "useractivitylog", {}, {}, logs, set(), start_time, end_time
        )
        self.assertEqual(len(logs), 1)
        for value in logs.values():
            self.assertEqual(len(value), 1)
            log = value[0]
            self.assertEqual(log.xid, xid)
            self.assertEqual(log.pid, pid)
            self.assertEqual(log.text, "SELECT * FROM TEST_TABLE LIMIT 10;")

    def test_parse_log_connectionlog(self):
        """Connection-log lines are parsed and at least one connection is recorded."""
        mock_file = Mock()
        set_application_name_line = f"set application_name |Sun, 01 Jan 2023 01:05:07:124|[local] |{xid} |{pid}|testdb |testuser |test |12312|TLSv1.2 |TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 |0| | | |JDBC-1.2.54.1082 |Linux 5.4.0-1086-aws amd64 |Amazon Redshift JDBC Driver 1.2.54.1082 |none |0|02d54c77-8302-4ae6-8e83".encode()
        mock_file.readlines.return_value = [
            f"initiating session |Sun, 01 Jan 2023 00:00:12:212|[local] | |{pid}|testdb |testuser |Ident |0| | |0| | | | | | | |0|03e74c8e-c3cb-4a98-a3d9".encode(),
            f"disconnecting session |Sun, 01 Jan 2023 00:02:21:471|[local] | |{pid}|testdb |testuser |Ident |7460885| | |0| | | | | | | |0|03e74c8e-c3cb-4a98-a3d9".encode(),
            set_application_name_line,
        ]
        connections = {}
        event_time = datetime.strptime(
            "Sun, 01 Jan 2023 01:05:07:124", "%a, %d %b %Y %H:%M:%S:%f"
        ).replace(tzinfo=timezone.utc)
        # Pre-seed last_connections with an entry keyed by the hash of the
        # "set application_name" line and pointing at a known connection's key.
        last_connection = ConnectionLog(event_time, end_time, "testdb", "testuser", pid)
        last_connections = {hash(set_application_name_line): last_connection.get_pk()}
        extract_parser.parse_log(
            mock_file,
            "connectionlog",
            connections,
            last_connections,
            {},
            set(),
            start_time,
            end_time,
        )
        # parse_log is expected to register at least one connection for these lines.
        self.assertGreaterEqual(len(connections), 1)
72 | 


--------------------------------------------------------------------------------
/core/tests/test_local_extractor.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from unittest.mock import patch, Mock
 3 | 
 4 | from core.extract.local_extractor import LocalExtractor
 5 | 
 6 | 
 7 | class LocalExtractorTestCases(unittest.TestCase):
 8 |     def test_extract_locally(self):
 9 |         with patch("os.listdir") as mock_list_dir:
10 |             with patch("gzip.open") as mock_gzip_open:
11 |                 with patch("core.extract.extract_parser.parse_log") as mock_parse_log:
12 |                     mock_list_dir.return_value = [
13 |                         "start_node.log.gz",
14 |                         "useractivity.log.gz",
15 |                         "connections.log.gz",
16 |                     ]
17 |                     mock_gzip_open.return_value = Mock()
18 |                     mock_parse_log.return_value = None
19 |                     e = LocalExtractor({})
20 |                     e.get_extract_locally("test", "2022-11-16T00:00:00", "2022-11-18T00:00:00")
21 |         self.assertTrue(mock_list_dir.called)
22 |         self.assertTrue(mock_gzip_open.called)
23 |         self.assertTrue(mock_parse_log.called)
24 | 
25 | 
26 | if __name__ == "__main__":
27 |     unittest.main()
28 | 


--------------------------------------------------------------------------------
/core/tests/test_s3_extractor.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import unittest
 3 | from unittest.mock import patch, Mock
 4 | 
 5 | from core.extract.s3_extractor import S3Extractor
 6 | 
 7 | 
 8 | def mock_sync_s3_get_bucket_contents(bucket, prefix):
 9 |     return [
10 |         {"Key": "s3://bucket/cluster_connectionlog_2021-08-15T15:00.gz"},
11 |         {"Key": "s3://bucket/cluster_useractivitylog_2021-08-15T19:00.gz"},
12 |     ]
13 | 
14 | 
15 | def mock_get_logs_in_range(audit_objects, start_time, end_time):
16 |     return ["A", "B"]
17 | 
18 | 
19 | def mock_s3_get_object(bucket, filename):
20 |     mock_obj = Mock()
21 |     mock_obj.get = Mock(return_value={"Body": ""})
22 |     return mock_obj
23 | 
24 | 
25 | def mock_parse_log(
26 |     log_file,
27 |     filename,
28 |     connections,
29 |     last_connections,
30 |     logs,
31 |     databases,
32 |     start_time,
33 |     end_time,
34 | ):
35 |     return
36 | 
37 | 
38 | class S3ExtractorTestCases(unittest.TestCase):
39 |     @patch("core.extract.s3_extractor.get_logs_in_range", mock_get_logs_in_range)
40 |     @patch("common.aws_service.sync_s3_get_bucket_contents", mock_sync_s3_get_bucket_contents)
41 |     @patch("common.aws_service.s3_get_object", mock_s3_get_object)
42 |     @patch("core.extract.extract_parser", mock_parse_log)
43 |     def test_get_extract_from_s3(self):
44 |         s3_extractor = S3Extractor({})
45 |         s3_extractor.get_extract_from_s3(
46 |             "test_bucket",
47 |             "test",
48 |             datetime.datetime.fromisoformat("2021-08-15T15:50").utcoffset(),
49 |             datetime.datetime.fromisoformat("2021-08-15T18:55").utcoffset(),
50 |         )
51 | 
52 | 
53 | if __name__ == "__main__":
54 |     unittest.main()
55 | 


--------------------------------------------------------------------------------
/core/tests/test_stats.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from unittest.mock import patch, call
  3 | from core.replay.stats import percent, print_stats, display_stats, init_stats, collect_stats
  4 | from multiprocessing.managers import SyncManager
  5 | 
  6 | 
  7 | stats = {
  8 |     "connection_diff_sec": 1.734,
  9 |     "query_success": 10,
 10 |     "query_error": 2,
 11 |     "transaction_success": 10,
 12 |     "transaction_error": 2,
 13 |     "transaction_error_log": {"test": 3},
 14 |     "connection_error_log": {"conn_test": 4},
 15 | }
 16 | 
 17 | aggregated_stats = {
 18 |     "connection_diff_sec": 1,
 19 |     "query_success": 10,
 20 |     "query_error": 2,
 21 |     "transaction_success": 0,
 22 |     "transaction_error": 0,
 23 |     "transaction_error_log": {},
 24 |     "connection_error_log": {},
 25 | }
 26 | 
 27 | 
 28 | class TestStats(unittest.TestCase):
 29 |     def test_percentage(self):
 30 |         den = 0
 31 |         num = 1
 32 | 
 33 |         response = percent(num, den)
 34 |         self.assertEqual(response, 0)
 35 | 
 36 |     def test_percentage_with_non_zero_den(self):
 37 |         den = 100
 38 |         num = 10
 39 | 
 40 |         response = percent(num, den)
 41 |         self.assertEqual(response, 10)
 42 | 
 43 |     def test_print_stats(self):
 44 |         stats = [1]
 45 | 
 46 |         response = print_stats(stats)
 47 |         self.assertEqual(response, None)
 48 | 
 49 |     @patch("replay.stats.logger.debug")
 50 |     def test_print_stats_zero_in_stats(self, mock_logger):
 51 |         stats = {
 52 |             0: {"connection_diff_sec": 1.734, "query_success": 10, "query_error": 2},
 53 |             1: {"connection_diff_sec": 1},
 54 |         }
 55 | 
 56 |         print_stats(stats)
 57 |         calls = [
 58 |             call("[0] Max connection offset: +1.734 sec"),
 59 |             call("[1] Max connection offset: +1.000 sec"),
 60 |             call("Max connection offset: +1.734 sec"),
 61 |         ]
 62 | 
 63 |         mock_logger.assert_has_calls(calls)
 64 | 
 65 |     @patch("replay.stats.logger.info")
 66 |     def test_display_stats(self, mock_logger):
 67 |         manager = SyncManager()
 68 |         manager.start()
 69 |         stats = {
 70 |             "connection_diff_sec": 1.734,
 71 |             "query_success": 10,
 72 |             "query_error": 2,
 73 |         }
 74 | 
 75 |         peak_conn = manager.Value(int, 3)
 76 | 
 77 |         display_stats(stats, 100, peak_conn)
 78 | 
 79 |         mock_logger.assert_called_once_with(
 80 |             "Queries executed: 12 of 100 (12.0%)  [Success: 10 (83.3%), Failed: 2 (16.7%), Peak connections: 3]"
 81 |         )
 82 | 
 83 |     def test_init_stats(self):
 84 |         stats_test_value = {}
 85 | 
 86 |         response = init_stats(stats_test_value)
 87 |         self.assertEqual(response["connection_diff_sec"], 0)
 88 |         self.assertEqual(response["connection_error_log"], {})
 89 | 
 90 |     def test_collect_stats_not_stats(self):
 91 |         aggregated_stats = {}
 92 |         stats = {}
 93 | 
 94 |         response = collect_stats(aggregated_stats, stats)
 95 |         self.assertEqual(response, None)
 96 | 
 97 |     def test_collect_stats(self):
 98 |         collect_stats(aggregated_stats, stats)
 99 |         self.assertEqual(aggregated_stats["connection_diff_sec"], stats["connection_diff_sec"])
100 |         self.assertEqual(aggregated_stats["transaction_success"], stats["transaction_success"])
101 |         self.assertEqual(aggregated_stats["transaction_error_log"], stats["transaction_error_log"])
102 | 


--------------------------------------------------------------------------------
/core/tests/test_unload_sys_table.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import patch, mock_open, MagicMock
 2 | from core.replay.unload_sys_table import UnloadSysTable
 3 | from core.replay.prep import ReplayPrep
 4 | import unittest
 5 | 
 6 | 
 7 | config = {
 8 |     "tag": "",
 9 |     "workload_location": "test-location/extract",
10 |     "target_cluster_endpoint": "test-redshift-test-testing.us-east-1.redshift.amazonaws.com:5439/dev",
11 |     "target_cluster_region": "us-east-1",
12 |     "master_username": "awsuser",
13 |     "nlb_nat_dns": None,
14 |     "odbc_driver": None,
15 |     "default_interface": "psql",
16 |     "time_interval_between_transactions": "all off",
17 |     "time_interval_between_queries": "all off",
18 |     "execute_copy_statements": "false",
19 |     "execute_unload_statements": "false",
20 |     "replay_output": "s3://location/replay",
21 |     "analysis_output": "s3://location/analysis",
22 |     "unload_system_table_queries": "unload_system_tables.sql",
23 |     "target_cluster_system_table_unload_iam_role": "arn:iam:role/test",
24 | }
25 | 
26 | replay_id = "2023-02-13T04:59:40.864968+00:00_test-redshift-test-testing_76f32"
27 | 
28 | file = mock_open(read_data=("--stl_test\nselect * from stl_test"))
29 | file_2 = mock_open(
30 |     read_data=("--stl_unload\nunload (select * from stl_unload) to '' credentials ''")
31 | )
32 | file_3 = mock_open(read_data=("--stl_test\nselect * from stl_test"))
33 | 
34 | conn = MagicMock()
35 | cursor = MagicMock()
36 | 
37 | cursor.execute.return_value = True
38 | conn.cursor.return_value = cursor
39 | 
40 | 
41 | def mock_get_connection_cred(self, val):
42 |     return {
43 |         "host": "somehost",
44 |         "port": 5437,
45 |         "username": "myname",
46 |         "password": "cantshare",
47 |         "database": "idk",
48 |         "odbc_driver": None,
49 |     }
50 | 
51 | 
52 | def mock_db_connect(interface, host, port, username, password, database, odbc_driver):
53 |     return conn
54 | 
55 | 
56 | def mock_db_connect_error(interface, host, port, username, password, database, odbc_driver):
57 |     cursor.execute.side_effect = KeyError
58 |     conn.cursor.return_value = cursor
59 | 
60 |     return conn
61 | 
62 | 
63 | class TestReplay(unittest.TestCase):
64 |     @patch.object(ReplayPrep, "get_connection_credentials", mock_get_connection_cred)
65 |     @patch("core.replay.unload_sys_table.db_connect", mock_db_connect)
66 |     @patch("core.replay.unload_sys_table.logger.debug")
67 |     @patch("builtins.open", file)
68 |     @patch("core.replay.prep.boto3")
69 |     def test_unload_system_table(self, mock_boto, mock_debug):
70 |         unload_object = UnloadSysTable(config, replay_id)
71 |         unload_object.unload_system_table()
72 | 
73 |         mock_debug.assert_called_once_with("Executed unload query: stl_test")
74 | 
75 |     @patch.object(ReplayPrep, "get_connection_credentials", mock_get_connection_cred)
76 |     @patch("core.replay.unload_sys_table.db_connect", mock_db_connect)
77 |     @patch("core.replay.unload_sys_table.logger.debug")
78 |     @patch("builtins.open", file_2)
79 |     @patch("core.replay.prep.boto3")
80 |     def test_unload_system_table_with_unload_query(self, mock_boto, mock_debug):
81 |         unload_object = UnloadSysTable(config, replay_id)
82 | 
83 |         unload_object.unload_system_table()
84 | 
85 |         mock_debug.assert_called_once_with("Executed unload query: stl_unload")
86 | 
87 |     @patch.object(ReplayPrep, "get_connection_credentials", mock_get_connection_cred)
88 |     @patch("core.replay.unload_sys_table.db_connect", mock_db_connect_error)
89 |     @patch("core.replay.unload_sys_table.logger.error")
90 |     @patch("builtins.open", file_3)
91 |     @patch("core.replay.prep.boto3")
92 |     def test_unload_system_table_with_error(self, mock_boto, mock_error):
93 |         unload_object = UnloadSysTable(config, replay_id)
94 | 
95 |         unload_object.unload_system_table()
96 | 
97 |         mock_error.assert_called_once_with("Failed to unload query. ")
98 | 


--------------------------------------------------------------------------------
/core/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/core/util/__init__.py


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | boto3==1.24.24
 2 | botocore==1.27.24
 3 | Flask==2.2.5
 4 | matplotlib==3.5.2
 5 | moto==3.1.16
 6 | numpy==1.26.4
 7 | pandas
 8 | python-dateutil==2.8.1
 9 | PyYAML==6.0
10 | redshift-connector
11 | reportlab==3.6.13
12 | sqlparse==0.4.2
13 | tabulate==0.8.10
14 | tqdm==4.59.0
15 | coverage
16 | pre-commit
17 | black
18 | flake8
19 | pytest
20 | 


--------------------------------------------------------------------------------
/tools/ExternalObjectReplicator/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/ExternalObjectReplicator/__init__.py


--------------------------------------------------------------------------------
/tools/ExternalObjectReplicator/sql/external_table_query.sql:
--------------------------------------------------------------------------------
 1 | SELECT DISTINCT
 2 | cast(q.database as varchar(100)) as rs_db
 3 | ,cast(v.table_schema as varchar(100)) as rs_external_schema
 4 | ,cast(v.table_name as varchar(100)) as external_table
 5 | ,cast(es.databasename as varchar(100)) as glue_databasename
 6 | FROM stl_query q
 7 | JOIN svl_s3query_summary s
 8 | ON q.query = s.query
 9 | JOIN svv_tables v
10 | on s.external_table_name like '%' + v.table_schema + '%'
11 | and s.external_table_name like '%' + v.table_catalog + '%'
12 | and s.external_table_name like '%' + v.table_name + '%'
13 | join svv_external_schemas es
14 | on es.schemaname like v.table_schema
15 | WHERE q.userid > 1 and v.table_type = 'EXTERNAL TABLE'
16 | and q.starttime >= cast('{start}' as datetime)
17 | and q.starttime <= cast('{end}' as datetime)
18 | AND q.DATABASE = '{db}';


--------------------------------------------------------------------------------
/tools/ExternalObjectReplicator/sql/stl_load_query.sql:
--------------------------------------------------------------------------------
 1 | SELECT DISTINCT trim(filename) AS filename
 2 | FROM STL_LOAD_COMMITS
 3 | WHERE QUERY IN
 4 | (SELECT DISTINCT QUERY FROM STL_QUERY
 5 | WHERE  starttime >= cast('{start}' as datetime)
 6 | AND  starttime <= cast('{end}' as datetime)
 7 | AND DATABASE = '{db}')
 8 | OR QUERY IN
 9 | (SELECT DISTINCT QUERY FROM STL_QUERY
10 | WHERE starttime >= cast('{start}' as datetime)
11 | AND  starttime <= cast('{end}' as datetime)
12 | AND (querytxt LIKE '%manifest%' OR querytxt LIKE '%Manifest%' OR querytxt LIKE '%MANIFEST%')
13 | AND DATABASE = '{db}')
14 | order by 1;
15 | 


--------------------------------------------------------------------------------
/tools/ExternalObjectReplicator/sql/svl_s3_list.sql:
--------------------------------------------------------------------------------
1 | SELECT DISTINCT bucket, prefix
2 | FROM SVL_S3LIST WHERE QUERY IN
3 | ( SELECT DISTINCT QUERY FROM STL_QUERY
4 | WHERE userid>1
5 | and starttime >= cast('{start}' as datetime)
6 | and starttime <= cast('{end}' as datetime)
7 | AND DATABASE = '{db}')
8 | ORDER BY 1;
9 | 


--------------------------------------------------------------------------------
/tools/ExternalObjectReplicator/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/ExternalObjectReplicator/tests/__init__.py


--------------------------------------------------------------------------------
/tools/ExternalObjectReplicator/util/glue_util.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import uuid
  3 | 
  4 | from moto.glue.exceptions import DatabaseNotFoundException, TableNotFoundException
  5 | 
  6 | import common.aws_service as aws_helper
  7 | 
  8 | from tqdm import tqdm
  9 | from common.util import bucket_dict
 10 | 
 11 | logger = logging.getLogger("ExternalObjectReplicatorLogger")
 12 | 
 13 | 
 14 | def clone_glue_catalog(records, dest_location, region):
 15 |     """
 16 |     It reads through the systems table to create clone of the database,tables and partitions
 17 |     record[3]['stringValue'] : glue database vale from table
 18 |     record[2]['stringValue'] : external glue table value
 19 |     @param records:
 20 |     @param region
 21 |     @param dest_location
 22 |     @return:
 23 |     """
 24 |     glue_db_append_name = uuid.uuid1()
 25 |     new_glue_db_list = []
 26 |     checked_db_list = []
 27 |     pbar = tqdm(range(len(records)))
 28 |     for i in pbar:
 29 |         record = records[i]
 30 |         original_glue_db = record[3]["stringValue"]
 31 |         original_glue_table = record[2]["stringValue"]
 32 |         new_glue_db = f"{glue_db_append_name}-{original_glue_db}"
 33 |         pbar.set_description_str(
 34 |             f"Cloning {original_glue_table} in {original_glue_db} - {i + 1} out of {len(records)} glue objects"
 35 |         )
 36 |         # if the database hasn't been checked yet
 37 |         if original_glue_db not in checked_db_list:
 38 |             database_copy(new_glue_db, original_glue_db, original_glue_table, region)
 39 |             checked_db_list.append(original_glue_db)
 40 |             new_glue_db_list.append(new_glue_db)
 41 |         glue_table_copy(original_glue_db, new_glue_db, original_glue_table, dest_location, region)
 42 |     logger.debug(f"New Glue database created: {new_glue_db_list}.")
 43 |     logger.info("== Finished cloning Glue databases and tables ==")
 44 |     return new_glue_db_list
 45 | 
 46 | 
 47 | def database_copy(new_glue_db, original_glue_db, original_glue_table, region):
 48 |     """
 49 |     Create a new database
 50 |     @return:
 51 | 
 52 |     Parameters
 53 |     ----------
 54 |     region
 55 |     original_glue_table
 56 |     original_glue_db
 57 |     new_glue_db
 58 |     """
 59 |     try:
 60 |         aws_helper.glue_get_database(name=new_glue_db, region=region)
 61 |     except DatabaseNotFoundException as _:
 62 |         aws_helper.glue_create_database(
 63 |             new_glue_db, "Database clone created by External Object Replicator", region
 64 |         )
 65 |     except Exception as e:
 66 |         logger.error(f"Error doing database copy in Glue: {e}")
 67 |         exit(-1)
 68 | 
 69 |     return original_glue_db, new_glue_db, original_glue_table
 70 | 
 71 | 
 72 | def glue_table_copy(original_glue_db, new_glue_db, original_glue_table, dest_location, region):
 73 |     """
 74 |     CHeck if glue table exists in the new glue database, if not create the table structure along with the partitions
 75 |     @param original_glue_db:
 76 |     @param new_glue_db:
 77 |     @param original_glue_table:
 78 |     @param dest_location
 79 |     @param region
 80 |     @return:
 81 |     """
 82 |     dest_bucket = bucket_dict(dest_location)["bucket_name"]
 83 |     try:
 84 |         table_get_response = aws_helper.glue_get_table(
 85 |             database=new_glue_db, table=original_glue_table, region=region
 86 |         )
 87 |         new_s3_loc = table_get_response["Table"]["StorageDescriptor"]["Location"]
 88 |     except TableNotFoundException as _:
 89 |         table_get_response = aws_helper.glue_get_table(
 90 |             database=original_glue_db,
 91 |             table=original_glue_table,
 92 |             region=region,
 93 |         )
 94 |         index_response = aws_helper.glue_get_partition_indexes(
 95 |             database=original_glue_db, table=original_glue_table, region=region
 96 |         )
 97 |         orig_s3_loc = table_get_response["Table"]["StorageDescriptor"]["Location"].split("/")
 98 |         new_s3_loc = f"{dest_bucket}/spectrumfiles/{'/'.join(orig_s3_loc[2:])}"
 99 |         table_input = {
100 |             "Name": table_get_response["Table"]["Name"],
101 |             "Description": "For use with Redshfit candidate release testing",
102 |             "StorageDescriptor": {
103 |                 "Columns": table_get_response["Table"]["StorageDescriptor"]["Columns"],
104 |                 "Location": new_s3_loc,
105 |             },
106 |             "PartitionKeys": table_get_response["Table"]["PartitionKeys"],
107 |         }
108 | 
109 |         if index_response["PartitionIndexDescriptorList"]:
110 |             aws_helper.glue_create_table(
111 |                 new_database=new_glue_db,
112 |                 table_input=table_input.update(
113 |                     {"PartitionIndexes": index_response["PartitionIndexDescriptorList"]}
114 |                 ),
115 |                 region=region,
116 |             )
117 |         else:
118 |             aws_helper.glue_create_table(
119 |                 new_database=new_glue_db, table_input=table_input, region=region
120 |             )
121 |         return new_s3_loc
122 |     except Exception as e:
123 |         logger.error(f"Failed to copy table in Glue: {e}")
124 |         exit(-1)
125 | 


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/IAM_Permissions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/IAM_Permissions.pdf


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/__init__.py


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/bootstrap_scripts/extract_bootstrap.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Prepares a host for the extract step: echoes the configuration it was given,
 3 | # installs prerequisites, clones redshift-test-drive, rewrites config/extract.yaml
 4 | # from environment variables, uploads the resulting config and runs "make extract".
 5 | set -e
 6 | echo "bucket_name: $BUCKET_NAME"
 7 | echo "simple_replay_extract_overwrite_s3_path: $SIMPLE_REPLAY_EXTRACT_OVERWRITE_S3_PATH"
 8 | echo "simple_replay_log_location: $SIMPLE_REPLAY_LOG_LOCATION"
 9 | echo "redshift_user_name: $REDSHIFT_USER_NAME"
10 | echo "what_if_timestamp: $WHAT_IF_TIMESTAMP"
11 | echo "simple_replay_extract_start_time: $SIMPLE_REPLAY_EXTRACT_START_TIME"
12 | echo "simple_replay_extract_end_time: $SIMPLE_REPLAY_EXTRACT_END_TIME"
13 | echo "extract_prefix: $EXTRACT_PREFIX"
14 | echo "script_prefix: $SCRIPT_PREFIX"
15 | 
16 | yum update -y
17 | yum -y install git
18 | yum -y install python3
19 | yum -y install python3-pip
20 | yum -y install aws-cfn-bootstrap
21 | yum -y install gcc gcc-c++ python3 python3-devel unixODBC unixODBC-devel
22 | mkdir amazonutils
23 | cd amazonutils
24 | git clone https://github.com/aws/redshift-test-drive.git
25 | cd redshift-test-drive
26 | make setup
27 | # An override config is downloaded only when the path is not the literal "N/A".
28 | if [[ "$SIMPLE_REPLAY_EXTRACT_OVERWRITE_S3_PATH" != "N/A" ]]; then
29 |   # Quoted so a path containing spaces or glob characters stays a single argument.
30 |   aws s3 cp "$SIMPLE_REPLAY_EXTRACT_OVERWRITE_S3_PATH" config/extract.yaml
31 | fi
32 | WORKLOAD_LOCATION="s3://${BUCKET_NAME}/${EXTRACT_PREFIX}/${WHAT_IF_TIMESTAMP}"
33 | sed -i "s#master_username: \".*\"#master_username: \"$REDSHIFT_USER_NAME\"#g" config/extract.yaml
34 | sed -i "s#log_location: \".*\"#log_location: \"$SIMPLE_REPLAY_LOG_LOCATION\"#g" config/extract.yaml
35 | sed -i "s#workload_location: \".*\"#workload_location: \"$WORKLOAD_LOCATION\"#g" config/extract.yaml
36 | sed -i "s#start_time: \".*\"#start_time: \"$SIMPLE_REPLAY_EXTRACT_START_TIME\"#g" config/extract.yaml
37 | sed -i "s#end_time: \".*\"#end_time: \"$SIMPLE_REPLAY_EXTRACT_END_TIME\"#g" config/extract.yaml
38 | aws s3 cp config/extract.yaml "s3://$BUCKET_NAME/$SCRIPT_PREFIX/"
39 | make extract


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/bootstrap_scripts/performance_test_bootstrap.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | # This script bootstraps a base amazonlinux image to run the Redshift
 3 | # Node Config concurrency test.
 4 | # 1. Install the AWS CLI, Python3, and necessary Python libraries.
 5 | # 2. Copy Python program source for concurrency test
 6 | # 3. Execute that Python program
 7 | # We expect all configuration to be defined as environment variables
 8 | # for the Batch job.
 9 | 
10 | set -eu
11 | 
12 | yum install -y awscli python3
13 | pip3 install boto3 psycopg2-binary pandas sqlalchemy
14 | 
15 | aws s3 cp "$PYTHON_SCRIPT" ./script.py
16 | 
17 | # This Python program requires these environment variables to be set:
18 | # `$SQL_SCRIPT_S3_PATH`, `$REDSHIFT_CLUSTER_ENDPOINT`,
19 | # `$REDSHIFT_IAM_ROLE`, `$BUCKET_NAME`, `$REDSHIFT_USER_NAME`
20 | python3 ./script.py
21 | 


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/bootstrap_scripts/replay_bootstrap.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | echo "bucket_name: $BUCKET_NAME"
 4 | echo "simple_replay_overwrite_s3_path: $SIMPLE_REPLAY_OVERWRITE_S3_PATH"
 5 | echo "redshift_user_name: $REDSHIFT_USER_NAME"
 6 | echo "what_if_timestamp: $WHAT_IF_TIMESTAMP"
 7 | echo "extract_prefix: $EXTRACT_PREFIX"
 8 | echo "replay_prefix: $REPLAY_PREFIX"
 9 | echo "script_prefix: $SCRIPT_PREFIX"
10 | echo "redshift_iam_role: $REDSHIFT_IAM_ROLE"
11 | echo "workload_location: $WORKLOAD_LOCATION"
12 | echo "cluster_endpoint: $CLUSTER_ENDPOINT"
13 | echo "cluster_identifier: $CLUSTER_IDENTIFIER"
14 | echo "execute_unload_statements: $SIMPLE_REPLAY_UNLOAD_STATEMENTS"
15 | echo "snapshot_account_id: $SNAPSHOT_ACCOUNT_ID"
16 | account_id=$(aws sts get-caller-identity --query Account --output text)
17 | echo "account_id: $account_id"
18 | echo "endpoint_type: $ENDPOINT_TYPE"
19 | # The region is the third dot-separated field of the cluster endpoint.
20 | TARGET_CLUSTER_REGION=$(echo "$CLUSTER_ENDPOINT" | cut -f3 -d'.')
21 | yum update -y
22 | yum -y install git
23 | yum -y install python3
24 | yum -y install python3-pip
25 | yum -y install aws-cfn-bootstrap
26 | yum -y install gcc gcc-c++ python3 python3-devel unixODBC unixODBC-devel
27 | mkdir amazonutils
28 | cd amazonutils
29 | git clone https://github.com/aws/redshift-test-drive.git
30 | cd redshift-test-drive
31 | make setup
32 | if [[ "$SIMPLE_REPLAY_OVERWRITE_S3_PATH" != "N/A" ]]; then
33 |   aws s3 cp "$SIMPLE_REPLAY_OVERWRITE_S3_PATH" config/replay.yaml
34 | fi
35 | 
36 | sed -i "s#master_username: \".*\"#master_username: \"$REDSHIFT_USER_NAME\"#g" config/replay.yaml
37 | sed -i "s#unload_iam_role: \".*\"#unload_iam_role: \"$REDSHIFT_IAM_ROLE\"#g" config/replay.yaml
38 | sed -i "s#workload_location: \".*\"#workload_location: \"$WORKLOAD_LOCATION\"#g" config/replay.yaml
39 | sed -i "s#target_cluster_endpoint: \".*\"#target_cluster_endpoint: \"$CLUSTER_ENDPOINT\"#g" config/replay.yaml
40 | sed -i "s#target_cluster_region: \".*\"#target_cluster_region: \"$TARGET_CLUSTER_REGION\"#g" config/replay.yaml
41 | sed -i "s#analysis_iam_role: \".*\"#analysis_iam_role: \"$REDSHIFT_IAM_ROLE\"#g" config/replay.yaml
42 | sed -i "s#analysis_output: \".*\"#analysis_output: \"$WORKLOAD_LOCATION\"#g" config/replay.yaml
43 | 
44 | if [ "$SIMPLE_REPLAY_UNLOAD_STATEMENTS" == "true" ]; then
45 |     # unload_iam_role is already set unconditionally above; only replay_output is added here.
46 |     sed -i "s#replay_output: \".*\"#replay_output: \"s3://$BUCKET_NAME/$REPLAY_PREFIX/$WHAT_IF_TIMESTAMP/$CLUSTER_IDENTIFIER\"#g" config/replay.yaml
47 | fi
48 | # Same account as the snapshot: enable COPY replay and append the IAM role to CSV rows ending in ",,".
49 | if [[ "$account_id" == "$SNAPSHOT_ACCOUNT_ID" ]]; then
50 |    sed -i "s#execute_copy_statements: \"false\"#execute_copy_statements: \"true\"#g" config/replay.yaml
51 |    aws s3 cp "$WORKLOAD_LOCATION/copy_replacements.csv" . || true
52 |    sed -z -i "s#,,\n#,,$REDSHIFT_IAM_ROLE\n#g" copy_replacements.csv || true
53 |    aws s3 cp copy_replacements.csv "$WORKLOAD_LOCATION/copy_replacements.csv" || true
54 | fi
55 | aws s3 cp config/replay.yaml "s3://$BUCKET_NAME/$SCRIPT_PREFIX/replay_$CLUSTER_IDENTIFIER.yaml"
56 | make replay
57 | if [[ $ENDPOINT_TYPE == 'SERVERLESS' ]]; then
58 |   aws s3 cp "s3://$BUCKET_NAME/$SCRIPT_PREFIX/system_config.json" .
59 |   aws s3 cp "s3://$BUCKET_NAME/$SCRIPT_PREFIX/create_external_schema.py" .
60 |   # NOTE(review): the script is downloaded to the current directory, but the copy under tools/ is run -- confirm.
61 |   python3 tools/NodeConfigCompare/python_scripts/create_external_schema.py
62 | fi
63 | 


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/configuration/parameter_group_config.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "Parameters": [
  3 |         {
  4 |             "ParameterName": "auto_analyze",
  5 |             "ParameterValue": "true",
  6 |             "Description": "Use auto analyze",
  7 |             "Source": "engine-default",
  8 |             "DataType": "boolean",
  9 |             "AllowedValues": "true,false",
 10 |             "ApplyType": "static",
 11 |             "IsModifiable": true
 12 |         },
 13 |         {
 14 |             "ParameterName": "datestyle",
 15 |             "ParameterValue": "ISO, MDY",
 16 |             "Description": "Sets the display format for date and time values.",
 17 |             "Source": "engine-default",
 18 |             "DataType": "string",
 19 |             "ApplyType": "static",
 20 |             "IsModifiable": true
 21 |         },
 22 |         {
 23 |             "ParameterName": "enable_case_sensitive_identifier",
 24 |             "ParameterValue": "false",
 25 |             "Description": "Preserve case sensitivity for database identifiers such as table or column names in parser",
 26 |             "Source": "engine-default",
 27 |             "DataType": "boolean",
 28 |             "AllowedValues": "true,false",
 29 |             "ApplyType": "static",
 30 |             "IsModifiable": true
 31 |         },
 32 |         {
 33 |             "ParameterName": "enable_user_activity_logging",
 34 |             "ParameterValue": "false",
 35 |             "Description": "parameter for audit logging purpose",
 36 |             "Source": "user",
 37 |             "DataType": "boolean",
 38 |             "AllowedValues": "true,false",
 39 |             "ApplyType": "static",
 40 |             "IsModifiable": true
 41 |         },
 42 |         {
 43 |             "ParameterName": "extra_float_digits",
 44 |             "ParameterValue": "0",
 45 |             "Description": "Sets the number of digits displayed for floating-point values",
 46 |             "Source": "engine-default",
 47 |             "DataType": "integer",
 48 |             "AllowedValues": "-15-2",
 49 |             "ApplyType": "static",
 50 |             "IsModifiable": true
 51 |         },
 52 |         {
 53 |             "ParameterName": "max_concurrency_scaling_clusters",
 54 |             "ParameterValue": "2",
 55 |             "Description": "The maximum concurrency scaling clusters can be used.",
 56 |             "Source": "user",
 57 |             "DataType": "integer",
 58 |             "AllowedValues": "0-10",
 59 |             "ApplyType": "static",
 60 |             "IsModifiable": true
 61 |         },
 62 |         {
 63 |             "ParameterName": "max_cursor_result_set_size",
 64 |             "ParameterValue": "default",
 65 |             "Description": "Sets the max cursor result set size",
 66 |             "Source": "engine-default",
 67 |             "DataType": "integer",
 68 |             "AllowedValues": "0-14400000",
 69 |             "ApplyType": "static",
 70 |             "IsModifiable": true
 71 |         },
 72 |         {
 73 |             "ParameterName": "query_group",
 74 |             "ParameterValue": "default",
 75 |             "Description": "This parameter applies a user-defined label to a group of queries that are run during the same session..",
 76 |             "Source": "engine-default",
 77 |             "DataType": "string",
 78 |             "ApplyType": "static",
 79 |             "IsModifiable": true
 80 |         },
 81 |         {
 82 |             "ParameterName": "require_ssl",
 83 |             "ParameterValue": "true",
 84 |             "Description": "require ssl for all databaseconnections",
 85 |             "Source": "user",
 86 |             "DataType": "boolean",
 87 |             "AllowedValues": "true,false",
 88 |             "ApplyType": "static",
 89 |             "IsModifiable": true
 90 |         },
 91 |         {
 92 |             "ParameterName": "search_path",
 93 |             "ParameterValue": "$user, public",
 94 |             "Description": "Sets the schema search order for names that are not schema-qualified.",
 95 |             "Source": "engine-default",
 96 |             "DataType": "string",
 97 |             "ApplyType": "static",
 98 |             "IsModifiable": true
 99 |         },
100 |         {
101 |             "ParameterName": "statement_timeout",
102 |             "ParameterValue": "0",
103 |             "Description": "Aborts any statement that takes over the specified number of milliseconds.",
104 |             "Source": "engine-default",
105 |             "DataType": "integer",
106 |             "AllowedValues": "0,100-2147483647",
107 |             "ApplyType": "static",
108 |             "IsModifiable": true
109 |         },
110 |         {
111 |             "ParameterName": "use_fips_ssl",
112 |             "ParameterValue": "false",
113 |             "Description": "Use fips ssl library",
114 |             "Source": "engine-default",
115 |             "DataType": "boolean",
116 |             "AllowedValues": "true,false",
117 |             "ApplyType": "static",
118 |             "IsModifiable": true
119 |         },
120 |         {
121 |             "ParameterName": "wlm_json_configuration",
122 |             "ParameterValue": "[{\"auto_wlm\":true}]",
123 |             "Description": "wlm json configuration",
124 |             "Source": "engine-default",
125 |             "DataType": "string",
126 |             "ApplyType": "static",
127 |             "IsModifiable": true
128 |         }
129 |     ]
130 | }
131 | 


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/configuration/source-wlm.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "query_group": [],
 4 |     "query_group_wild_card": 0,
 5 |     "user_group": [],
 6 |     "user_group_wild_card": 0,
 7 |     "concurrency_scaling": "off",
 8 |     "rules": [
 9 |       {
10 |         "rule_name": "DiskSpilling",
11 |         "predicate": [
12 |           {
13 |             "metric_name": "query_temp_blocks_to_disk",
14 |             "operator": ">",
15 |             "value": 100000
16 |           }
17 |         ],
18 |         "action": "log"
19 |       },
20 |       {
21 |         "rule_name": "QueryRunningMoreThan30min",
22 |         "predicate": [
23 |           {
24 |             "metric_name": "query_execution_time",
25 |             "operator": ">",
26 |             "value": 1800
27 |           }
28 |         ],
29 |         "action": "log"
30 |       }
31 |     ],
32 |     "priority": "normal",
33 |     "queue_type": "auto",
34 |     "auto_wlm": true
35 |   },
36 |   {
37 |     "short_query_queue": true
38 |   }
39 | ]
40 | 


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/configuration/wlm-concurrency-scaling.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "query_group": [],
 4 |     "query_group_wild_card": 0,
 5 |     "user_group": [],
 6 |     "user_group_wild_card": 0,
 7 |     "concurrency_scaling": "auto",
 8 |     "rules": [
 9 |       {
10 |         "rule_name": "DiskSpilling",
11 |         "predicate": [
12 |           {
13 |             "metric_name": "query_temp_blocks_to_disk",
14 |             "operator": ">",
15 |             "value": 100000
16 |           }
17 |         ],
18 |         "action": "log"
19 |       },
20 |       {
21 |         "rule_name": "QueryRunningMoreThan30min",
22 |         "predicate": [
23 |           {
24 |             "metric_name": "query_execution_time",
25 |             "operator": ">",
26 |             "value": 1800
27 |           }
28 |         ],
29 |         "action": "log"
30 |       }
31 |     ],
32 |     "priority": "normal",
33 |     "queue_type": "auto",
34 |     "auto_wlm": true
35 |   },
36 |   {
37 |     "short_query_queue": true
38 |   }
39 | ]
40 | 


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/images/architecure-serverless.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/images/architecure-serverless.png


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/images/batch-cw-log-group.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/images/batch-cw-log-group.png


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/images/redshift-clusters-provisioned.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/images/redshift-clusters-provisioned.png


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/images/redshift-clusters-serverless.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/images/redshift-clusters-serverless.png


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/images/redshift-clusters.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/images/redshift-clusters.png


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/images/statemachine-log.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/images/statemachine-log.png


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/images/statemachine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/images/statemachine.png


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/python_scripts/RedshiftConfigTestingLambda.py.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/python_scripts/RedshiftConfigTestingLambda.py.zip


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/python_scripts/StartUpLambda.py.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/python_scripts/StartUpLambda.py.zip


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/python_scripts/boto3-redshift-serverless.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/python_scripts/boto3-redshift-serverless.zip


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/python_scripts/create_external_schema.py:
--------------------------------------------------------------------------------
 1 | import redshift_connector
 2 | import boto3
 3 | import yaml
 4 | import json
 5 | 
 6 | rs_client = boto3.client("redshift")
 7 | with open("config/replay.yaml", "r") as fr:
 8 |     config_read = yaml.safe_load(fr)
 9 | target_cluster_endpoint = config_read["target_cluster_endpoint"]
10 | cluster_endpoint_split = target_cluster_endpoint.split(".")
11 | workgroup_id = cluster_endpoint_split[0]
12 | db_host = target_cluster_endpoint.split(":")[0]
13 | db_port = cluster_endpoint_split[5].split("/")[0][4:]
14 | db_name = cluster_endpoint_split[5].split("/")[1]
15 | db_username = config_read["master_username"]
16 | serverless_cluster_id = f"redshift-serverless-{workgroup_id}"
17 | with open("system_config.json", "r") as jr:
18 |     json_data = json.load(jr)
19 | script = json_data["EXTERNAL_SCHEMA_SCRIPT"]
20 | try:
21 |     response = rs_client.get_cluster_credentials(
22 |         DbUser=db_username,
23 |         ClusterIdentifier=serverless_cluster_id,
24 |         AutoCreate=False,
25 |         DurationSeconds=3600,
26 |     )
27 | except rs_client.exceptions.ClientError as e:
28 |     if e.response["Error"]["Code"] == "ExpiredToken":
29 |         print(
30 |             f"Error retrieving credentials for {serverless_cluster_id}: IAM credentials have expired."
31 |         )
32 |         exit(-1)
33 |     elif e.response["Error"]["Code"] == "ResourceNotFoundException":
34 |         print(
35 |             f"Serverless endpoint could not be found "
36 |             f"RedshiftServerless:GetCredentials. {e}"
37 |         )
38 |         exit(-1)
39 |     else:
40 |         print(f"Got exception retrieving credentials ({e.response['Error']['Code']})")
41 |         raise e
42 | db_user = response["DbUser"]
43 | db_password = response["DbPassword"]
44 | try:
45 |     conn = redshift_connector.connect(
46 |         host=db_host, database=db_name, user=db_user, password=db_password
47 |     )
48 |     cursor = conn.cursor()
49 |     conn.autocommit = True
50 |     cursor.execute(script)
51 |     print(f"Executed script.{script}")
52 | except Exception as err:
53 |     if "already exists" not in str(err):
54 |         print(f"Got exception while executing script {err}")
55 |         raise
56 | 


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/python_scripts/python.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/NodeConfigCompare/python_scripts/python.zip


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/python_scripts/redshift-performance-test.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import boto3
  3 | import psycopg2
  4 | import time
  5 | import pandas
  6 | from sqlalchemy import create_engine
  7 | from sqlalchemy import text
  8 | from concurrent.futures import ThreadPoolExecutor
  9 | from concurrent.futures import as_completed
 10 | from urllib.parse import quote_plus as urlquote
 11 | import urllib
 12 | import re
 13 | import os
 14 | 
 15 | SQL_SCRIPT_S3_PATH = os.environ["SQL_SCRIPT_S3_PATH"]
 16 | REDSHIFT_CLUSTER_ENDPOINT = os.environ["REDSHIFT_CLUSTER_ENDPOINT"]
 17 | REDSHIFT_IAM_ROLE = os.environ["REDSHIFT_IAM_ROLE"]
 18 | BUCKET_NAME = os.environ["SQL_SCRIPT_S3_PATH"]
 19 | REDSHIFT_USER_NAME = os.environ["REDSHIFT_USER_NAME"]
 20 | NUMBER_OF_PARALLEL_SESSIONS_LIST = os.environ["NUMBER_OF_PARALLEL_SESSIONS_LIST"]
 21 | DISABLE_RESULT_CACHE = os.environ["DISABLE_RESULT_CACHE"]
 22 | DEFAULT_OUTPUT_LIMIT = os.environ["DEFAULT_OUTPUT_LIMIT"]
 23 | MAX_NUMBER_OF_QUERIES = os.environ["MAX_NUMBER_OF_QUERIES"]
 24 | MAX_PARALLEL_SESSIONS = os.environ["MAX_PARALLEL_SESSIONS"]
 25 | QUERY_LABEL_PREFIX = os.environ["QUERY_LABEL_PREFIX"]
 26 | 
 27 | 
 28 | def connect_to_redshift(host, username):
 29 |     client = boto3.client("redshift")
 30 |     cluster_creds = client.get_cluster_credentials(
 31 |         DbUser=username,
 32 |         DbName=REDSHIFT_CLUSTER_ENDPOINT.split("/")[1],
 33 |         ClusterIdentifier=REDSHIFT_CLUSTER_ENDPOINT.split(".")[0],
 34 |     )
 35 | 
 36 |     connection_string = (
 37 |         "postgresql://"
 38 |         + urlquote(cluster_creds["DbUser"])
 39 |         + ":"
 40 |         + urlquote(cluster_creds["DbPassword"])
 41 |         + "@"
 42 |         + REDSHIFT_CLUSTER_ENDPOINT
 43 |     )
 44 |     return create_engine(connection_string, pool_size=0, max_overflow=-1)
 45 | 
 46 | 
 47 | def get_json_config_from_s3(script_s3_path):
 48 |     bucket, key = script_s3_path.replace("s3://", "").split("/", 1)
 49 |     obj = boto3.client("s3").get_object(Bucket=bucket, Key=key)
 50 |     return json.loads(obj["Body"].read().decode("utf-8"))
 51 | 
 52 | 
 53 | def get_sql_scripts_from_s3():
 54 | 
 55 |     bucket, key = SQL_SCRIPT_S3_PATH.replace("s3://", "").split("/", 1)
 56 |     obj = boto3.client("s3").get_object(Bucket=bucket, Key=key)
 57 |     script = obj["Body"].read().decode("utf-8")
 58 |     script = script.format(redshift_iam_role=REDSHIFT_IAM_ROLE, bucket_name=BUCKET_NAME)
 59 |     split_scripts = script.split(";")[:-1]
 60 |     if len(split_scripts) > int(MAX_NUMBER_OF_QUERIES):
 61 |         split_scripts = split_scripts[0 : int(MAX_NUMBER_OF_QUERIES)]
 62 |     return split_scripts
 63 | 
 64 | 
 65 | def get_sql(engine, number_of_parallel_sessions):
 66 |     sql_script = ""
 67 | 
 68 |     pattern = re.compile(r"limit[\s|\t|\n]+[\d]+[\s]*$", re.IGNORECASE)
 69 |     for query in get_sql_scripts_from_s3():
 70 |         if not re.search(pattern, query):
 71 |             query += " limit " + DEFAULT_OUTPUT_LIMIT
 72 |         sql_script += query + ";\n"
 73 | 
 74 |     if DISABLE_RESULT_CACHE == "true":
 75 |         sql_script = "set enable_result_cache_for_session to false;\n" + sql_script
 76 | 
 77 |     sql_script = (
 78 |         "set query_group to '"
 79 |         + QUERY_LABEL_PREFIX
 80 |         + str(number_of_parallel_sessions)
 81 |         + "';\n"
 82 |         + sql_script
 83 |     )
 84 | 
 85 |     df = pandas.read_sql(text(sql_script), engine)
 86 |     return df
 87 | 
 88 | 
 89 | def run_concurrency_test(number_of_parallel_sessions):
 90 |     engine = connect_to_redshift(REDSHIFT_CLUSTER_ENDPOINT, REDSHIFT_USER_NAME)
 91 |     start_time = time.time()
 92 |     try:
 93 |         with ThreadPoolExecutor(max_workers=number_of_parallel_sessions) as executor:
 94 |             futures = []
 95 |             for _ in range(number_of_parallel_sessions):
 96 |                 futures.append(executor.submit(get_sql, engine, number_of_parallel_sessions))
 97 |             for future in as_completed(futures):
 98 |                 rs = future.result()
 99 | 
100 |     except Exception as e:
101 |         raise e
102 |     elapsed_time_in_secs = time.time() - start_time
103 |     print("--- %s seconds ---" % elapsed_time_in_secs)
104 |     return elapsed_time_in_secs
105 | 
106 | 
# Script entry: echo the effective configuration, then run one concurrency test
# per requested session count, skipping counts above MAX_PARALLEL_SESSIONS.
print(
    f"script:{SQL_SCRIPT_S3_PATH}, cluster:{REDSHIFT_CLUSTER_ENDPOINT},role:{REDSHIFT_IAM_ROLE},bucket:{BUCKET_NAME},user:{REDSHIFT_USER_NAME},sessions:{NUMBER_OF_PARALLEL_SESSIONS_LIST}"
)
for sessions in NUMBER_OF_PARALLEL_SESSIONS_LIST.split(","):
    number_of_parallel_sessions = int(sessions)
    if number_of_parallel_sessions > int(MAX_PARALLEL_SESSIONS):
        # Over the cap: report it and move on to the next requested count.
        print(
            f"parallel sessions {number_of_parallel_sessions} exceeds maximum allowed {MAX_PARALLEL_SESSIONS} .."
        )
        continue
    print(f"running {number_of_parallel_sessions} parallel threads ..")
    run_concurrency_test(number_of_parallel_sessions)


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/sql/ddl.sql:
--------------------------------------------------------------------------------
-- Creates example_table when it does not exist yet. The id column is an
-- identity starting at 1 with step 1 and insert_timestamp defaults to sysdate
create table if not exists example_table
(id INTEGER IDENTITY(1, 1) NOT NULL, column_value varchar(10), insert_timestamp timestamp default sysdate);

-- Inserts one sample row and relies on the defaults for id and insert_timestamp
insert into example_table (column_value) values('data');
5 | 


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/sql/populate_comparison_results.sql:
--------------------------------------------------------------------------------
-- Exports the raw comparison table to S3 as CSV with a header row
-- The names in curly brackets are placeholders and are presumably substituted
-- by the caller before execution (verify against the code that loads this file)
unload ($$
select * from public.redshift_config_comparison_raw
$$) to '{raw_comparison_results_s3_path}/{what_if_timestamp}/'
FORMAT AS CSV HEADER ALLOWOVERWRITE iam_role '{redshift_iam_role}';


-- Exports the comparison results table to S3 as CSV with a header row
-- Unlike the statement above this one sets parallel off
unload ($$
select * from public.redshift_config_comparison_results
$$) to '{comparison_results_s3_path}/{what_if_timestamp}/'
parallel off FORMAT AS CSV HEADER ALLOWOVERWRITE iam_role '{redshift_iam_role}';
11 | 


--------------------------------------------------------------------------------
/tools/NodeConfigCompare/sql/test_queries.sql:
--------------------------------------------------------------------------------
--first_query
-- Lists supplier and part details for parts of size 34 whose type ends in COPPER
-- restricted to suppliers in the MIDDLE EAST region whose supply cost equals the
-- lowest supply cost offered for that part within the same region

SELECT
    s_acctbal
    , s_name
    , n_name
    , p_partkey
    , p_mfgr
    , s_address
    , s_phone
    , s_comment
FROM
    part,
    supplier,
    partsupp,
    nation,
    REGION
WHERE    p_partkey = ps_partkey
         AND s_suppkey = ps_suppkey
         AND p_size = 34
         AND p_type LIKE '%COPPER'
         AND s_nationkey = n_nationkey
         AND n_regionkey = r_regionkey
         AND r_name = 'MIDDLE EAST'
         -- correlated subquery - part is not in its FROM list so p_partkey
         -- refers to the row of the outer query
         AND ps_supplycost = (SELECT
                                  MIN(ps_supplycost)
                              FROM
                                  partsupp,
                                  supplier,
                                  nation,
                                  REGION
                              WHERE  p_partkey = ps_partkey
                                     AND s_suppkey = ps_suppkey
                                     AND s_nationkey = n_nationkey
                                     AND n_regionkey = r_regionkey
                                     AND r_name = 'MIDDLE EAST')
ORDER BY
    s_acctbal DESC
    , n_name
    , s_name
    , p_partkey ;
 42 | 
--second_query
-- Computes the stock value (supply cost times available quantity) per part for
-- suppliers in SAUDI ARABIA and keeps the parts whose value exceeds the given
-- fraction of the total stock value of all SAUDI ARABIA suppliers

SELECT
    ps_partkey
    , SUM(ps_supplycost * ps_availqty) AS value
FROM
    partsupp,
    supplier,
    nation
WHERE    ps_suppkey = s_suppkey
         AND s_nationkey = n_nationkey
         AND n_name = 'SAUDI ARABIA'
GROUP BY
    ps_partkey
HAVING
     -- threshold is the overall total scaled by a constant factor
     SUM(ps_supplycost * ps_availqty) > (SELECT
                                             SUM(ps_supplycost * ps_availqty) * 0.0000000333
                                         FROM
                                             partsupp,
                                             supplier,
                                             nation
                                         WHERE  ps_suppkey = s_suppkey
                                                AND s_nationkey = n_nationkey
                                                AND n_name = 'SAUDI ARABIA')
ORDER BY
    value DESC ;
 69 | 
--third_query
-- Counts distinct suppliers per brand, type and size for parts in the listed
-- sizes, leaving out Brand#23, types starting with MEDIUM ANODIZED and any
-- supplier whose comment mentions Customer followed by Complaints

SELECT
    p_brand
    , p_type
    , p_size
    , COUNT(DISTINCT ps_suppkey) AS supplier_cnt
FROM
    partsupp,
    part
WHERE    p_partkey = ps_partkey
         AND p_brand <> 'Brand#23'
         AND p_type NOT LIKE 'MEDIUM ANODIZED%'
         AND p_size IN (1, 32, 33, 46, 7, 42, 21, 40)
         AND ps_suppkey NOT IN (SELECT
                                    s_suppkey
                                FROM
                                    supplier
                                WHERE  s_comment LIKE '%Customer%Complaints%')
GROUP BY
    p_brand
    , p_type
    , p_size
ORDER BY
    supplier_cnt DESC
    , p_brand
    , p_type
    , p_size ;
 98 | 
 99 | 
--fourth_query
-- Counts the joined part, partsupp, supplier and nation rows per region name
-- and returns one row per region ordered by region name

SELECT r_name,count(1) number_of_supplies
      FROM
          part,
          partsupp,
          supplier,
          nation,
          REGION
      WHERE  p_partkey = ps_partkey
              AND s_suppkey = ps_suppkey
              AND s_nationkey = n_nationkey
              AND n_regionkey = r_regionkey
              group by 1
              order by 1;
115 | 
116 | 
--fifth_query
-- Counts suppliers per nation, considering only suppliers that offer at least
-- one part whose name starts with olive with an available quantity above 1

SELECT
    n_name
    , COUNT(1) total_count
FROM
    supplier,
    nation
WHERE    s_suppkey IN (SELECT
                           ps_suppkey
                       FROM
                           partsupp
                       WHERE  ps_partkey IN (SELECT
                                                 p_partkey
                                             FROM
                                                 part
                                             WHERE  p_name LIKE 'olive%')
                              AND ps_availqty > 1)
         AND s_nationkey = n_nationkey
GROUP BY
    1
ORDER BY
    1;
140 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/api/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/ReplayAnalysis/api/__init__.py


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "gui",
 3 |   "version": "0.1.0",
 4 |   "private": true,
 5 |   "proxy": "http://127.0.0.1:5000",
 6 |   "dependencies": {
 7 |     "@awsui/collection-hooks": "^1.0.49",
 8 |     "@awsui/components-react": "^3.0.724",
 9 |     "@awsui/design-tokens": "^3.0.34",
10 |     "@awsui/global-styles": "^1.0.19",
11 |     "@awsui/test-utils-core": "^1.0.33",
12 |     "@emotion/react": "^11.10.5",
13 |     "@emotion/styled": "^11.10.5",
14 |     "@mui/material": "^5.11.6",
15 |     "@testing-library/jest-dom": "^5.16.5",
16 |     "@testing-library/react": "^13.4.0",
17 |     "@testing-library/user-event": "^14.4.3",
18 |     "react": "^18.2.0",
19 |     "react-dom": "^18.2.0",
20 |     "react-router-dom": "^6.8.0",
21 |     "web-vitals": "^3.1.1"
22 |   },
23 |   "devDependencies": {
24 |     "react-scripts": "^5.0.1",
25 |     "@svgr/webpack": "^6.5.1"
26 |   },
27 |   "overrides": {
28 |     "@svgr/webpack": "$@svgr/webpack"
29 |   },
30 |   "scripts": {
31 |     "start": "react-scripts start",
32 |     "start-backend": "cd ../api && flask run",
33 |     "build": "react-scripts build",
34 |     "test": "react-scripts test",
35 |     "eject": "react-scripts eject"
36 |   },
37 |   "eslintConfig": {
38 |     "extends": [
39 |       "react-app",
40 |       "react-app/jest"
41 |     ]
42 |   },
43 |   "browserslist": {
44 |     "production": [
45 |       ">0.2%",
46 |       "not dead",
47 |       "not op_mini all"
48 |     ],
49 |     "development": [
50 |       "last 1 chrome version",
51 |       "last 1 firefox version",
52 |       "last 1 safari version"
53 |     ]
54 |   }
55 | }
56 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/public/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <meta charset="utf-8" />
 5 |     <meta name="viewport" content="width=device-width, initial-scale=1" />
 6 |     <meta name="theme-color" content="#000000" />
 7 |     <meta
 8 |       name="description"
 9 |       content="Web site created using create-react-app"
10 |     />
11 |     <!--
12 |       manifest.json provides metadata used when your web app is installed on a
13 |       user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
14 |     -->
15 |     <link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
16 |     <!--
17 |       Notice the use of %PUBLIC_URL% in the tags above.
18 |       It will be replaced with the URL of the `public` folder during the build.
19 |       Only files inside the `public` folder can be referenced from the HTML.
20 | 
21 |       Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
22 |       work correctly both with client-side routing and a non-root public URL.
23 |       Learn how to configure a non-root public URL by running `npm run build`.
24 |     -->
25 |     <title>Simple Replay Analysis</title>
26 |   </head>
27 |   <body>
28 |     <noscript>You need to enable JavaScript to run this app.</noscript>
29 |     <div id="root"></div>
30 |     <!--
31 |       This HTML file is a template.
32 |       If you open it directly in the browser, you will see an empty page.
33 | 
34 |       You can add webfonts, meta tags, or analytics to this file.
35 |       The build step will place the bundled scripts into the <body> tag.
36 | 
37 |       To begin the development, run `npm start` or `yarn start`.
38 |       To create a production bundle, use `npm run build` or `yarn build`.
39 |     -->
40 |   </body>
41 | </html>
42 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/public/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 |   "short_name": "SRA",
3 |   "name": "Simple Replay Analysis",
4 |   "start_url": ".",
5 |   "display": "standalone",
6 |   "theme_color": "#000000",
7 |   "background_color": "#ffffff"
8 | }
9 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/public/robots.txt:
--------------------------------------------------------------------------------
1 | # https://www.robotstxt.org/robotstxt.html
2 | User-agent: *
3 | Disallow:
4 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/App.js:
--------------------------------------------------------------------------------
 1 | import "@awsui/global-styles/index.css"
 2 | import {HomePage} from "./pages/home";
 3 | import {AnalysisPage} from "./pages/analysis";
 4 | import {BrowserRouter, Routes, Route} from "react-router-dom";
 5 | 
 6 | function App() {
 7 |   return (
 8 |       <BrowserRouter>
 9 |           <div className="App">
10 |               <Routes>
11 |                   <Route path="/" element={<HomePage />} />
12 |                   <Route path="/analysis" element={<AnalysisPage />}/>
13 |               </Routes>
14 |             </div>
15 |       </BrowserRouter>
16 |   );
17 | }
18 | 
19 | export default App;
20 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/App.test.js:
--------------------------------------------------------------------------------
1 | import { render, screen } from '@testing-library/react';
2 | import App from './App';
3 | 
// Renders the root <App /> component and expects an element whose text matches
// /learn react/i to be present in the document.
// NOTE(review): App.js only declares routes for HomePage and AnalysisPage, so
// unless one of those pages renders that text this looks like a leftover
// template test that will fail. Confirm, then assert on real page content.
test('renders learn react link', () => {
  render(<App />);
  // getByText throws if no matching element exists, which fails the test.
  const linkElement = screen.getByText(/learn react/i);
  expect(linkElement).toBeInTheDocument();
});
9 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/components/AccessControl.js:
--------------------------------------------------------------------------------
  1 | import React, {useEffect, useState} from 'react';
  2 | import {Box, FormField, RadioGroup, Select, SpaceBetween, StatusIndicator} from "@awsui/components-react";
  3 | import Input from "@awsui/components-react/input";
  4 | import Button from "@awsui/components-react/button";
  5 | 
  6 | export default function AccessControl({profiles}) {
  7 |     const [type, setType] = useState("profile");
  8 |     const [placeholder, setPlaceholder] = useState("");
  9 |     const [credentials, setCredentials] = useState("");
 10 |     const [disabled, setDisabled] = useState(true);
 11 |     const [saved, setSaved] = useState(false);
 12 |     const [selectedOption, setSelectedOption] = useState({label: 'default', value: 'default'});
 13 |     const options = profiles.map(item => ({label: item, value: item}))
 14 |     const [valid, setValid] = useState(true)
 15 | 
 16 | 
 17 |     useEffect(() => {
 18 |         function toggle() {
 19 |             if (type === "profile") {
 20 |                 if (selectedOption.label !== "default") {
 21 |                     setDisabled(false);
 22 |                 }
 23 |             } else if (type === "role") {
 24 |                 setPlaceholder("arn:aws:iam::123456789012:role/customrole");
 25 |                 setDisabled(false);
 26 |             }
 27 |         }
 28 | 
 29 |         toggle()
 30 |     }, [type, selectedOption]);
 31 | 
 32 |     function save() {
 33 |         if (type === "profile") {
 34 |             fetch(`/profile?name=${selectedOption.label}`).then(response => response.json())
 35 |                 .then(response => {
 36 |                     if (response.success === false) {
 37 |                         setValid(false)
 38 |                     } else {
 39 |                         setSaved(true)
 40 |                     }
 41 |                 })
 42 | 
 43 |                 .catch((error) => {
 44 |                     console.error('Error:', error);
 45 |                     setValid(false)
 46 | 
 47 |                 });
 48 | 
 49 |         } else if (type === "role") {
 50 |             fetch(`/role?arn=${credentials}`).then(response => response.json())
 51 |                 .then(response => {
 52 |                     if (response.success === false) {
 53 |                         // TODO: Assume role Access denied
 54 |                         setValid(false)
 55 | 
 56 |                     } else {
 57 |                         setSaved(true)
 58 | 
 59 |                     }
 60 |                 })
 61 |                 .catch((error) => {
 62 |                     console.error('Error:', error);
 63 |                 });
 64 |         }
 65 |     }
 66 | 
 67 | 
 68 |     return (
 69 |         <Box>
 70 |             <SpaceBetween size={"xs"}>
 71 |                 <FormField label={"Credentials Type"}
 72 |                            description={"Provide an IAM user or role with access to S3. "}>
 73 |                     <RadioGroup
 74 |                         onChange={({detail}) => {
 75 |                             setSaved(false);
 76 |                             setType(detail.value);
 77 |                         }}
 78 |                         value={type}
 79 |                         items={[
 80 |                             {value: "profile", label: "Use a Profile"},
 81 |                             {value: "role", label: "Use an IAM Role"}
 82 |                         ]}/>
 83 |                 </FormField>
 84 | 
 85 | 
 86 |                 <FormField
 87 |                     label=""
 88 |                     errorText={!valid && "Unable to assume provided role. Please check credentials."}
 89 |                     secondaryControl={
 90 |                         <Button
 91 |                             variant={'primary'}
 92 |                             disabled={disabled}
 93 |                             onClick={() => save()}>
 94 |                             Save
 95 |                         </Button>}>
 96 | 
 97 | 
 98 |                     {type === "profile" &&
 99 | 
100 |                         <Select
101 |                             selectedOption={selectedOption}
102 |                             onChange={({detail}) => {
103 |                                 setSaved(false);
104 |                                 setValid(true)
105 |                                 setSelectedOption(detail.selectedOption);
106 |                             }
107 |                             }
108 |                             options={options}
109 |                             selectedAriaLabel="Selected"
110 |                             empty="No options"
111 |                         />
112 | 
113 |                     }
114 | 
115 |                     {type === "role" &&
116 | 
117 |                         <Input value={credentials}
118 |                                placeholder={placeholder}
119 |                                type={'search'}
120 | 
121 |                                disabled={disabled}
122 |                                onChange={(event) => {
123 |                                    setSaved(false);
124 |                                    setValid(true)
125 |                                    setCredentials(event.detail.value)
126 |                                }}></Input>
127 |                     }
128 | 
129 |                     {saved &&
130 | 
131 |                         <StatusIndicator>Success</StatusIndicator>
132 | 
133 |                     }
134 | 
135 |                 </FormField>
136 |             </SpaceBetween>
137 |         </Box>
138 |     )
139 | }


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/components/ReplayAnalysis/AggregateMetrics.js:
--------------------------------------------------------------------------------
 1 | import React, {useEffect, useState} from 'react';
 2 | import {Header, Table} from "@awsui/components-react";
 3 | import millisToMinutesAndSeconds from "../../helpers/msFormatter";
 4 | 
 5 | 
 6 | /**
 7 |  * Aggregate Metrics Table
 8 |  * Displays p-values, averages, standard deviation values for each replay
 9 |  */
10 | const AggregateMetrics = ({selectedQueryTypes, selectedUser, selectedDuration}) => {
11 |     /** @prop selectedQueryTypes, array of selected "query type" options */
12 |     /** @prop selectedUser, array of selected "user" options */
13 |     /** @prop selectedDuration, array of selected "duration" range */
14 | 
15 |     /** Table data */
16 |     const [data, setData] = useState([]);
17 | 
18 |     /** Loading validator to render component given successful response */
19 |     const [loading, setLoading] = useState(true);
20 | 
21 | 
22 |     useEffect(() => {
23 |         const fetchData = async () => {
24 |             fetch(`/agg_metrics?qtype=${JSON.stringify(selectedQueryTypes)}&user=${JSON.stringify(selectedUser)}&start=${(selectedDuration[0])}&end=${(selectedDuration[1])}`).then(response => response.json())
25 |                 .then(response => {
26 |                     if (response.success === false) {
27 | 
28 |                     } else {
29 |                         setData(response.data);
30 |                         setLoading(false);
31 |                     }
32 |                 })
33 |                 .catch((error) => {
34 |                     console.error('Error:', error);
35 |                 });
36 |         };
37 |         fetchData();
38 |     }, [selectedQueryTypes, selectedUser, selectedDuration]);
39 | 
40 |     /** Render components */
41 |     return !loading && (
42 |         <div>
43 |             <Header
44 |                 description={"Percentiles of execution time, elapsed time, and queue time across selected replays." +
45 |                     " These values are representative of the selected query types, users, and time range."}>
46 |                 Aggregate Metrics</Header>
47 |             <Table items={data} columnDefinitions={COL_DEF}></Table>
48 |         </div>
49 |     )
50 | };
51 | 
52 | /** Array of column definitions for Aggregate Metrics table */
53 | const COL_DEF = [
54 |     {
55 |         id: 'sid',
56 |         header: 'Replay',
57 |         cell: item => item.sid,
58 |         width: 50
59 |     },
60 |     {
61 |         id: 'p25',
62 |         header: 'P25 (s)',
63 |         cell: item => millisToMinutesAndSeconds(item.p25, 3),
64 |         width: 50
65 |     },
66 |     {
67 |         id: 'p50',
68 |         header: 'P50 (s)',
69 |         cell: item => millisToMinutesAndSeconds(item.p50, 3),
70 |         width: 50
71 |     },
72 |     {
73 |         id: 'p75',
74 |         header: 'P75 (s)',
75 |         cell: item => millisToMinutesAndSeconds(item.p75, 3),
76 |         width: 50
77 |     },
78 |     {
79 |         id: 'p99',
80 |         header: 'P99 (s)',
81 |         cell: item => millisToMinutesAndSeconds(item.p99, 3),
82 |         width: 50
83 |     },
84 |     {
85 |         id: 'avg',
86 |         header: 'Average (s)',
87 |         cell: item => millisToMinutesAndSeconds(item.avg, 3),
88 |         width: 50
89 |     },
90 |     {
91 |         id: 'std',
92 |         header: 'Standard Deviation (s)',
93 |         cell: item => millisToMinutesAndSeconds(item.std, 3),
94 |         width: 50
95 |     }
96 | ]
97 | 
98 | export default AggregateMetrics;


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/components/ReplayAnalysis/CompareThroughput.js:
--------------------------------------------------------------------------------
  1 | import React, {useEffect, useState} from 'react';
  2 | import {Box, Button, Header, LineChart} from "@awsui/components-react";
  3 | import millisToMinutesAndSeconds from "../../helpers/msFormatter";
  4 | 
  5 | /**
  6 |  * Compare Throughput Chart
  7 |  * Displays the number of queries executed over time for each replay as a line chart
  8 |  */
  9 | const CompareThroughput = ({selectedQueryTypes, selectedDuration, selectedUser}) => {
 10 |     /** @prop selectedQueryTypes, array of selected "query type" options */
 11 |     /** @prop selectedUser, array of selected "user" options */
 12 |     /** @prop selectedDuration, array of selected "duration" range */
 13 | 
 14 |     /** Series data */
 15 |     const [data, setData] = useState([]);
 16 | 
 17 |     /** Loading validator to render component given successful response */
 18 |     const [loading, setLoading] = useState(true);
 19 | 
 20 |     useEffect(() => {
 21 |         const fetchData = async () => {
 22 |             fetch(`/compare_throughput?qtype=${JSON.stringify(selectedQueryTypes)}&user=${JSON.stringify(selectedUser)}`).then(response => response.json())
 23 |                 .then(response => {
 24 |                     if (response.success === false) {
 25 |                         console.log(response.message);
 26 |                     } else {
 27 |                         /** Maps response data to LineChart formatting */
 28 |                         setData(response.data.map((entry) =>
 29 |                             ({
 30 |                                 title: entry.replay,
 31 |                                 type: "line",
 32 |                                 data: entry.values.map((val) =>
 33 |                                     ({x: (val.rel_time), y: val.freq}))
 34 |                             })));
 35 | 
 36 |                         setLoading(false);
 37 |                     }
 38 | 
 39 |                 })
 40 |                 .catch((error) => {
 41 |                     console.error('Error:', error);
 42 | 
 43 |                 });
 44 |         };
 45 |         fetchData();
 46 |     }, [selectedQueryTypes, selectedUser]);
 47 | 
 48 |     /**
 49 |      * Filters a series by given duration range
 50 |      * @param {Object} series Total data set of query frequency values.
 51 |      * @return {Object} filtered data set on duration
 52 |      */
 53 |     function filterRange(series) {
 54 |         return series.map(singleSerie => ({
 55 |             ...singleSerie,
 56 |             data: singleSerie.data.filter(value => value.x >= selectedDuration[0] && value.x <= selectedDuration[1])
 57 |         }));
 58 |     }
 59 | 
 60 |     return !loading && (
 61 |         <div>
 62 |             <Header
 63 |                 description={"Total number of queries executed per second. This data is filtered by the selected query types, users, and time range."}>
 64 |                 Compare Throughput</Header>
 65 | 
 66 |             <LineChart
 67 |                 series={filterRange(data)}
 68 |                 hideFilter={true}
 69 |                 height={300}
 70 |                 statusType={loading ? "loading" : "finished"}
 71 |                 i18nStrings={{
 72 |                     filterLabel: "Filter by replay",
 73 |                     filterPlaceholder: "Filter data",
 74 |                     filterSelectedAriaLabel: "selected",
 75 |                     legendAriaLabel: "Legend",
 76 |                     chartAriaRoleDescription: "line chart",
 77 |                     xTickFormatter: e =>
 78 |                         millisToMinutesAndSeconds(e, 0)
 79 |                 }}
 80 |                 xScaleType={'linear'}
 81 |                 xTitle={'Timestamp (relative to start time)'}
 82 |                 yTitle={'Queries Executed'}
 83 |                 empty={
 84 |                     <Box textAlign="center" color="inherit">
 85 |                         <b>No data available</b>
 86 |                         <Box variant="p" color="inherit">
 87 |                             There is no data available
 88 |                         </Box>
 89 |                     </Box>
 90 |                 }
 91 |                 noMatch={
 92 |                     <Box textAlign="center" color="inherit">
 93 |                         <b>No matching data</b>
 94 |                         <Box variant="p" color="inherit">
 95 |                             There is no matching data to display
 96 |                         </Box>
 97 |                         <Button>Clear filter</Button>
 98 |                     </Box>
 99 |                 }
100 |                 loadingText={"Loading"}
101 |             ></LineChart>
102 |         </div>
103 | 
104 |     );
105 | };
106 | 
107 | export default CompareThroughput


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/components/ReplayAnalysis/QueryLatency.js:
--------------------------------------------------------------------------------
 1 | import React, {useEffect, useState} from 'react';
 2 | import {BarChart, Box, Button, Header} from "@awsui/components-react";
 3 | import millisToMinutesAndSeconds from "../../helpers/msFormatter";
 4 | 
 5 | 
 6 | /**
 7 |  * Query Latency Chart
 8 |  * Displays the distribution of query latency for each replay as a bar chart
 9 |  */
10 | 
11 | const QueryLatency = ({selectedQueryTypes, selectedUser, selectedDuration}) => {
12 |     /** @prop selectedQueryTypes, array of selected "query type" options */
13 |     /** @prop selectedUser, array of selected "user" options */
14 |     /** @prop selectedDuration, array of selected "duration" range */
15 | 
16 |     /** Series data */
17 |     const [data, setData] = useState([]);
18 | 
19 |     /** Loading validator to render component given successful response */
20 |     const [loading, setLoading] = useState(true);
21 | 
22 |     useEffect(() => {
23 |         const fetchData = async () => {
24 |             fetch(`/query_latency?qtype=${JSON.stringify(selectedQueryTypes)}&user=${JSON.stringify(selectedUser)}&start=${(selectedDuration[0])}&end=${(selectedDuration[1])}`).then(response => response.json())
25 |                 .then(response => {
26 |                     if (response.success === false) {
27 |                     } else {
28 |                         setData(response.data.map((entry) =>
29 |                             ({
30 |                                 title: entry.replay,
31 |                                 type: "bar",
32 |                                 data: entry.values.map((val) => ({x: (val.bin), y: (val.count)}))
33 |                             })
34 |                         ))
35 |                         setLoading(false)
36 |                     }
37 | 
38 |                 })
39 | 
40 |                 .catch((error) => {
41 |                     console.error('Error:', error);
42 | 
43 |                 })
44 |         };
45 | 
46 |         fetchData();
47 |     }, [selectedQueryTypes, selectedUser, selectedDuration]);
48 | 
49 | 
50 |     return !loading && (
51 |         <div>
52 |             <Header description={"Distribution of query latency."}>
53 |                 Query Latency</Header>
54 |             <BarChart
55 |                 series={data}
56 | 
57 |                 i18nStrings={{
58 |                     filterLabel: "Filter displayed data",
59 |                     filterPlaceholder: "Filter data",
60 |                     filterSelectedAriaLabel: "selected",
61 |                     legendAriaLabel: "Legend",
62 |                     chartAriaRoleDescription: "bar chart",
63 |                     xTickFormatter: e =>
64 |                         millisToMinutesAndSeconds(e, 1)
65 |                 }}
66 |                 errorText="Error loading data."
67 |                 height={300}
68 |                 loadingText="Loading chart"
69 |                 recoveryText="Retry"
70 |                 xScaleType="categorical"
71 |                 xTitle="Elapsed Time"
72 |                 yTitle="# of Queries"
73 |                 empty={
74 |                     <Box textAlign="center" color="inherit">
75 |                         <b>No data available</b>
76 |                         <Box variant="p" color="inherit">
77 |                             There is no data available
78 |                         </Box>
79 |                     </Box>
80 |                 }
81 |                 noMatch={
82 |                     <Box textAlign="center" color="inherit">
83 |                         <b>No matching data</b>
84 |                         <Box variant="p" color="inherit">
85 |                             There is no matching data to display
86 |                         </Box>
87 |                         <Button>Clear filter</Button>
88 |                     </Box>
89 |                 }
90 |             />
91 |         </div>
92 |     );
93 | };
94 | 
95 | export default QueryLatency;


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/components/ReplayAnalysis/ThroughputBreakdown.js:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import {AreaChart, Box} from "@awsui/components-react";
 3 | import Button from "@awsui/components-react/button";
 4 | 
 5 | /** COMPONENT NOT IN USE */
 6 | /** TODO: Format request for breakdown chart */
 7 | 
 8 | const ThroughputBreakdown = () => {
 9 |     return (
10 |         <AreaChart
11 |             series={[]}
12 |             xDomain={[
13 |                 new Date(1601017200000),
14 |                 new Date(1601045100000)
15 |             ]}
16 |             yDomain={[0, 1]}
17 |             i18nStrings={{
18 |                 filterLabel: "Filter displayed data",
19 |                 filterPlaceholder: "Filter data",
20 |                 filterSelectedAriaLabel: "selected",
21 |                 legendAriaLabel: "Legend",
22 |                 chartAriaRoleDescription: "line chart",
23 |                 detailTotalLabel: "Total",
24 |                 xTickFormatter: e =>
25 |                     e
26 |                         .toLocaleDateString("en-US", {
27 |                             month: "short",
28 |                             day: "numeric",
29 |                             hour: "numeric",
30 |                             minute: "numeric",
31 |                             hour12: !1
32 |                         })
33 |                         .split(",")
34 |                         .join("\n"),
35 |                 yTickFormatter: function o(e) {
36 |                     return (100 * e).toFixed(0) + "%";
37 |                 }
38 |             }}
39 |             ariaLabel="Stacked area chart, multiple metrics"
40 |             errorText="Error loading data."
41 |             height={200}
42 |             loadingText="Loading chart"
43 |             recoveryText="Retry"
44 |             xScaleType="time"
45 |             xTitle="Time (UTC)"
46 |             yTitle="Total CPU load"
47 |             empty={
48 |                 <Box textAlign="center" color="inherit">
49 |                     <b>No data available</b>
50 |                     <Box variant="p" color="inherit">
51 |                         There is no data available
52 |                     </Box>
53 |                 </Box>
54 |             }
55 |             noMatch={
56 |                 <Box textAlign="center" color="inherit">
57 |                     <b>No matching data</b>
58 |                     <Box variant="p" color="inherit">
59 |                         There is no matching data to display
60 |                     </Box>
61 |                     <Button>Clear filter</Button>
62 |                 </Box>
63 |             }
64 |         />
65 |     );
66 | };
67 | 
68 | export default ThroughputBreakdown;


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/components/ReplayOverview.js:
--------------------------------------------------------------------------------
 1 | import React, {useEffect} from 'react';
 2 | import {Table} from "@awsui/components-react";
 3 | 
 4 | 
 5 | export default function ReplayOverview({replays, setReplays}) {
 6 | 
 7 |     useEffect(() => {
 8 |         const fetchData = async () => {
 9 |             const response = await fetch(`/submit_replays`);
10 |             const newData = await response.json();
11 |             setReplays(newData.replays);
12 |         };
13 |         fetchData();
14 |     }, [setReplays]);
15 | 
16 | 
17 |     return (
18 |         <Table items={replays} columnDefinitions={COL_DEF}></Table>
19 |     )
20 | 
21 | 
22 | };
23 | 
24 | const COL_DEF = [
25 |         {
26 |             id: 'sid',
27 |             header: 'Replay',
28 |             cell: item => item.sid,
29 |             width: 50
30 |         },
31 | 
32 |         {
33 |             id: 'id',
34 |             header: 'Cluster',
35 |             cell: item => item.id,
36 |             width: 50
37 |         },
38 |         {
39 |             id: 'status',
40 |             header: 'Status',
41 |             cell: item => item.status,
42 |             width: 50
43 |         },
44 |         {
45 |             id: 'instance',
46 |             header: 'Instance',
47 |             cell: item => item.instance,
48 |             width: 50
49 |         },
50 |         {
51 |             id: 'num_nodes',
52 |             header: 'Nodes',
53 |             cell: item => item.num_nodes,
54 |             width: 50
55 |         },
56 |         {
57 |             id: 'database',
58 |             header: 'Database',
59 |             cell: item => item.database,
60 |             width: 50
61 |         },
62 |         {
63 |             id: 'start_time',
64 |             header: 'Start Time (UTC)',
65 |             cell: item => item.start_time.slice(0, -6),
66 |             width: 50
67 |         },
68 |         {
69 |             id: 'end_time',
70 |             header: 'End Time (UTC)',
71 |             cell: item => item.end_time.slice(0, -6),
72 |             width: 50
73 |         },
74 |         {
75 |             id: 'duration',
76 |             header: 'Duration',
77 |             cell: item => item.duration,
78 |             width: 50
79 |         },
80 |         {
81 |             id: 'query_success',
82 |             header: 'Queries',
83 |             cell: item => item.query_success,
84 |             width: 50
85 |         },
86 |         {
87 |             id: 'connection_success',
88 |             header: 'Connections',
89 |             cell: item => item.connection_success,
90 |             width: 50
91 |         },
92 | 
93 | 
94 |     ]


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/components/ReplayValidation/CopyAgg.js:
--------------------------------------------------------------------------------
 1 | import React, {useEffect, useState} from 'react';
 2 | import {Header, Table} from "@awsui/components-react";
 3 | import CopyDiff from "./CopyDiff";
 4 | 
 5 | 
 6 | const CopyAgg = ({selectedUser, selectedDuration, replays}) => {
 7 | 
 8 |     const [data, setData] = useState([]);
 9 |     const [loading, setLoading] = useState(true);
10 | 
11 |     useEffect(() => {
12 |         const fetchData = async () => {
13 | 
14 |             fetch(`/copy_agg?user=${JSON.stringify(selectedUser)}&start=${(selectedDuration[0])}&end=${(selectedDuration[1])}`).then(response => response.json())
15 |                 .then(response => {
16 |                     if (response.success === false) {
17 |                         console.log(response.message);
18 |                     } else {
19 |                         setData(response.data);
20 |                         setLoading(response.data.length === 0)
21 |                     }
22 |                 })
23 |                 .catch((error) => {
24 |                     console.error('Error:', error);
25 |                 });
26 |         };
27 |         fetchData();
28 |     }, [selectedDuration, selectedUser]);
29 | 
30 | 
31 |     return !loading && (
32 |         <div>
33 |             <Header description={"Aggregated execution metrics of COPY ingestion by replay."}>
34 |                 COPY Ingestion Metrics</Header>
35 |             <Table items={data} columnDefinitions={COL_DEF}></Table>
36 |             <CopyDiff selectedDuration={selectedDuration} replays={replays}/>
37 | 
38 |         </div>
39 |     )
40 | };
41 | 
42 | const COL_DEF = [
43 |     {
44 |         id: 'replay',
45 |         header: 'Replay',
46 |         cell: item => item.sid,
47 |         width: 50,
48 |     },
49 |     {
50 |         id: 'loadedRows',
51 |         header: 'Loaded Rows',
52 |         cell: item => item.loaded_rows,
53 |         width: 50,
54 |         maxWidth: 300
55 |     },
56 |     {
57 |         id: 'loadedBytes',
58 |         header: 'Loaded Bytes',
59 |         cell: item => item.loaded_bytes,
60 |         width: 50
61 |     },
62 |     {
63 |         id: 'sourceFileCount',
64 |         header: 'Source File Count',
65 |         cell: item => item.source_file_count,
66 |         width: 50
67 |     }
68 | 
69 | ]
70 | 
71 | export default CopyAgg;


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/components/ReplayValidation/ErrorDistribution.js:
--------------------------------------------------------------------------------
 1 | import React, {useEffect, useState} from 'react';
 2 | import {BarChart, Box, Header} from "@awsui/components-react";
 3 | import Button from "@awsui/components-react/button";
 4 | 
 5 | const ErrorDistribution = () => {
 6 | 
 7 |     const [data, setData] = useState([]);
 8 |     const [loading, setLoading] = useState(true);
 9 | 
10 |     useEffect(() => {
11 |         const fetchData = async () => {
12 |             fetch(`/err_distribution`).then(response => response.json())
13 |                 .then(response => {
14 |                     if (response.success === false) {
15 |                     } else {
16 |                         setData(response.data.map((entry) =>
17 |                             ({
18 |                                 title: entry.replay,
19 |                                 type: "bar",
20 |                                 data: entry.values.map((val) =>
21 |                                     ({x: (val.category), y: val.freq}))
22 |                             })))
23 |                         setLoading(false)
24 |                     }
25 | 
26 |                 })
27 | 
28 |                 .catch((error) => {
29 |                     console.error('Error:', error);
30 | 
31 |                 });
32 |         };
33 |         fetchData();
34 |     }, []);
35 | 
36 | 
37 |     return !loading && (
38 |         <div>
39 |             <Header description={"Frequency of errors across selected replays. Not filtered on any filter criteria."}>
40 |                 Error Category Distribution</Header>
41 |             <BarChart
42 |                 series={data}
43 | 
44 |                 i18nStrings={{
45 |                     filterLabel: "Filter displayed data",
46 |                     filterPlaceholder: "Filter data",
47 |                     filterSelectedAriaLabel: "selected",
48 |                     legendAriaLabel: "Legend",
49 |                     chartAriaRoleDescription: "bar chart"
50 |                 }}
51 |                 ariaLabel="Multiple data series line chart"
52 |                 errorText="Error loading data."
53 |                 height={300}
54 |                 loadingText="Loading chart"
55 |                 recoveryText="Retry"
56 |                 xScaleType="categorical"
57 |                 xTitle="Error Category"
58 |                 yTitle="Queries"
59 |                 empty={
60 |                     <Box textAlign="center" color="inherit">
61 |                         <b>No data available</b>
62 |                         <Box variant="p" color="inherit">
63 |                             There is no data available
64 |                         </Box>
65 |                     </Box>
66 |                 }
67 |                 noMatch={
68 |                     <Box textAlign="center" color="inherit">
69 |                         <b>No matching data</b>
70 |                         <Box variant="p" color="inherit">
71 |                             There is no matching data to display
72 |                         </Box>
73 |                         <Button>Clear filter</Button>
74 |                     </Box>
75 |                 }
76 |             />
77 |         </div>
78 |     );
79 | };
80 | 
81 | export default ErrorDistribution;


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/components/ReplayValidation/SpectrumDiff.js:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import {Table} from "@awsui/components-react";
 3 | 
 4 | 
 5 | export default function SpectrumDiff() {
 6 |     const COL_DEF = [
 7 |         {
 8 |             id: 'id',
 9 |             header: 'Replay',
10 |             cell: item => item.id,
11 |             width: 50
12 |         },
13 |         {
14 |             id: 'cluster',
15 |             header: 'Cluster',
16 |             cell: item => item.cluster,
17 |             width: 50
18 |         },
19 |         {
20 |             id: 'instance',
21 |             header: 'Instance',
22 |             cell: item => item.instance,
23 |             width: 50
24 |         },
25 |         {
26 |             id: 'nodes',
27 |             header: 'Nodes',
28 |             cell: item => item.nodes,
29 |             width: 50
30 |         },
31 |         {
32 |             id: 'db',
33 |             header: 'Database',
34 |             cell: item => item.db,
35 |             width: 50
36 |         },
37 |         {
38 |             id: 'start',
39 |             header: 'Start Time',
40 |             cell: item => item.start,
41 |             width: 50
42 |         },
43 |         {
44 |             id: 'end',
45 |             header: 'End Time',
46 |             cell: item => item.end,
47 |             width: 50
48 |         },
49 |         {
50 |             id: 'duration',
51 |             header: 'Duration',
52 |             cell: item => item.duration,
53 |             width: 50
54 |         },
55 |         {
56 |             id: 'executed',
57 |             header: 'Queries Executed',
58 |             cell: item => item.executed,
59 |             width: 50
60 |         },
61 |         {
62 |             id: 'aborted',
63 |             header: 'Queries Aborted',
64 |             cell: item => item.aborted,
65 |             width: 50
66 |         },
67 |         {
68 |             id: 'connections',
69 |             header: 'Connections',
70 |             cell: item => item.connections,
71 |             width: 50
72 |         },
73 | 
74 | 
75 |     ]
76 | 
77 |     return (
78 |         <Table items={[]} columnDefinitions={COL_DEF}></Table>
79 |     )
80 | };


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/components/navigation/GlobalFilters.js:
--------------------------------------------------------------------------------
  1 | import React, {useState, useEffect} from 'react';
  2 | import {Box, Button, Header, Multiselect, SpaceBetween,} from "@awsui/components-react";
  3 | import Slider from '@mui/material/Slider';
  4 | import * as awsui from '@awsui/design-tokens';
  5 | import millisToMinutesAndSeconds from "../../helpers/msFormatter";
  6 | import prepareSelectOptions from "../../helpers/PrepareOptions";
  7 | 
  8 | /**
  9 |  * Global filters
 10 |  * Manipulates query type, user, and duration selection values
 11 |  * Updates global selectedQueryTypes, selectedUser, selectedDuration variables
 12 |  */
 13 | const GlobalFilters = ({selectedQueryTypes, setSelectedQueryTypes,
 14 |                            selectedUser, setSelectedUser,
 15 |                            selectedDuration, setSelectedDuration}) => {
 16 |     /** @prop selectedQueryTypes, array of selected "query type" options */
 17 |     /** @prop setSelectedQueryTypes, useState setter for selectedQueryTypes */
 18 |     /** @prop selectedUser, array of selected "user" options */
 19 |     /** @prop setSelectedUser, useState setter for selectedUser  */
 20 |     /** @prop selectedDuration, array of selected "duration" range in milliseconds. ex: [0,190290]  */
 21 |     /** @prop setSelectedDuration, useState setter for selectedDuration */
 22 | 
 23 |     /** Longest relative duration in milliseconds */
 24 |     const [maxDuration, setMaxDuration ] = useState(0);
 25 | 
 26 |     /** Array of user options from response data */
 27 |     const [selectUserOptions, setSelectUserOptions] = useState();
 28 | 
 29 |     useEffect(() => {
 30 |         const fetchData = async () => {
 31 |             const response = await fetch(`/time_range`);
 32 |             const newData = await response.json();
 33 | 
 34 |             setMaxDuration(newData.time);
 35 |             setSelectedDuration([0, maxDuration])
 36 |             setSelectUserOptions(prepareSelectOptions(newData.users))
 37 |         };
 38 |         fetchData();
 39 |     },
 40 |     [maxDuration, setSelectedDuration]);
 41 | 
 42 | 
 43 |     function clearFilter() {
 44 |         setSelectedQueryTypes(queryTypes)
 45 |         setSelectedUser([])
 46 |         setSelectedDuration([0, maxDuration])
 47 |     }
 48 | 
 49 |     return (
 50 |         <div style={boxStyle}>
 51 |             <Header variant={"h2"}>Filter Results: </Header>
 52 | 
 53 |             <SpaceBetween size={'m'}>
 54 |                 <Multiselect selectedOptions={selectedQueryTypes}
 55 |                              options={queryTypes}
 56 |                              placeholder={"Filter by query types"}
 57 |                              onChange={({ detail }) => setSelectedQueryTypes(detail.selectedOptions)}
 58 |                 />
 59 |                 <Multiselect selectedOptions={selectedUser}
 60 |                              options={selectUserOptions}
 61 |                              placeholder={"Filter by user"}
 62 |                              onChange={({ detail }) => setSelectedUser(detail.selectedOptions)
 63 |                 }/>
 64 |             </SpaceBetween>
 65 | 
 66 | 
 67 |             <Box>
 68 |                 <h4 >Filter by time frame</h4>
 69 |                 <Slider getAriaLabel={() => 'Range'}
 70 |                         valueLabelFormat={(value)=> `${millisToMinutesAndSeconds(value)}`}
 71 |                         value={selectedDuration}
 72 |                         min={0}
 73 |                         max={maxDuration}
 74 |                         onChange={ (event, newValue) => setSelectedDuration(newValue)}
 75 |                         size={'large'}
 76 |                         valueLabelDisplay="auto"
 77 |                         disableSwap
 78 |                         marks={[{ value: 0, label: millisToMinutesAndSeconds(0,0)},
 79 |                                 {value: maxDuration, label: millisToMinutesAndSeconds(maxDuration,0)}]}
 80 | 
 81 |                 />
 82 |             </Box>
 83 | 
 84 |             <Button onClick={() => clearFilter()}>Clear filters</Button>
 85 | 
 86 |         </div>
 87 | 
 88 |     )
 89 | };
 90 | 
/**
 * Inline style object applied to the filters container <div>.
 * Colours come from the AWS-UI design tokens imported as `awsui`.
 * @const {object}
 */
const boxStyle = {
    // Keep the filter box pinned to the top of its scroll container.
    position: 'sticky',
    top: 0,
    display: 'block',
    backgroundColor: awsui.colorBackgroundControlDefault,
    // NOTE(review): no borderStyle is set here, so the border colour/width may not render - confirm.
    borderColor: awsui.colorBorderControlDefault,
    borderWidth: 2,
    padding: 20,
    // NOTE(review): a single number is presumably not a complete box-shadow value, and
    // boxShadowColor does not look like a standard CSS property - confirm intent.
    boxShadow: 20,
    boxShadowColor:awsui.colorBorderControlDefault
};
106 | 
/**
 * Query type options offered by the "query types" filter.
 * Each entry is {label, value}; the value is the label's 1-based position
 * in the list below, stored as a string ("1" for SELECT ... "11" for OTHER).
 * @const {object}
 */
const queryTypes = [
    "SELECT",
    "INSERT",
    "UPDATE",
    "DELETE",
    "COPY",
    "UNLOAD",
    "DDL",
    "COMMAND",
    "CTAS",
    "UTILITY",
    "OTHER",
].map((label, position) => ({label: label, value: String(position + 1)}));
156 | 
157 | export default GlobalFilters;


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/components/navigation/NavDrawer.js:
--------------------------------------------------------------------------------
 1 | import {SideNavigation} from "@awsui/components-react";
 2 | import * as React from 'react';
 3 | 
 4 | /**
 5 |  * Navigation Sidebar
 6 |  * List of anchor tags
 7 |  */
 8 | const Nav = () => {
 9 |     return (
10 |       <SideNavigation
11 |         header={"Simple Replay Analysis"}
12 |         items={[
13 |             {type: "link", text: "Home", href: "/"},
14 |             {type: "link", text: "Analysis", href: "/analysis#analysis"},
15 |             {type: "link", text: "Validation", href: "/analysis#validation"},
16 |             {type: "link", text: "Resources", href: "/analysis#resources"},
17 |         ]}
18 |       />
19 |     );
20 | }
21 | export default Nav;


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/components/navigation/ToolBar.js:
--------------------------------------------------------------------------------
 1 | import {SideNavigation} from "@awsui/components-react";
 2 | import * as React from 'react';
 3 | 
 4 | /**
 5 |  * Help Sidebar
 6 |  * List of anchor tags
 7 |  */
 8 | const ToolBar = () => {
 9 |     return (
10 |       <SideNavigation
11 |                 header={{text: "Help"}}
12 |                 items={[
13 |          {
14 |           type: "section",
15 |           text: "Troubleshooting",
16 |           items: [
17 |               {
18 |               type: "link",
19 |                   external:true,
20 |               text: "Common Query Problems",
21 |               href: "https://docs.aws.amazon.com/redshift/latest/dg/queries-troubleshooting.html"
22 |             },
23 |             {
24 |               type: "link", external:true,
25 |               text: "Redshift Spectrum Queries",
26 |               href: "https://docs.aws.amazon.com/redshift/latest/dg/c-spectrum-troubleshooting.html"
27 |             },
28 | 
29 |             {
30 |               type: "link", external:true,
31 |               text: "1023: Serializable Isolation Error",
32 |               href: "https://aws.amazon.com/premiumsupport/knowledge-center/redshift-serializable-isolation/"
33 |             }
34 |           ]
35 |         },
36 |         {
37 |           type: "section",
38 |           text: "Reference",
39 |           items: [
40 |             { type: "link", external:true, text: "Redshift Documentation", href: "https://docs.aws.amazon.com/redshift/index.html" },
41 |             { type: "link",  external:true,text: "Database Developer Guide", href: "https://docs.aws.amazon.com/redshift/latest/dg/welcome.html" },
42 |               { type: "link",  external:true, text: "Cluster Management Guide", href: "https://docs.aws.amazon.com/redshift/latest/mgmt/welcome.html" },
43 | 
44 |           ]
45 |         }
46 |       ]}
47 |               />
48 |     );
49 | }
50 | export default ToolBar;


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/helpers/PrepareOptions.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Iterates through provided
 3 |  * @param {Object} field name of field
 4 |  * @param {Object} data Total data set of items
 5 |  * @return {Object} list of unique values formatted as options for selection component
 6 |  */
 7 | export default function prepareSelectOptions(field, data) {
 8 |     const optionSet = [];
 9 | 
10 |     /** If data exists, iterate through data set and collect unique values */
11 |     if (data) {
12 |         data.forEach(item => {
13 |             if (optionSet.indexOf(item[field]) === -1) {
14 |                 optionSet.push(item[field]);
15 |             }
16 |         })
17 | 
18 |         /** else no data,  iterate through field object to format values as options */
19 |     } else {
20 |         field.forEach(item => {
21 |             if (optionSet.indexOf(item) === -1) {
22 |                 optionSet.push(item);
23 |             }
24 |         });
25 |     }
26 | 
27 |     optionSet.sort();
28 |     const options = [];
29 | 
30 |     /** format list as options Object */
31 |     optionSet.forEach((item, index) => options.push({label: item, value: (index + 1).toString()}));
32 | 
33 |     return options;
34 | }
35 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/helpers/msFormatter.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Converts ms value to min:sec.ms string for visual formatting
 3 |  * @param {*} milliseconds, value in milliseconds
 4 |  * @param {number} digits, number of digits to round to (default 2)
 5 |  * @return {string} Formatted string
 6 |  */
 7 | 
 8 | export default function millisToMinutesAndSeconds(milliseconds, digits = 2) {
 9 |   const minutes = Math.floor(milliseconds / 60000);
10 |   const seconds = ((milliseconds % 60000) / 1000).toFixed(digits);
11 | 
12 |   return minutes + ":" + (seconds < 10 ? '0' : '') + seconds;
13 | }


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/index.js:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import ReactDOM from 'react-dom/client';
 3 | import App from './App';
 4 | import reportWebVitals from './reportWebVitals';
 5 | 
 6 | const root = ReactDOM.createRoot(document.getElementById('root'));
 7 | root.render(
 8 |   <React.StrictMode>
 9 |     <App />
10 |   </React.StrictMode>
11 | );
12 | 
13 | // If you want to start measuring performance in your app, pass a function
14 | // to log results (for example: reportWebVitals(console.log))
15 | // or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
16 | reportWebVitals();
17 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/pages/home.js:
--------------------------------------------------------------------------------
  1 | import Input from "@awsui/components-react/input";
  2 | import Button from "@awsui/components-react/button";
  3 | import AppLayout from "@awsui/components-react/app-layout";
  4 | import React, {useEffect, useState} from "react";
  5 | import {Container, FormField, Header, SpaceBetween, TokenGroup} from "@awsui/components-react";
  6 | import ReplayList from "../components/ReplayList";
  7 | import AccessControl from "../components/AccessControl";
  8 | 
  9 | export const HomePage = () => {
 10 | 
 11 |     const [resource, setResource] = useState('');
 12 |     const [replays, setReplays] = useState([])
 13 |     const [buckets, setBuckets] = useState([])
 14 |     const [bucketLabels, setBucketLabels] = useState([])
 15 |     const [searching, setSearching] = useState(false)
 16 |     const [profiles, setProfiles] = useState([])
 17 |     const [valid, setValid] = useState(true)
 18 | 
 19 |     useEffect(() => {
 20 |         const fetchData = async () => {
 21 |             const response = await fetch(`/getprofile`);
 22 |             const newData = await response.json();
 23 |             setProfiles(newData.profiles);
 24 |         };
 25 |         fetchData();
 26 |     }, []);
 27 | 
 28 |     function search(uri) {
 29 |         // TODO: explicit s3 uri validation
 30 | 
 31 |         if (uri !== '' && uri.startsWith('s3://')) {
 32 |             setSearching(true);
 33 | 
 34 |             fetch(`/search?uri=${encodeURIComponent(uri)}`).then(response => response.json())
 35 |                 .then(response => {
 36 |                     if (!response.success) {
 37 |                         setValid(false)
 38 |                     } else {
 39 |                         if (!buckets.includes(response.bucket)) {
 40 |                             setReplays(replays => [...replays, ...response.replays]);
 41 |                             setBuckets(buckets => [...buckets, response.bucket]);
 42 |                             setBucketLabels(buckets => [...buckets, {label: response.bucket}]);
 43 |                         }
 44 |                     }
 45 | 
 46 |                     setSearching(false);
 47 | 
 48 | 
 49 |                 }).catch((error) => {
 50 |                 console.error('Error:', error);
 51 |                 setSearching(false);
 52 | 
 53 |             });
 54 |             setResource("");
 55 |         } else {
 56 |             setValid(false)
 57 | 
 58 |         }
 59 |     }
 60 | 
 61 |     /**
 62 |      * Removes entries from list of replays when bucket is removed
 63 |      * @param {number} itemIndex Total data set of query frequency values.
 64 |      */
 65 |     function removeBucket(itemIndex) {
 66 |         let bucket = bucketLabels[itemIndex].label
 67 |         setBucketLabels([...bucketLabels.slice(0, itemIndex),
 68 |             ...bucketLabels.slice(itemIndex + 1)]);
 69 |         setBuckets([...buckets.slice(0, itemIndex),
 70 |             ...buckets.slice(itemIndex + 1)]);
 71 |         let result = replays.filter((data) => {
 72 |             return data.bucket.search(bucket) === -1;
 73 |         });
 74 |         setReplays(result);
 75 |     }
 76 | 
 77 |     return (
 78 |         <AppLayout
 79 |             navigationHide={true}
 80 |             content={
 81 |                 <Container
 82 |                     header={
 83 |                         <Header variant="h1" description="An analysis tool provided by Redshift.">
 84 |                             Test Drive Replay Analysis
 85 |                         </Header>
 86 |                     }>
 87 |                     <SpaceBetween size={"l"}>
 88 |                         <AccessControl profiles={profiles}></AccessControl>
 89 | 
 90 | 
 91 |                         <FormField label="Replay analysis file location"
 92 |                                    errorText={!valid && "Unable to access S3. Please check the provided URI."}
 93 |                                    secondaryControl={
 94 |                                        <Button
 95 |                                            disabled={resource === ""}
 96 |                                            loading={searching}
 97 |                                            variant={'primary'}
 98 |                                            onClick={() => search(resource)}>
 99 |                                            Search
100 |                                        </Button>}>
101 | 
102 |                             <Input value={resource}
103 |                                    errorText="This is an error message."
104 | 
105 |                                    type={'search'}
106 |                                    placeholder={"s3://bucket/prefix/object"}
107 |                                    onChange={(event) => {
108 |                                        setResource(event.detail.value);
109 |                                        setValid(true)
110 |                                    }}/>
111 | 
112 |                         </FormField>
113 | 
114 |                         <TokenGroup
115 |                             onDismiss={({detail: {itemIndex}}) => {
116 |                                 removeBucket(itemIndex)
117 |                             }}
118 |                             items={bucketLabels}>
119 | 
120 |                         </TokenGroup>
121 | 
122 |                         <ReplayList search={searching} replays={replays}/>
123 | 
124 |                     </SpaceBetween>
125 | 
126 | 
127 |                 </Container>
128 |             }
129 | 
130 |         />
131 |     );
132 | 
133 | }
134 | 
135 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/reportWebVitals.js:
--------------------------------------------------------------------------------
 1 | const reportWebVitals = onPerfEntry => {
 2 |   if (onPerfEntry && onPerfEntry instanceof Function) {
 3 |     import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => {
 4 |       getCLS(onPerfEntry);
 5 |       getFID(onPerfEntry);
 6 |       getFCP(onPerfEntry);
 7 |       getLCP(onPerfEntry);
 8 |       getTTFB(onPerfEntry);
 9 |     });
10 |   }
11 | };
12 | 
13 | export default reportWebVitals;
14 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/gui/src/setupTests.js:
--------------------------------------------------------------------------------
1 | // jest-dom adds custom jest matchers for asserting on DOM nodes.
2 | // allows you to do things like:
3 | // expect(element).toHaveTextContent(/react/i)
4 | // learn more: https://github.com/testing-library/jest-dom
5 | import '@testing-library/jest-dom';
6 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/replay_analysis.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import common.log as log_helper
 4 | 
 5 | 
 6 | def launch_analysis_v2():
 7 |     """Package install and server init"""
 8 | 
 9 |     # add explicit instructions for user
10 | 
11 |     os.system("pip install -r requirements.txt")
12 |     os.chdir(f"{os.getcwd()}/tools/ReplayAnalysis/gui")
13 | 
14 |     # explicit version checking
15 |     if os.system("node -v") != 0:
16 |         print("Please install node before proceeding.")
17 |         exit(-1)
18 | 
19 |     if os.system("npm install") != 0:
20 |         print("Could not install npm packages. ")
21 | 
22 |     os.system("npm run start-backend &")
23 |     os.system("npm start")
24 | 
25 | 
def main():
    """Set up logging, log the version, then launch the analysis tool.

    Logging is initialised through ``log_helper.init_logging`` with the file
    name ``replay_analysis.log``, the directory ``tools/ReplayAnalysis/logs``
    and the logger name ``ReplayAnalysisLogger``.
    """
    log_helper.init_logging(
        "replay_analysis.log",
        dir="tools/ReplayAnalysis/logs",
        logger_name="ReplayAnalysisLogger",
    )
    log_helper.log_version()
    launch_analysis_v2()


# Run the launcher only when this file is executed as a script.
if __name__ == "__main__":
    main()
34 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/ReplayAnalysis/tests/__init__.py


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/tests/test_replay_analysis.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import patch, MagicMock, mock_open, Mock
 2 | import unittest
 3 | import botocore.session
 4 | import tools.ReplayAnalysis.replay_analysis as replay_analysis
 5 | 
 6 | 
 7 | class TestReplayAnalysis(unittest.TestCase):
 8 |     def setUp(self):
 9 |         self.severless_cluster = {
10 |             "is_serverless": True,
11 |             "secret_name": None,
12 |             "host": "host",
13 |             "region": "someregion",
14 |             "port": 5439,
15 |             "database": "somedb",
16 |             "id": "someid",
17 |         }
18 |         self.bucket = {"url": "someurl", "bucket_name": "somebucket", "prefix": "someprefix"}
19 |         self.provisioned_cluster = {
20 |             "is_serverless": False,
21 |             "secret_name": None,
22 |             "host": "host",
23 |             "region": "someregion",
24 |             "port": 5439,
25 |             "database": "somedb",
26 |             "id": "someid",
27 |         }
28 |         self.report = MagicMock()
29 |         self.replay = "someid"
30 |         self.cluster_endpoint = "someid"
31 |         self.start_time = "sometime"
32 |         self.end_time = "sometime"
33 |         self.bucket_url = "url"
34 |         self.iam_role = "somerole"
35 |         self.user = "someuser"
36 |         self.rs_client_response = {"DbUser": self.user, "DbPassword": "password123"}
37 |         model = botocore.session.get_session().get_service_model("redshift")
38 |         factory = botocore.errorfactory.ClientExceptionsFactory()
39 |         self.exceptions = factory.create_client_exceptions(model)
40 | 
41 |     @patch("os.system")
42 |     @patch("os.chdir")
43 |     @patch("builtins.print")
44 |     def test_launch_analysis_v2_exit(self, mock_print, mock_chdir, mock_os):
45 |         mock_os.side_effect = [5, 10]
46 |         with self.assertRaises(SystemExit):
47 |             replay_analysis.launch_analysis_v2()
48 |         mock_print.assert_called_once_with("Please install node before proceeding.")
49 | 
50 |     @patch("os.system")
51 |     @patch("os.chdir")
52 |     @patch("builtins.print")
53 |     def test_launch_analysis_v2_cannot_install(self, mock_print, mock_chdir, mock_os):
54 |         mock_os.side_effect = [0, 0, 1, 1, 1]
55 |         replay_analysis.launch_analysis_v2()
56 |         mock_print.assert_called_once_with("Could not install npm packages. ")
57 | 
58 |     @patch("os.system", return_value=0)
59 |     @patch("os.chdir")
60 |     @patch("builtins.print")
61 |     def test_launch_analysis_v2_success(self, mock_print, mock_chdir, mock_os):
62 |         replay_analysis.launch_analysis_v2()
63 |         mock_print.assert_not_called()
64 | 


--------------------------------------------------------------------------------
/tools/ReplayAnalysis/util/report_gen.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import pandas as pd
  3 | import yaml
  4 | 
  5 | from functools import partial
  6 | from reportlab.lib.pagesizes import letter
  7 | from reportlab.lib.units import inch
  8 | from reportlab.platypus import (
  9 |     PageBreak,
 10 |     TableStyle,
 11 |     Table,
 12 |     Spacer,
 13 |     Image,
 14 |     SimpleDocTemplate,
 15 |     Paragraph,
 16 |     ListFlowable,
 17 |     ListItem,
 18 | )
 19 | from report_util import (
 20 |     styles,
 21 |     build_pdf_tables,
 22 |     df_to_np,
 23 |     first_page,
 24 |     later_pages,
 25 |     hist_gen,
 26 |     sub_yaml_vars,
 27 | )
 28 | 
 29 | g_stylesheet = styles()
 30 | 
 31 | 
 32 | def pdf_gen(report, summary=None):
 33 |     """This function formats the summary report using the content from report_content.yaml to populate the paragraphs,
 34 |        titles, and headers. The tables are populated via the Report param which has all the dataframes.
 35 | 
 36 |     @param report: Report object
 37 |     @param summary: list, replay summary
 38 | 
 39 |     """
 40 |     with open("report_content.yaml", "r") as stream:
 41 |         docs = yaml.safe_load(stream)
 42 | 
 43 |         style = g_stylesheet.get("styles")
 44 |         elems = []  # elements array used to build pdf structure
 45 |         pdf = SimpleDocTemplate(
 46 |             f"{report.replay_id}_report.pdf",
 47 |             pagesize=letter,
 48 |             leftMargin=0.75 * inch,
 49 |             rightMargin=0.75 * inch,
 50 |             topMargin=0.75 * inch,
 51 |             bottomMargin=0.75 * inch,
 52 |         )
 53 | 
 54 |         # title and subtitle and cluster info table
 55 |         elems.append(Paragraph(docs["title"], style["Title"]))
 56 |         elems.append(Paragraph(sub_yaml_vars(report, docs["subtitle"]), style["Heading4"]))
 57 |         cluster_info = pd.DataFrame.from_dict(report.cluster_details, orient="index")
 58 |         elems.append(
 59 |             Table(
 60 |                 df_to_np(report.cluster_details.keys(), cluster_info.transpose()),
 61 |                 hAlign="LEFT",
 62 |                 style=g_stylesheet.get("table_style"),
 63 |             )
 64 |         )
 65 |         # replay summary
 66 |         if summary is not None:
 67 |             elems.append(Paragraph(f"Replay Summary", style["Heading4"]))
 68 |             elems.append(
 69 |                 ListFlowable(
 70 |                     [ListItem(Paragraph(x, style["Normal"])) for x in summary],
 71 |                     bulletType="bullet",
 72 |                 )
 73 |             )
 74 |             elems.append(Spacer(0, 5))
 75 | 
 76 |         elems.append(Paragraph(docs["report_paragraph"], style["Normal"]))
 77 | 
 78 |         # glossary section
 79 |         elems.append(Paragraph(docs["glossary_header"], style["Heading4"]))
 80 |         elems.append(Paragraph(docs["glossary_paragraph"], style["Normal"]))
 81 |         elems.append(
 82 |             ListFlowable(
 83 |                 [ListItem(Paragraph(x, style["Normal"])) for x in docs["glossary"]],
 84 |                 bulletType="bullet",
 85 |             )
 86 |         )
 87 |         elems.append(Spacer(0, 5))
 88 | 
 89 |         # access data section
 90 |         elems.append(Paragraph(docs["data_header"], style["Heading4"]))
 91 |         elems.append(Paragraph(sub_yaml_vars(report, docs["data_paragraph"]), style["Normal"]))
 92 |         elems.append(
 93 |             ListFlowable(
 94 |                 [ListItem(Paragraph(x, style["Normal"])) for x in docs["raw_data"]],
 95 |                 bulletType="bullet",
 96 |             )
 97 |         )
 98 |         elems.append(Spacer(0, 5))
 99 |         elems.append(Paragraph(sub_yaml_vars(report, docs["agg_data_paragraph"]), style["Normal"]))
100 | 
101 |         # notes section
102 |         elems.append(Paragraph(docs["notes_header"], style["Heading4"]))
103 |         elems.append(Paragraph(docs["notes_paragraph"], style["Normal"]))
104 |         elems.append(
105 |             ListFlowable(
106 |                 [ListItem(Paragraph(x, style["Normal"])) for x in docs["notes"]],
107 |                 bulletType="bullet",
108 |             )
109 |         )
110 | 
111 |         elems.append(PageBreak())  # page 2: cluster details
112 | 
113 |         # query breakdown
114 |         build_pdf_tables(elems, docs["query_breakdown"], report)
115 |         elems.append(Spacer(0, 5))
116 | 
117 |         # histogram and description
118 |         image_path = hist_gen(
119 |             x_data=report.feature_graph["sec_start"],
120 |             y_data=report.feature_graph["count"],
121 |             title=docs["graph"].get("title"),
122 |             x_label="Average Elapsed Time (s)",
123 |         )
124 | 
125 |         desc = Paragraph(docs["graph"].get("paragraph"), style["Normal"])
126 |         data = [[Image(image_path, width=300, height=200, hAlign="LEFT"), desc]]
127 |         elems.append(Table(data, style=TableStyle([("VALIGN", (0, 0), (-1, -1), "MIDDLE")])))
128 |         elems.append(Spacer(0, 5))
129 | 
130 |         # cluster metrics table
131 |         build_pdf_tables(elems, docs["cluster_metrics"], report)
132 | 
133 |         elems.append(PageBreak())  # page 3+ measure tables
134 | 
135 |         build_pdf_tables(
136 |             elems, docs["measure_tables"], report
137 |         )  # build 5 measure tables all at once
138 | 
139 |         # build pdf
140 |         pdf.build(
141 |             elems,
142 |             onFirstPage=partial(first_page, report=report),
143 |             onLaterPages=partial(later_pages, report=report),
144 |         )
145 |         os.remove(image_path)
146 | 
147 |         return pdf.filename
148 | 


--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aws/redshift-test-drive/354b7ed75180a6b915d856175cffd6414cae998e/tools/__init__.py


--------------------------------------------------------------------------------