├── .dockerignore ├── .github └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── NOTICE ├── README.md ├── compat-tool ├── README.md ├── check-percentages.py ├── compat.py ├── create-compat-csv.py └── test │ ├── mongod.log.2020-11-10T19-33-14 │ ├── mongodb.log │ ├── not_a_log_file.txt │ ├── sample-5-0-features.py │ ├── sample-python-1.py │ ├── sample-python-2.py │ ├── testlog.txt │ └── testlog2.txt ├── global-clusters-automation ├── README.md ├── add_secondarycluster.py ├── convert_to_global_lambda_function.py ├── deploy │ ├── global_clusters_automation.yml │ ├── package_lambda.sh │ ├── package_lambda_convert.sh │ ├── package_lambda_failover.sh │ └── package_lambda_failover_and_convert.sh ├── failover_and_convert_lambda_function.py ├── failover_and_convert_to_global.py ├── failover_and_delete_global_cluster.py ├── failover_and_delete_lambda_function.py ├── images │ ├── GlobalClustersAutomation-BCP.png │ └── GlobalClustersAutomation-DR.png ├── requirements.txt ├── route53_endpoint_management.py └── test │ ├── test_convert_to_global.py │ ├── test_failover_and_convert.py │ └── test_failover_and_delete.py ├── index-tool ├── Dockerfile ├── README.md ├── migrationtools │ ├── __init__.py │ └── documentdb_index_tool.py ├── requirements.txt └── test │ ├── __init__.py │ ├── fixtures │ └── metadata │ │ ├── capped_collection │ │ ├── capped_collection.bson │ │ └── capped_collection.metadata.json │ │ ├── geo_index │ │ └── geo_indexed_col.metadata.json │ │ └── storage_engine │ │ └── storage_engine.metadata.json │ ├── test1.bash │ ├── test1.expects │ ├── test1 │ └── idxtest │ │ └── tmc.metadata.json │ └── test_compatibility.py ├── migration ├── README.md ├── cosmos-db-migration-utility │ ├── .gitignore │ ├── README.md │ ├── docs │ │ ├── architecture │ │ │ ├── architecture-diagram.drawio │ │ │ └── architecture-diagram.png │ │ └── images │ │ │ ├── cloud-trail-log.png │ │ │ ├── cloud-watch-log-group.png │ │ │ ├── core-resources-create-stack.png │ │ │ ├── core-resources-review-stack.png │ │ │ ├── core-resources-stack-details.png │ │ │ ├── core-resources-stack-status.png │ │ │ ├── documentdb-connection-string.png │ │ │ ├── documentdb-resources-create-stack.png │ │ │ ├── documentdb-resources-stack-details.png │ │ │ ├── documentdb-resources-stack-status.png │ │ │ ├── ec2-instance-ami.png │ │ │ ├── ec2-instance-review.png │ │ │ ├── ec2-instance-review3.png │ │ │ └── s3-bucket-with-lambda-functions.png │ ├── lib │ │ └── lambda │ │ │ └── lambda-pack-pymongo.zip │ ├── scripts │ │ └── build-package.sh │ └── src │ │ ├── cloudformation │ │ ├── core-resources.yaml │ │ └── documentdb.yaml │ │ ├── configure │ │ ├── application.py │ │ ├── commandline_parser.py │ │ ├── common │ │ │ ├── application_exception.py │ │ │ └── logger.py │ │ ├── json_encoder.py │ │ ├── main.py │ │ ├── rds-combined-ca-bundle.pem │ │ └── requirements.txt │ │ ├── lambda │ │ ├── app-request-reader │ │ │ ├── lambda_function.py │ │ │ ├── sample_request_start.json │ │ │ └── sample_request_stop.json │ │ ├── batch-request-reader │ │ │ ├── lambda_function.py │ │ │ ├── rds-combined-ca-bundle.pem │ │ │ └── sample_request.json │ │ └── gap-watch-request-reader │ │ │ ├── lambda_function.py │ │ │ └── sample_request.json │ │ └── migrator-app │ │ ├── commandline_parser.py │ │ ├── common │ │ ├── Singleton.py │ │ ├── __init__.py │ │ ├── application_exception.py │ │ ├── json_encoder.py │ │ ├── logger.py │ │ └── timer.py │ │ ├── helpers │ │ ├── __init__.py │ │ ├── change_manager.py │ │ ├── document_batcher.py │ │ ├── 
dynamodb_helper.py │ │ ├── file_helper.py │ │ ├── s3_helper.py │ │ └── tokens_manager.py │ │ ├── main.py │ │ ├── migrators │ │ ├── ClusterMigrator.py │ │ ├── CollectionMigrator.py │ │ ├── DatabaseMigrator.py │ │ ├── TokenTracker.py │ │ └── __init__.py │ │ ├── requirements.txt │ │ └── tokens.yaml ├── couchbase-migration-utility │ ├── README.md │ ├── couchbase-to-amazon-documentdb-connectors.yaml │ ├── couchbase-to-amazon-documentdb.yaml │ └── static │ │ ├── images │ │ ├── amazon-documentdb-connect-with-an-application.png │ │ ├── amazon-documentdb-connect-with-mongo-shell.png │ │ ├── amazon-documentdb-connectivity-and-security.png │ │ ├── cloudformation-couchbase-to-amazon-documentdb-output.png │ │ ├── ec2-cfn-migration-security-group.png │ │ ├── ec2-copy-ssh-command.png │ │ ├── ec2-delete-inbound-rule.png │ │ ├── ec2-edit-couchbase-security-group-inbound-rules.png │ │ ├── ec2-select-couchbase-security-group-inbound-rules.png │ │ ├── msk-cluster-bootstrap-servers.png │ │ ├── msk-cluster-client-information.png │ │ ├── s3-confirm-specified-objects.png │ │ ├── s3-delete-objects.png │ │ └── solution-overview.png │ │ └── scripts │ │ ├── createTruststore.sh │ │ └── setup.sh ├── data-differ │ ├── README.md │ ├── data-differ.py │ └── test-scripts │ │ ├── README.md │ │ ├── dict_id.bash │ │ ├── dict_id_diff.bash │ │ ├── dict_id_diff_source.json │ │ ├── dict_id_diff_target.json │ │ ├── dict_id_source.json │ │ ├── dict_id_target.json │ │ ├── everything_same.bash │ │ ├── everything_same.json │ │ ├── extra_target_field.bash │ │ ├── extra_target_field_source.json │ │ ├── extra_target_field_target.json │ │ ├── nested_dict_diff.bash │ │ ├── nested_dict_diff_source.json │ │ ├── nested_dict_diff_target.json │ │ ├── order_change.bash │ │ ├── order_change_source.json │ │ ├── order_change_target.json │ │ ├── wrong_id.bash │ │ ├── wrong_id_source.json │ │ └── wrong_id_target.json ├── dms-segments │ ├── README.md │ └── dms-segments.py ├── export-users │ ├── README.md │ └── docdbExportUsers.py ├── json-import │ ├── README.md │ └── json-import.py ├── migrator │ ├── .gitignore │ ├── README.md │ ├── cdc-multiprocess.py │ ├── fl-multiprocess-filtered.py │ ├── fl-multiprocess.py │ └── test │ │ └── cdc-correctness.py ├── mongodb-changestream-review │ ├── README.md │ └── mongodb-changestream-review.py ├── mongodb-oplog-review │ ├── README.md │ └── mongodb-oplog-review.py ├── mongodb-ops │ ├── README.md │ └── mongodb-ops.py └── mvu-tool │ ├── README.md │ └── mvu-cdc-migrator.py ├── monitoring ├── README.md ├── custom-metrics │ ├── README.md │ └── custom-metrics.py ├── docdb-dashboarder │ ├── .gitignore │ ├── README.md │ ├── create-docdb-dashboard.py │ └── widgets.py ├── docdb-stat │ ├── README.md │ └── docdbstat.py ├── documentdb-top │ ├── README.md │ └── documentdb-top.py └── gc-watchdog │ ├── .gitignore │ ├── README.md │ └── gc-watchdog.py ├── operations ├── README.md ├── document-compression-updater │ ├── .gitignore │ ├── README.md │ ├── requirements.txt │ └── update_apply_compression.py ├── index-creator │ ├── .gitignore │ ├── README.md │ └── index-creator.py ├── large-doc-finder │ ├── README.md │ ├── large-docs.py │ └── requirements.txt └── server-certificate-check │ ├── .gitignore │ ├── README.md │ ├── iam-policy.json │ ├── requirements.txt │ └── server-certificate-check.py └── performance ├── README.md ├── compression-review ├── .gitignore ├── README.md ├── compression-review.py └── requirements.txt ├── deployment-scanner ├── README.md ├── deployment-scanner-debug.py ├── deployment-scanner.py ├── iam-policy.json └── 
requirements.txt ├── index-cardinality-detection ├── .gitignore ├── README.md ├── detect-cardinality.py └── requirements.txt ├── index-review ├── README.md ├── index-review-testing.txt └── index-review.py ├── metric-analyzer ├── README.md ├── context │ ├── buffer_cache_low.html │ ├── connection_limit.html │ ├── cpu_overutilized.html │ ├── cpu_underutilized.html │ ├── graviton_upgrade.html │ ├── index_cache_low.html │ ├── read_preference.html │ ├── remove_instances.html │ └── single_az.html ├── metric-analyzer.py └── requirements.txt └── metric-collector ├── IAM-policy.json ├── README.md ├── metric-collector.py └── requirements.txt /.dockerignore: -------------------------------------------------------------------------------- 1 | .github 2 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | *Issue #, if available:* 2 | 3 | *Description of changes:* 4 | 5 | 6 | By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check [existing open](https://github.com/awslabs/amazon-documentdb-tools/issues), or [recently closed](https://github.com/awslabs/amazon-documentdb-tools/issues?utf8=%E2%9C%93&q=is%3Aissue%20is%3Aclosed%20), issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *master* branch. 27 | 2. 
You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any ['help wanted'](https://github.com/awslabs/amazon-documentdb-tools/labels/help%20wanted) issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](https://github.com/awslabs/amazon-documentdb-tools/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | 61 | We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 62 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Amazon DocumentDB Tools 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Tools 2 | 3 | This repository contains several tools to help users with Amazon DocumentDB including migration, monitoring, and performance. A few of the most popular tools are listed below but there are additional tools in the [migration](./migration), [monitoring](./monitoring), [operations](./operations), and [performance](./performance) folders. 4 | 5 | ## Amazon DocumentDB Index Tool 6 | 7 | The [DocumentDB Index Tool](./index-tool) makes it easy to migrate only indexes (not data) between a source MongoDB deployment and an Amazon DocumentDB cluster. 
8 | 9 | ## Amazon DocumentDB Compatibility Tool 10 | 11 | The [DocumentDB Compatibility Tool](./compat-tool) examines log files from MongoDB or source code from MongoDB applications to determine if there are any queries which use operators that are not supported in Amazon DocumentDB. 12 | 13 | ## Support 14 | 15 | The contents of this repository are maintained by Amazon DocumentDB Specialist SAs and are not officially supported by AWS. Please file a [Github Issue](https://github.com/awslabs/amazon-documentdb-tools/issues) if you experience any problems. 16 | 17 | ## License 18 | 19 | This library is licensed under the Apache 2.0 License. 20 | -------------------------------------------------------------------------------- /compat-tool/check-percentages.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import compat 4 | 5 | def main(): 6 | versions = ['3.6','4.0','5.0','EC5.0'] 7 | keywords = compat.load_keywords() 8 | 9 | totOps = 0 10 | numOps = {} 11 | numOpsSupported = {} 12 | 13 | for thisKeyword in keywords.keys(): 14 | # get counts by mongodb version 15 | totOps += 1 16 | thisMongodbVersion = keywords[thisKeyword]["mongodbversion"] 17 | if thisMongodbVersion in numOps: 18 | numOps[thisMongodbVersion] += 1 19 | else: 20 | numOps[thisMongodbVersion] = 1 21 | 22 | # get supported count by documentdb version 23 | for docDbVersion in versions: 24 | if keywords[thisKeyword][docDbVersion] == "Yes": 25 | if docDbVersion in numOpsSupported: 26 | numOpsSupported[docDbVersion] += 1 27 | else: 28 | numOpsSupported[docDbVersion] = 1 29 | 30 | print("") 31 | print("MongoDB Operations By Version, total = {}".format(totOps)) 32 | for thisVersion in sorted(numOps.keys()): 33 | print(" {} in version {}".format(numOps[thisVersion],thisVersion)) 34 | 35 | print("") 36 | print("DocumentDB Supported Operations By Version") 37 | for thisVersion in sorted(numOpsSupported.keys()): 38 | print(" {} supported by DocumentDB version {} ({:.1f}%)".format(numOpsSupported[thisVersion],thisVersion,numOpsSupported[thisVersion]/totOps*100)) 39 | print("") 40 | 41 | print("") 42 | print("DocumentDB EC Compat Check") 43 | for thisKeyword in sorted(keywords.keys()): 44 | if keywords[thisKeyword]["5.0"] == "Yes" and keywords[thisKeyword]["EC5.0"] == "No": 45 | print(" {}".format(thisKeyword)) 46 | print("") 47 | 48 | #print("") 49 | #print("DocumentDB 5.0 Check") 50 | #for thisKeyword in sorted(keywords.keys()): 51 | # if keywords[thisKeyword]["5.0"] == "No": 52 | # print(" {} from MongoDB {}".format(thisKeyword,keywords[thisKeyword]["mongodbversion"])) 53 | #print("") 54 | 55 | 56 | if __name__ == '__main__': 57 | main() 58 | -------------------------------------------------------------------------------- /compat-tool/create-compat-csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import compat 4 | 5 | def main(): 6 | versions = ['3.6','4.0','5.0','EC5.0'] 7 | keywords = compat.load_keywords() 8 | 9 | print("{},{},{},{},{},{}".format('operator','mdb-version','docdb-36','docdb-40','docdb-50','docdb-ec')) 10 | 11 | for thisKeyword in keywords.keys(): 12 | thisEntry = keywords[thisKeyword] 13 | print("{},{},{},{},{},{}".format(thisKeyword,thisEntry["mongodbversion"],thisEntry["3.6"],thisEntry["4.0"],thisEntry["5.0"],thisEntry["EC5.0"])) 14 | 15 | 16 | if __name__ == '__main__': 17 | main() 18 | -------------------------------------------------------------------------------- 
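Both scripts above drive off the keyword dictionary returned by compat.load_keywords(). As a quick illustration of that data structure, here is a small standalone sketch (not part of the tool; the operator name is only an example) that looks up a single operator and prints the MongoDB version it was introduced in plus its DocumentDB support flags:

```python
#!/usr/bin/python3
# Hypothetical helper sketch: query the compat-tool keyword dictionary for one operator.
# Assumes it is run from the compat-tool directory so that compat.py is importable.

import compat

def show_support(operator_name):
    keywords = compat.load_keywords()
    details = keywords.get(operator_name)
    if details is None:
        print("{} not found in keyword list".format(operator_name))
        return
    print("{} (introduced in MongoDB {})".format(operator_name, details["mongodbversion"]))
    for docdb_version in ['3.6', '4.0', '5.0', 'EC5.0']:
        print("  DocumentDB {}: {}".format(docdb_version, details[docdb_version]))

if __name__ == '__main__':
    show_support("$sortByCount")   # example operator; any keyword key works
```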
/compat-tool/test/not_a_log_file.txt: -------------------------------------------------------------------------------- 1 | "t": { 2 | "$date": "2020-05-20T20:10:08.731+00:00" 3 | }, 4 | "s": "I", 5 | "c": "COMMAND", 6 | "id": 51803, 7 | "ctx": "conn281", 8 | "msg": "Slow query", 9 | "attr": { 10 | "type": "command", 11 | "ns": "stocks.trades", 12 | "appName": "MongoDB Shell", 13 | "command": { 14 | "aggregate": "trades", 15 | "pipeline": [ 16 | { 17 | "$project": { 18 | "ticker": 1, 19 | "price": 1, 20 | "priceGTE110": { 21 | "$gte": [ 22 | "$price", 23 | 110 24 | ] 25 | }, 26 | "_id": 0 27 | } 28 | }, 29 | { 30 | "$sort": { 31 | "price": -1 32 | } 33 | } 34 | ], 35 | "allowDiskUse": true, 36 | "cursor": {}, 37 | "lsid": { 38 | "id": { 39 | "$uuid": "fa658f9e-9cd6-42d4-b1c8-c9160fabf2a2" 40 | } 41 | }, 42 | "$clusterTime": { 43 | "clusterTime": { 44 | "$timestamp": { 45 | "t": 1590005405, 46 | "i": 1 47 | } 48 | }, 49 | "signature": { 50 | "hash": { 51 | "$binary": { 52 | "base64": "AAAAAAAAAAAAAAAAAAAAAAAAAAA=", 53 | "subType": "0" 54 | } 55 | }, 56 | "keyId": 0 57 | } 58 | }, 59 | "$db": "test" 60 | }, 61 | "planSummary": "COLLSCAN", 62 | "cursorid": 1912190691485054700, 63 | "keysExamined": 0, 64 | "docsExamined": 1000001, 65 | "hasSortStage": true, 66 | "usedDisk": true, 67 | "numYields": 1002, 68 | "nreturned": 101, 69 | "reslen": 17738, 70 | "locks": { 71 | "ReplicationStateTransition": { 72 | "acquireCount": { 73 | "w": 1119 74 | } 75 | }, 76 | "Global": { 77 | "acquireCount": { 78 | "r": 1119 79 | } 80 | }, 81 | "Database": { 82 | "acquireCount": { 83 | "r": 1119 84 | } 85 | }, 86 | "Collection": { 87 | "acquireCount": { 88 | "r": 1119 89 | } 90 | }, 91 | "Mutex": { 92 | "acquireCount": { 93 | "r": 117 94 | } 95 | } 96 | }, 97 | "storage": { 98 | "data": { 99 | "bytesRead": 232899899, 100 | "timeReadingMicros": 186017 101 | }, 102 | "timeWaitingMicros": { 103 | "cache": 849 104 | } 105 | }, 106 | "protocol": "op_msg", 107 | "durationMillis": 22427 108 | } 109 | } -------------------------------------------------------------------------------- /compat-tool/test/sample-5-0-features.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pymongo 3 | from datetime import datetime 4 | 5 | connectionString = sys.argv[1] 6 | 7 | client = pymongo.MongoClient(connectionString) 8 | col = client["db1"]["coll1"] 9 | 10 | start = datetime(2022, 1, 1, 00, 00, 00) 11 | end = datetime(2022, 1, 1, 23, 59, 59) 12 | inList = [1,2,3,4,5,6,7,8,9,10] 13 | 14 | result = col.count_documents({"docId":{"$in":inList},"createDate": { "$gt": start, "$lte": end }}) 15 | print(result) 16 | result = col.aggregate([{ "$match": {"docId":{"$in":inList},"createDate": { "$gt": start, "$lte": end }} },{ "$group": { "_id": None, "n": { "$sum": 1 } } }]) 17 | print(result) 18 | result = col.find(filter={"createDate": { "$gt": start, "$lte": end }},projection={"_id":0, "patientId":1}) 19 | print(result) 20 | result = col.aggregate([ 21 | { "$match" : { "createDate": { "$gt": start, "$lte": end }}}, 22 | { "$lookup": { "from": "tempCollection", "localField": "docId", "foreignField": "_id", "as": "am-allowed"}}, 23 | { "$match" : { "am-allowed": { "$ne": [] }}}, 24 | { "$count" : "numdocs" } 25 | ], 26 | allowDiskUse = True) 27 | print(result) 28 | result = col.aggregate([ { "$project" : { "createDatePlus1": { "$dateAdd": { startDate: "$originalDate", unit: "day", amount: 1 }}}} ]) 29 | print(result) 30 | result = col.aggregate([ { "$project" : { "createDatePlus1": { 
"$dateSubtract": { startDate: "$originalDate", unit: "day", amount: 1 }}}} ]) 31 | print(result) 32 | 33 | client.close() 34 | -------------------------------------------------------------------------------- /compat-tool/test/sample-python-1.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pymongo 3 | from datetime import datetime 4 | 5 | connectionString = sys.argv[1] 6 | 7 | client = pymongo.MongoClient(connectionString) 8 | col = client["db1"]["coll1"] 9 | 10 | start = datetime(2022, 1, 1, 00, 00, 00) 11 | end = datetime(2022, 1, 1, 23, 59, 59) 12 | inList = [1,2,3,4,5,6,7,8,9,10] 13 | 14 | result = col.count_documents({"docId":{"$in":inList},"createDate": { "$gt": start, "$lte": end }}) 15 | print(result) 16 | result = col.aggregate([{ "$match": {"docId":{"$in":inList},"createDate": { "$gt": start, "$lte": end }} },{ "$group": { "_id": None, "n": { "$sum": 1 } } }]) 17 | print(result) 18 | result = col.find(filter={"createDate": { "$gt": start, "$lte": end }},projection={"_id":0, "patientId":1}) 19 | print(result) 20 | result = col.aggregate([ 21 | { "$match" : { "createDate": { "$gt": start, "$lte": end }}}, 22 | { "$lookup": { "from": "tempCollection", "localField": "docId", "foreignField": "_id", "as": "am-allowed"}}, 23 | { "$match" : { "am-allowed": { "$ne": [] }}}, 24 | { "$count" : "numdocs" } 25 | ], 26 | allowDiskUse = True) 27 | print(result) 28 | 29 | client.close() -------------------------------------------------------------------------------- /compat-tool/test/sample-python-2.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pymongo 3 | from datetime import datetime 4 | 5 | connectionString = sys.argv[1] 6 | 7 | client = pymongo.MongoClient(connectionString) 8 | col = client["db1"]["coll1"] 9 | 10 | start = datetime(2022, 1, 1, 00, 00, 00) 11 | end = datetime(2022, 1, 1, 23, 59, 59) 12 | inList = [1,2,3,4,5,6,7,8,9,10] 13 | 14 | result = col.count_documents({"docId":{"$in":inList},"createDate": { "$gt": start, "$lte": end }}) 15 | print(result) 16 | result = col.aggregate([{ "$match": {"docId":{"$in":inList},"createDate": { "$gt": start, "$lte": end }} },{ "$group": { "_id": None, "n": { "$sum": 1 } } }]) 17 | print(result) 18 | result = col.find(filter={"createDate": { "$gt": start, "$lte": end }},projection={"_id":0, "patientId":1}) 19 | print(result) 20 | result = col.aggregate([ 21 | { "$match" : { "createDate": { "$gt": start, "$lte": end }}}, 22 | { "$lookup": { "from": "tempCollection", "localField": "docId", "foreignField": "_id", "as": "am-allowed"}}, 23 | { "$match" : { "am-allowed": { "$ne": [] }}}, 24 | { "$count" : "numdocs" } 25 | ]) 26 | print(result) 27 | result = col.aggregate([ 28 | { "$match" : { "createDate": { "$gt": start, "$lte": end }}}, 29 | { "$sortByCount": "$state" } 30 | ]) 31 | print(result) 32 | 33 | client.close() -------------------------------------------------------------------------------- /compat-tool/test/testlog.txt: -------------------------------------------------------------------------------- 1 | 2020-06-24T10:48:39.784-0500 I COMMAND [conn1370] command social.people appName: "MongoDB Shell" command: find { find: "people", filter: { fname: "Cale" }, limit: 1.0, singleBatch: true, lsid: { id: UUID("66865bbd-1cd5-4efa-acd4-b1bc9ac63bde") }, $clusterTime: { clusterTime: Timestamp(1593013719, 116), signature: { hash: BinData(0, 0000000000000000000000000000000000000000), keyId: 0 } }, $db: "social" } planSummary: 
COLLSCAN keysExamined:0 docsExamined:4 cursorExhausted:1 numYields:0 nreturned:1 reslen:293 locks:{ Global: { acquireCount: { r: 2 } }, Database: { acquireCount: { r: 1 } }, Collection: { acquireCount: { r: 1 } } } protocol:op_msg 0ms 2 | 2020-06-23T10:01:26.597-0500 I COMMAND [conn13] command social.product appName: "MongoDB Shell" command: find { find: "product", filter: { qty: { $gt: 4.0 } }, lsid: { id: UUID("105a4d97-5d50-4ba4-97b3-31982f338d27") }, $clusterTime: { clusterTime: Timestamp(1592924486, 119), signature: { hash: BinData(0, 0000000000000000000000000000000000000000), keyId: 0 } }, $db: "social" } planSummary: COLLSCAN keysExamined:0 docsExamined:4 cursorExhausted:1 numYields:0 nreturned:2 reslen:287 locks:{ Global: { acquireCount: { r: 2 } }, Database: { acquireCount: { r: 1 } }, Collection: { acquireCount: { r: 1 } } } protocol:op_msg 0ms 3 | 2020-06-23T10:01:26.608-0500 I COMMAND [conn13] command social.product appName: "MongoDB Shell" command: find { find: "product", filter: { _id: { $in: [ "apples", "oranges" ] } }, lsid: { id: UUID("105a4d97-5d50-4ba4-97b3-31982f338d27") }, $clusterTime: { clusterTime: Timestamp(1592924486, 119), signature: { hash: BinData(0, 0000000000000000000000000000000000000000), keyId: 0 } }, $db: "social" } planSummary: IXSCAN { _id: 1 } keysExamined:3 docsExamined:2 cursorExhausted:1 numYields:0 nreturned:2 reslen:319 locks:{ Global: { acquireCount: { r: 2 } }, Database: { acquireCount: { r: 1 } }, Collection: { acquireCount: { r: 1 } } } protocol:op_msg 0ms 4 | 2015-10-26T16:03:22.480+1100 I QUERY [conn129292] query sample-db.contact query: { $query: { $and: [ { $or: [ { type: "CAR" }, { type: "Manager" } ] }, { terminated: false }, { dateTo: { $gt: "2015-10-26T05:03:22.269Z" } }, { dateFrom: { $lte: "2015-10-26T05:03:22.269Z" } } ] }, $orderby: { code: -1 } } planSummary: IXSCAN { code: 1 } ntoreturn:10 ntoskip:0 nscanned:40 nscannedObjects:40 keyUpdates:0 writeConflicts:0 numYields:3 nreturned:9 reslen:15647 locks:{ Global: { acquireCount: { r: 4 } }, Database: { acquireCount: { r: 4 } }, Collection: { acquireCount: { r: 4 } } } 106ms 5 | 2020-11-09T16:56:57.728+0000 I COMMAND [conn1] command test.testn appName: "MongoDB Shell" command: aggregate { aggregate: "testn", pipeline: [ { $match: { a: { $gte: 3.0 } } }, { $group: { _id: "a", sum: { $sum: "$y" } } } ], cursor: {}, lsid: { id: UUID("27fbb732-5c00-422e-8e0f-5546b6802541") }, $db: "test" } planSummary: COLLSCAN keysExamined:0 docsExamined:4 cursorExhausted:1 numYields:0 nreturned:1 queryHash:0D966491 planCacheKey:0D966491 reslen:131 locks:{ ReplicationStateTransition: { acquireCount: { w: 2 } }, Global: { acquireCount: { r: 2 } }, Database: { acquireCount: { r: 2 } }, Collection: { acquireCount: { r: 2 } }, Mutex: { acquireCount: { r: 2 } } } storage:{} protocol:op_msg 0ms 6 | 2020-11-09T17:34:55.534+0000 I COMMAND [conn1] command test.testn appName: "MongoDB Shell" command: aggregate { aggregate: "testn", pipeline: [ { $facet: { xxx: [ { $bucketAuto: { groupBy: "$x", buckets: 4.0 } } ] } } ], cursor: {}, lsid: { id: UUID("27fbb732-5c00-422e-8e0f-5546b6802541") }, $db: "test" } planSummary: COLLSCAN keysExamined:0 docsExamined:4 cursorExhausted:1 numYields:0 nreturned:1 reslen:337 locks:{ ReplicationStateTransition: { acquireCount: { w: 2 } }, Global: { acquireCount: { r: 2 } }, Database: { acquireCount: { r: 2 } }, Collection: { acquireCount: { r: 2 } }, Mutex: { acquireCount: { r: 2 } } } storage:{} protocol:op_msg 0ms 7 | 2020-11-09T20:28:41.673+0000 I COMMAND [conn1] command 
test.testn appName: "MongoDB Shell" command: aggregate { aggregate: "testn", pipeline: [ { $facet: { xxx: [ { $bucket: { groupBy: "$x", boundaries: [ 0.0, 1000.0 ] } } ] } } ], cursor: {}, lsid: { id: UUID("27fbb732-5c00-422e-8e0f-5546b6802541") }, $db: "test" } planSummary: COLLSCAN keysExamined:0 docsExamined:4 cursorExhausted:1 numYields:0 nreturned:1 reslen:149 locks:{ ReplicationStateTransition: { acquireCount: { w: 2 } }, Global: { acquireCount: { r: 2 } }, Database: { acquireCount: { r: 2 } }, Collection: { acquireCount: { r: 2 } }, Mutex: { acquireCount: { r: 2 } } } storage:{} protocol:op_msg 0ms 8 | -------------------------------------------------------------------------------- /global-clusters-automation/deploy/package_lambda.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Upload Lambda Code 3 | mkdir app 4 | 5 | cp failover_and_delete_lambda_function.py app/ 6 | cp failover_and_delete_global_cluster.py app/ 7 | cp route53_endpoint_management.py app/ 8 | cp convert_to_global_lambda_function.py app/ 9 | cp add_secondarycluster.py app/ 10 | cp failover_and_convert_lambda_function.py app/ 11 | cp failover_and_convert_to_global.py app/ 12 | cp requirements.txt app/ 13 | 14 | sh ./deploy/package_lambda_failover.sh $1 15 | sh ./deploy/package_lambda_convert.sh $1 16 | sh ./deploy/package_lambda_failover_and_convert.sh $1 17 | -------------------------------------------------------------------------------- /global-clusters-automation/deploy/package_lambda_convert.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo 'Packaging and uploading global cluster automation code to S3' 3 | # Upload Lambda Code 4 | cd app 5 | 6 | python3 -m venv convertToGlobalLambda 7 | source convertToGlobalLambda/bin/activate 8 | mv convert_to_global_lambda_function.py convertToGlobalLambda/lib/python*/site-packages/ 9 | mv add_secondarycluster.py convertToGlobalLambda/lib/python*/site-packages/ 10 | cp requirements.txt convertToGlobalLambda/lib/python*/site-packages/ 11 | cd convertToGlobalLambda/lib/python*/site-packages/ 12 | pip install -r requirements.txt 13 | deactivate 14 | mv ../dist-packages/* . 15 | zip -r9 convertToGlobalLambdaFunction.zip . 16 | aws s3 cp convertToGlobalLambdaFunction.zip s3://$1 17 | cd .. 18 | -------------------------------------------------------------------------------- /global-clusters-automation/deploy/package_lambda_failover.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo 'Packaging and uploading global cluster automation code to S3' 3 | # Upload Lambda Code 4 | cd app 5 | 6 | python3 -m venv failoverLambda 7 | source failoverLambda/bin/activate 8 | mv failover_and_delete_lambda_function.py failoverLambda/lib/python*/site-packages/ 9 | mv failover_and_delete_global_cluster.py failoverLambda/lib/python*/site-packages/ 10 | mv route53_endpoint_management.py failoverLambda/lib/python*/site-packages/ 11 | cp requirements.txt failoverLambda/lib/python*/site-packages/ 12 | cd failoverLambda/lib/python*/site-packages/ 13 | pip install -r requirements.txt 14 | deactivate 15 | mv ../dist-packages/* . 16 | zip -r9 failoverLambdaFunction.zip . 17 | aws s3 cp failoverLambdaFunction.zip s3://$1 18 | 19 | cd .. 
20 | -------------------------------------------------------------------------------- /global-clusters-automation/deploy/package_lambda_failover_and_convert.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | echo 'Packaging and uploading global cluster automation code to S3' 3 | # Upload Lambda Code 4 | cd app 5 | python3 -m venv failoverAndConvertToGlobalLambda 6 | source failoverAndConvertToGlobalLambda/bin/activate 7 | mv failover_and_convert_lambda_function.py failoverAndConvertToGlobalLambda/lib/python*/site-packages/ 8 | mv failover_and_convert_to_global.py failoverAndConvertToGlobalLambda/lib/python*/site-packages/ 9 | cp requirements.txt failoverAndConvertToGlobalLambda/lib/python*/site-packages/ 10 | cd failoverAndConvertToGlobalLambda/lib/python*/site-packages/ 11 | pip install -r requirements.txt 12 | deactivate 13 | mv ../dist-packages/* . 14 | zip -r9 failoverAndConvertToGlobalLambda.zip . 15 | aws s3 cp failoverAndConvertToGlobalLambda.zip s3://$1 16 | cd .. -------------------------------------------------------------------------------- /global-clusters-automation/failover_and_convert_to_global.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | from botocore.exceptions import ClientError 3 | from datetime import datetime 4 | 5 | session = boto3.Session() 6 | client = session.client('docdb') 7 | now = datetime.now() 8 | dt_string = now.strftime("%H%M%S") 9 | 10 | 11 | # Retrieve all cluster members for the global cluster 12 | def get_global_cluster_members(global_cluster_id): 13 | try: 14 | response = client.describe_global_clusters( 15 | GlobalClusterIdentifier=global_cluster_id 16 | ) 17 | global_cluster_members = response['GlobalClusters'][0]['GlobalClusterMembers'] 18 | except ClientError as e: 19 | print('ERROR OCCURRED WHILE PROCESSING: ', e) 20 | print('PROCESSING WILL STOP') 21 | raise ClientError 22 | return global_cluster_members 23 | 24 | 25 | def prepare_to_convert(global_cluster_members, global_cluster_id, secondary_cluster_arn, io_optimized_storage, enable_performance_insights): 26 | try: 27 | # populate the list of clusters in the global cluster and remove the secondary cluster to be promoted from 28 | # the list 29 | regional_clusters = get_regional_clusters(global_cluster_members) 30 | for each_cluster in regional_clusters: 31 | if each_cluster == secondary_cluster_arn: 32 | new_primary_cluster_arn = each_cluster 33 | regional_clusters.remove(each_cluster) 34 | break 35 | 36 | secondary_clusters = [] 37 | for each_cluster in regional_clusters: 38 | cluster_details = get_cluster_details(each_cluster) 39 | if io_optimized_storage: 40 | cluster_details["StorageType"] = "iopt1" 41 | secondary_clusters.append(cluster_details) 42 | 43 | convert_to_global_request = { 44 | "global_cluster_id": global_cluster_id, 45 | "primary_cluster_arn": new_primary_cluster_arn, 46 | "secondary_clusters": secondary_clusters, 47 | "io_optimized_storage": io_optimized_storage, 48 | "enable_performance_insights": enable_performance_insights 49 | } 50 | except ClientError as e: 51 | print('ERROR OCCURRED WHILE PROCESSING: ', e) 52 | print('PROCESSING WILL STOP') 53 | raise ClientError 54 | return convert_to_global_request 55 | 56 | 57 | def get_regional_clusters(global_cluster_members): 58 | try: 59 | regional_clusters = [] 60 | for each_item in global_cluster_members: 61 | 62 | if each_item['IsWriter']: 63 | regional_clusters = each_item['Readers'] 64 | 
regional_clusters.append(each_item['DBClusterArn']) 65 | break 66 | # Raise Error if no secondary clusters are available 67 | if len(regional_clusters) == 0: 68 | print('No clusters found for provided global cluster', 69 | '.Please check provided input.') 70 | raise RuntimeError 71 | 72 | except ClientError as e: 73 | print('ERROR OCCURRED WHILE PROCESSING: ', e) 74 | print('PROCESSING WILL STOP') 75 | raise ClientError 76 | return regional_clusters 77 | 78 | 79 | def get_cluster_details(cluster): 80 | try: 81 | cluster_id = cluster.split(":")[-1] 82 | region = cluster.split(":")[3] 83 | client = session.client('docdb', region_name=region) 84 | response = client.describe_db_clusters( 85 | DBClusterIdentifier=cluster 86 | ) 87 | cluster_response = response['DBClusters'][0] 88 | 89 | vpc_group_ids = [] 90 | for each_item in cluster_response['VpcSecurityGroups']: 91 | vpc_group_ids.append(each_item['VpcSecurityGroupId']) 92 | 93 | if "-flipped" in cluster_id: 94 | last_index = cluster_id.rfind("-") 95 | cluster_id = cluster_id[:last_index] 96 | else: 97 | cluster_id = cluster_id + "-flipped" 98 | 99 | cluster_details = { 100 | # When converting the cluster to global cluster and adding clusters from the prior global 101 | # cluster, we append the timestamp to keep the cluster ID unique. This is needed so that the 102 | # function does not wait for the older clusters to be deleted. Also helps to differentiate 103 | # between clusters created by script. 104 | "secondary_cluster_id": cluster_id + "-" + dt_string, 105 | "region": region, 106 | "number_of_instances": len(cluster_response['DBClusterMembers']), 107 | "subnet_group": cluster_response['DBSubnetGroup'], 108 | "security_group_id": vpc_group_ids, 109 | "backup_retention_period": cluster_response['BackupRetentionPeriod'], 110 | "cluster_parameter_group": cluster_response['DBClusterParameterGroup'], 111 | "preferred_back_up_window": cluster_response['PreferredBackupWindow'], 112 | "preferred_maintenance_window": cluster_response['PreferredMaintenanceWindow'], 113 | "storage_encryption": cluster_response['StorageEncrypted'], 114 | "deletion_protection": cluster_response['DeletionProtection'], 115 | "engine_version": cluster_response['EngineVersion'] 116 | } 117 | # add KmsKeyId to cluster_details dictionary only if it exists in the deleted cluster 118 | if 'KmsKeyId' in cluster_response: 119 | cluster_details["kms_key_id"] = cluster_response['KmsKeyId'] 120 | return cluster_details 121 | 122 | except ClientError as e: 123 | print('ERROR OCCURRED WHILE PROCESSING: ', e) 124 | print('PROCESSING WILL STOP') 125 | raise ClientError 126 | -------------------------------------------------------------------------------- /global-clusters-automation/images/GlobalClustersAutomation-BCP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/global-clusters-automation/images/GlobalClustersAutomation-BCP.png -------------------------------------------------------------------------------- /global-clusters-automation/images/GlobalClustersAutomation-DR.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/global-clusters-automation/images/GlobalClustersAutomation-DR.png -------------------------------------------------------------------------------- 
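A minimal standalone sketch of how the helpers defined in failover_and_convert_to_global.py above fit together (not part of the package; the global cluster identifier and cluster ARN are placeholders and must refer to an existing global cluster):

```python
# Hypothetical driver sketch for failover_and_convert_to_global.py.
# The identifiers below are placeholders for illustration only.
from failover_and_convert_to_global import get_global_cluster_members, prepare_to_convert

global_cluster_id = "global-example"
promoted_cluster_arn = "arn:aws:rds:us-east-2:111122223333:cluster:cluster-example"

members = get_global_cluster_members(global_cluster_id)
request = prepare_to_convert(members, global_cluster_id, promoted_cluster_arn,
                             io_optimized_storage=False, enable_performance_insights=False)

print("New primary cluster:", request["primary_cluster_arn"])
print("Secondary clusters to recreate:", len(request["secondary_clusters"]))
```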
/global-clusters-automation/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.34.82 2 | botocore==1.34.82 -------------------------------------------------------------------------------- /global-clusters-automation/route53_endpoint_management.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | from botocore.exceptions import ClientError 3 | 4 | session = boto3.Session() 5 | client = session.client('route53') 6 | 7 | 8 | def update_endpoint(zone_id, name, value): 9 | try: 10 | client.change_resource_record_sets( 11 | HostedZoneId=zone_id, 12 | ChangeBatch={ 13 | "Comment": "Switching endpoint", 14 | "Changes": [ 15 | { 16 | "Action": "UPSERT", 17 | "ResourceRecordSet": { 18 | "Name": name, 19 | "Type": "CNAME", 20 | "TTL": 1, 21 | "ResourceRecords": [{"Value": value}] 22 | } 23 | } 24 | ] 25 | } 26 | ) 27 | except ClientError as e: 28 | print('ERROR OCCURRED WHILE PROCESSING: ', e) 29 | print('PROCESSING WILL STOP') 30 | raise ClientError 31 | 32 | 33 | def manage_application_endpoint(hosted_zone_id, endpoint, cname): 34 | try: 35 | response = client.list_resource_record_sets( 36 | HostedZoneId=hosted_zone_id 37 | ) 38 | for record in response['ResourceRecordSets']: 39 | record_name = '' 40 | if record['Type'] == 'CNAME': 41 | record_name = record['Name'] 42 | record_value = record['ResourceRecords'][0] 43 | if cname in record_name: 44 | # get record value by calling describe cluster 45 | 46 | update_endpoint(hosted_zone_id, record_name, endpoint) 47 | print('Updated CNAME ', record_name, 'with record value', endpoint) 48 | break 49 | 50 | except ClientError as e: 51 | print('ERROR OCCURRED WHILE PROCESSING: ', e) 52 | print('PROCESSING WILL STOP') 53 | raise ClientError 54 | -------------------------------------------------------------------------------- /global-clusters-automation/test/test_convert_to_global.py: -------------------------------------------------------------------------------- 1 | from convert_to_global_lambda_function import lambda_handler 2 | 3 | event ={ 4 | "global_cluster_id":"global-2", 5 | "primary_cluster_arn":"arn:aws:rds:us-east-1:378282045186:cluster:demo", 6 | "secondary_clusters":[ 7 | { 8 | "region":"us-west-2", 9 | "secondary_cluster_id":"cluster-1812222021-125454", 10 | "number_of_instances":3, 11 | "subnet_group":"default", 12 | "security_group_id":[ 13 | "sg-0817d8725e9edffda" 14 | ], 15 | "kms_key_id":"arn:aws:kms:us-west-2:378282045186:key/1ffd4692-238f-459e-9ced-5620bb8b426b", 16 | "backup_retention_period":1, 17 | "cluster_parameter_group":"default.docdb4.0", 18 | "preferred_back_up_window":"10:25-10:55", 19 | "preferred_maintenance_window":"wed:06:40-wed:07:10", 20 | "storage_encryption":True, 21 | "deletion_protection":False 22 | } 23 | ] 24 | } 25 | 26 | lambda_handler(event,'') -------------------------------------------------------------------------------- /global-clusters-automation/test/test_failover_and_convert.py: -------------------------------------------------------------------------------- 1 | from failover_and_convert_lambda_function import lambda_handler 2 | 3 | event = { 4 | "global_cluster_id": "global-7", 5 | "secondary_cluster_arn": "arn:aws:rds:us-east-2:378282045186:cluster:cluster-13", 6 | "primary_cluster_cname": "primary.sample.com", 7 | "hosted_zone_id": "Z00565841LXHQLXKDOHSB" 8 | } 9 | import time 10 | 11 | start = time.time() 12 | lambda_handler(event, '') 13 | done = time.time() 14 | elapsed = done - start 15 
| print("Total Time to execute the lambda function is", elapsed, "secs") 16 | -------------------------------------------------------------------------------- /global-clusters-automation/test/test_failover_and_delete.py: -------------------------------------------------------------------------------- 1 | from failover_and_delete_lambda_function import lambda_handler 2 | 3 | event = { 4 | "global_cluster_id": "global-demos", 5 | "secondary_cluster_arn": "arn:aws:rds:us-east-2:378282045186:cluster:cluster-5-165836", 6 | "primary_cluster_cname": "primary.sample.com", 7 | "hosted_zone_id": "Z00565841LXHQLXKDOHSB", 8 | "is_delete_global_cluster": True 9 | } 10 | 11 | lambda_handler(event,'') -------------------------------------------------------------------------------- /index-tool/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3 2 | 3 | COPY . . 4 | RUN pip install --no-cache-dir -r requirements.txt 5 | ENTRYPOINT [ "python", "migrationtools/documentdb_index_tool.py" ] 6 | CMD ["--help"] 7 | -------------------------------------------------------------------------------- /index-tool/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Index Tool 2 | 3 | The Index Tool facilitates the migration of indexes metadata (excluding data) between document databases deployments. 4 | 5 | Supported source: 6 | - Amazon DocumentDB (any version) 7 | - MongoDB (2.x and later versions) standalone, replicaset or sharded cluster 8 | - Azure Cosmos DB 9 | 10 | Supported target: 11 | - Amazon DocumentDB (any version) 12 | 13 | 14 | ## Features 15 | 16 | - Export indexes metadata from a running MongoDB or Amazon DocumentDB deployment 17 | - Checks for any unsupported indexes types or collections options with Amazon DocumentDB 18 | - Check index and collections options compatibility against a logical backup, taken with mongodump. The backup has to be uncompressed. 19 | - Restores supported indexes to Amazon DocumentDB (instance based or Elastic cluster) 20 | - Output is a json file, similar to mongodump format 21 | - Supports creation of 2dsphere indexes using the *--support-2dsphere* command line option 22 | 23 | ## Requirements 24 | Python 3.7 or greater, Pymongo. 
25 | 26 | ## Installation 27 | Clone the repository and install the requirements: 28 | 29 | ``` 30 | git clone https://github.com/awslabs/amazon-documentdb-tools.git 31 | cd amazon-documentdb-tools/index-tool 32 | python3 -m pip install -r requirements.txt 33 | ``` 34 | 35 | ## Usage/Examples 36 | The Index Tool accepts the following arguments: 37 | 38 | ``` 39 | --debug Output debugging information 40 | --dry-run Perform processing, but do not actually export or restore indexes 41 | --uri URI URI to connect to MongoDB or Amazon DocumentDB 42 | --dir DIR Specify the folder to export to or restore from (required) 43 | --show-compatible Output all compatible indexes with Amazon DocumentDB (no change is applied) 44 | --show-issues Output a report of compatibility issues found 45 | --dump-indexes Perform index export from the specified server 46 | --restore-indexes Restore indexes found in metadata to the specified server 47 | --skip-incompatible Skip incompatible indexes when restoring metadata 48 | --support-2dsphere Support 2dsphere indexes creation (collections must use GeoJSON Point type for indexing) 49 | --skip-python-version-check Permit execution using Python 3.6 and prior 50 | --shorten-index-name Shorten long index name to compatible length 51 | --skip-id-indexes Do not create _id indexes 52 | ``` 53 | 54 | ### Export indexes from a MongoDB instance: 55 | ``` 56 | python3 migrationtools/documentdb_index_tool.py --dump-indexes --dir mongodb_index_export --uri 'mongodb://localhost:27017' 57 | ``` 58 | 59 | ### Export indexes from an Amazon DocumentDB cluster 60 | ``` 61 | python3 migrationtools/documentdb_index_tool.py --dump-indexes --dir docdb_index_export --uri 'mongodb://user:password@mydocdb.cluster-cdtjj00yfi95.eu-west-2.docdb.amazonaws.com:27017/?tls=true&tlsCAFile=rds-combined-ca-bundle.pem&replicaSet=rs0&retryWrites=false' 62 | ``` 63 | 64 | ### Check compatibility with Amazon DocumentDB against exported index metadata 65 | ``` 66 | python3 migrationtools/documentdb_index_tool.py --show-issues --dir mongodb_index_export 67 | ``` 68 | 69 | ### Restore compatible indexes to Amazon DocumentDB 70 | ``` 71 | python3 migrationtools/documentdb_index_tool.py --restore-indexes --skip-incompatible --dir mongodb_index_export --uri 'mongodb://user:password@mydocdb.cluster-cdtjj00yfi95.eu-west-2.docdb.amazonaws.com:27017/?tls=true&tlsCAFile=rds-combined-ca-bundle.pem&replicaSet=rs0&retryWrites=false' 72 | ``` 73 | 74 | ## License 75 | [Apache 2.0](http://www.apache.org/licenses/LICENSE-2.0) 76 | 77 | ## Contributing 78 | Contributions are always welcome! See the [contributing](https://github.com/awslabs/amazon-documentdb-tools/blob/master/CONTRIBUTING.md) page for ways to get involved. 79 | -------------------------------------------------------------------------------- /index-tool/migrationtools/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"). 5 | You may not use this file except in compliance with the License. 6 | A copy of the License is located at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | or in the "license" file accompanying this file. This file is distributed 11 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 12 | express or implied. See the License for the specific language governing 13 | permissions and limitations under the License. 
14 | """ 15 | 16 | from documentdb_index_tool import DocumentDbIndexTool 17 | -------------------------------------------------------------------------------- /index-tool/requirements.txt: -------------------------------------------------------------------------------- 1 | pymongo >= 3.12.1 2 | -------------------------------------------------------------------------------- /index-tool/test/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"). 5 | You may not use this file except in compliance with the License. 6 | A copy of the License is located at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | or in the "license" file accompanying this file. This file is distributed 11 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 12 | express or implied. See the License for the specific language governing 13 | permissions and limitations under the License. 14 | """ 15 | -------------------------------------------------------------------------------- /index-tool/test/fixtures/metadata/capped_collection/capped_collection.bson: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/index-tool/test/fixtures/metadata/capped_collection/capped_collection.bson -------------------------------------------------------------------------------- /index-tool/test/fixtures/metadata/capped_collection/capped_collection.metadata.json: -------------------------------------------------------------------------------- 1 | {"options":{"capped":true,"size":10240,"max":500},"indexes":[{"v":2,"key":{"_id":1},"name":"_id_","ns":"foo_db.foo_col"}]} 2 | -------------------------------------------------------------------------------- /index-tool/test/fixtures/metadata/geo_index/geo_indexed_col.metadata.json: -------------------------------------------------------------------------------- 1 | {"options":{},"indexes":[{"v":2,"key":{"_id":1},"name":"_id_","ns":"foo_db.foo_col"},{"v":2,"key":{"myloc":"2dsphere","category":-1.0,"name":1.0},"name":"myloc_2dsphere_category_-1_name_1","ns":"foo_db.foo_col","2dsphereIndexVersion":3},{"v":2,"key":{"loc2":"2d"},"name":"loc2_2d","ns":"foo_db.foo_col"}]} 2 | -------------------------------------------------------------------------------- /index-tool/test/fixtures/metadata/storage_engine/storage_engine.metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "options":{ 3 | 4 | }, 5 | "indexes":[ 6 | { 7 | "v":2, 8 | "key":{ 9 | "_id":1 10 | }, 11 | "name":"_id_", 12 | "ns":"foo_db.foo_col" 13 | }, 14 | { 15 | "v":2, 16 | "key":{ 17 | "engine_field":1.0 18 | }, 19 | "name":"engine_field_1", 20 | "ns":"foo_db.foo_col", 21 | "storageEngine":{ 22 | "wiredTiger":{ 23 | "configString":"checksum=off" 24 | } 25 | } 26 | } 27 | ] 28 | } 29 | -------------------------------------------------------------------------------- /index-tool/test/test1.bash: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash 2 | 3 | python3 ../migrationtools/documentdb_index_tool.py --restore-indexes --dry-run --dir test1 | sed -n '1d;p' | cut -c 26- | diff - test1.expects 4 | -------------------------------------------------------------------------------- /index-tool/test/test1.expects: -------------------------------------------------------------------------------- 1 | (dry run) idxtest.tmc: would attempt to add index: _id_ 2 | (dry run) index options: OrderedDict([('name', '_id_'), ('ns', 'idxtest.tmc')]) 3 | (dry run) index keys: [('_id', 1)] 4 | (dry run) idxtest.tmc: would attempt to add index: one_two 5 | (dry run) index options: OrderedDict([('name', 'one_two'), ('ns', 'idxtest.tmc')]) 6 | (dry run) index keys: [('one', 1), ('two', 1)] 7 | (dry run) idxtest.tmc: would attempt to add index: five_three_four 8 | (dry run) index options: OrderedDict([('name', 'five_three_four'), ('ns', 'idxtest.tmc')]) 9 | (dry run) index keys: [('five', 1), ('three', -1), ('four', 1)] 10 | (dry run) idxtest.tmc: would attempt to add index: five_four 11 | (dry run) index options: OrderedDict([('name', 'five_four'), ('ns', 'idxtest.tmc')]) 12 | (dry run) index keys: [('five', 1), ('four', 1)] 13 | (dry run) idxtest.tmc: would attempt to add index: two_one_four 14 | (dry run) index options: OrderedDict([('name', 'two_one_four'), ('ns', 'idxtest.tmc')]) 15 | (dry run) index keys: [('two', -1), ('one', 1), ('four', 1)] 16 | (dry run) idxtest.tmc: would attempt to add index: five_three_four_one_two 17 | (dry run) index options: OrderedDict([('name', 'five_three_four_one_two'), ('ns', 'idxtest.tmc')]) 18 | (dry run) index keys: [('five', 1), ('three', -1), ('four', 1), ('one', 1), ('two', 1)] 19 | -------------------------------------------------------------------------------- /index-tool/test/test1/idxtest/tmc.metadata.json: -------------------------------------------------------------------------------- 1 | {"options":{},"indexes":[{"v":2,"key":{"_id":1},"name":"_id_","ns":"idxtest.tmc"},{"v":2,"key":{"one":1,"two":1},"name":"one_two","ns":"idxtest.tmc"},{"v":2,"key":{"five":1,"three":-1,"four":1},"name":"five_three_four","ns":"idxtest.tmc"},{"v":2,"key":{"five":1,"four":1},"name":"five_four","ns":"idxtest.tmc"},{"v":2,"key":{"two":-1,"one":1,"four":1},"name":"two_one_four","ns":"idxtest.tmc"},{"v":2,"key":{"five":1,"three":-1,"four":1,"one":1,"two":1},"name":"five_three_four_one_two","ns":"idxtest.tmc"}]} -------------------------------------------------------------------------------- /index-tool/test/test_compatibility.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"). 5 | You may not use this file except in compliance with the License. 6 | A copy of the License is located at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | or in the "license" file accompanying this file. This file is distributed 11 | on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 12 | express or implied. See the License for the specific language governing 13 | permissions and limitations under the License. 
14 | """ 15 | 16 | import unittest 17 | from argparse import Namespace 18 | from migrationtools import DocumentDbIndexTool 19 | 20 | 21 | class BaseTestCase(unittest.TestCase): 22 | """ 23 | base test case class that enables debug logs 24 | """ 25 | 26 | def setUp(self): 27 | args = Namespace() 28 | args.debug = True 29 | self.index_tool = DocumentDbIndexTool(args) 30 | 31 | 32 | class TestCompatibilityIssues(BaseTestCase): 33 | """ 34 | test case to check incompatible issues 35 | """ 36 | 37 | def test_incompatible_collection(self): 38 | """ 39 | tests unsupported collection types 40 | """ 41 | metadata = self.index_tool.get_metadata( 42 | 'test/fixtures/metadata/capped_collection') 43 | compatibility_issues = self.index_tool.find_compatibility_issues( 44 | metadata) 45 | expected_compatibility_issues = { 46 | 'foo_db': { 47 | 'foo_col': { 48 | 'unsupported_collection_options': ['capped'] 49 | } 50 | } 51 | } 52 | self.assertDictEqual(compatibility_issues, \ 53 | expected_compatibility_issues, \ 54 | "Compatibility issues should've matched") 55 | 56 | def test_incompatible_index_type(self): 57 | """ 58 | tests unsupported index types 59 | """ 60 | metadata = self.index_tool.get_metadata( 61 | 'test/fixtures/metadata/geo_index') 62 | compatibility_issues = self.index_tool.find_compatibility_issues( 63 | metadata) 64 | expected_compatibility_issues = { 65 | 'foo_db': { 66 | 'foo_col': { 67 | 'loc2_2d': { 68 | 'unsupported_index_types': '2d' 69 | }, 70 | 'myloc_2dsphere_category_-1_name_1': { 71 | 'unsupported_index_types': '2dsphere' 72 | } 73 | } 74 | } 75 | } 76 | self.assertDictEqual(compatibility_issues, \ 77 | expected_compatibility_issues, \ 78 | "Compatibility issues should've matched") 79 | 80 | def test_incompatible_index_options(self): 81 | """ 82 | tests unsupported index options 83 | """ 84 | metadata = self.index_tool.get_metadata( 85 | 'test/fixtures/metadata/storage_engine') 86 | compatibility_issues = self.index_tool.find_compatibility_issues( 87 | metadata) 88 | expected_compatibility_issues = { 89 | 'foo_db': { 90 | 'foo_col': { 91 | 'engine_field_1': { 92 | 'unsupported_index_options': ['storageEngine'] 93 | } 94 | } 95 | } 96 | } 97 | self.assertDictEqual(compatibility_issues, \ 98 | expected_compatibility_issues, \ 99 | "Compatibility issues should've matched") 100 | 101 | 102 | if __name__ == '__main__': 103 | unittest.main() 104 | -------------------------------------------------------------------------------- /migration/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Migration Tools 2 | 3 | * [cosmos-db-migration-utility](./cosmos-db-migration-utility) - migrate from Cosmos DB to Amazon DocumentDB. 4 | * [couchbase-migration-utility](./cosmos-db-migration-utility) - migrate from Couchbase to Amazon DocumentDB. 5 | * [data-differ](./data-differ) - compare documents between two databases or collections. 6 | * [dms-segments](./dms-segments) - calculate segments for Amazon DMS full load segmentation. 7 | * [export-users](./export-users) - export users from MongoDB or Amazon DocumentDB. 8 | * [json-import](./json-import) - high speed concurrent JSON data loader. 9 | * [migrator](./migrator) - high speed concurrent full load and change data capture for online migrations. 10 | * [mongodb-changestream-review](./mongodb-changestream-review) - scan the changestream to determine the collection level insert/update/delete rates. 
11 | * [mongodb-oplog-review](./mongodb-oplog-review) - scan the oplog to determine the collection level insert/update/delete rates. 12 | * [mongodb-ops](./mongodb-ops) - extract collection level query/insert/update/delete MongoDB counters to estimate workload for migrations. 13 | * [mvu-tool](./mvu-tool) - live migration tool to assist in providing near zero-downtime major version upgrades. 14 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/.gitignore: -------------------------------------------------------------------------------- 1 | private/ 2 | 3 | README.pdf 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | .idea 10 | 11 | .DS_Store 12 | .vscode/ 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | pip-wheel-metadata/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | cover/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # IPython 89 | profile_default/ 90 | ipython_config.py 91 | 92 | # pyenv 93 | # For a library or package, you might want to ignore these files since the code is 94 | # intended to run in multiple environments; otherwise, check them in: 95 | # .python-version 96 | 97 | # pipenv 98 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 99 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 100 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 101 | # install all needed dependencies. 102 | #Pipfile.lock 103 | 104 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | .terraform/ 145 | *.tfstate 146 | *.backup 147 | 148 | test.py 149 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/architecture/architecture-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/architecture/architecture-diagram.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/cloud-trail-log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/cloud-trail-log.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/cloud-watch-log-group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/cloud-watch-log-group.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/core-resources-create-stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/core-resources-create-stack.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/core-resources-review-stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/core-resources-review-stack.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/core-resources-stack-details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/core-resources-stack-details.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/core-resources-stack-status.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/core-resources-stack-status.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/documentdb-connection-string.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/documentdb-connection-string.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/documentdb-resources-create-stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/documentdb-resources-create-stack.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/documentdb-resources-stack-details.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/documentdb-resources-stack-details.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/documentdb-resources-stack-status.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/documentdb-resources-stack-status.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/ec2-instance-ami.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/ec2-instance-ami.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/ec2-instance-review.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/ec2-instance-review.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/ec2-instance-review3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/ec2-instance-review3.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/docs/images/s3-bucket-with-lambda-functions.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/docs/images/s3-bucket-with-lambda-functions.png -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/lib/lambda/lambda-pack-pymongo.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/cosmos-db-migration-utility/lib/lambda/lambda-pack-pymongo.zip -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/scripts/build-package.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | pyclean () { 3 | find . -type f -name "*.py[co]" -delete 4 | find . -type d -name "__pycache__" -delete 5 | find . -type f -name "*.log" -delete 6 | find . -type f -name ".DS_Store" -delete 7 | } 8 | 9 | normalDir="`cd "${dirToNormalize}";pwd`" 10 | 11 | SCRIPTS_DIR=$(cd `dirname $BASH_SOURCE`; pwd) 12 | BUILD_DIR="${SCRIPTS_DIR}/../build" 13 | SOURCE_DIR="${SCRIPTS_DIR}/../src" 14 | 15 | echo "DEBUG: SCRIPTS_DIR: ${SCRIPTS_DIR}" 16 | echo "DEBUG: BUILD_DIR: ${BUILD_DIR}" 17 | echo "DEBUG: SOURCE_DIR: ${SOURCE_DIR}" 18 | 19 | echo "Cleaning up build directory: ${BUILD_DIR}" 20 | rm -rf "${BUILD_DIR}" 21 | mkdir -p ${BUILD_DIR}/{lambda,cloudformation} 22 | 23 | echo "Building app-request-reader" 24 | cd ${SOURCE_DIR}/lambda/app-request-reader 25 | pyclean 26 | zip "${BUILD_DIR}/lambda/app-request-reader.zip" *.py >/dev/null 2>&1 27 | 28 | echo "Building gap-watch-request-reader" 29 | cd ${SOURCE_DIR}/lambda/gap-watch-request-reader 30 | pyclean 31 | zip "${BUILD_DIR}/lambda/gap-watch-request-reader.zip" *.py >/dev/null 2>&1 32 | 33 | echo "Building batch-request-reader" 34 | cd ${SOURCE_DIR}/lambda/batch-request-reader 35 | pyclean 36 | # include the py and pem files 37 | zip ${BUILD_DIR}/lambda/batch-request-reader.zip *.p* >/dev/null 2>&1 38 | 39 | echo "Copying migrator-app" 40 | cd ${SOURCE_DIR}/migrator-app 41 | pyclean 42 | cp -RL . ${BUILD_DIR}/migrator-app 43 | 44 | echo "Copying configure app" 45 | cd ${SOURCE_DIR}/configure 46 | pyclean 47 | cp -RL . 
${BUILD_DIR}/configure 48 | 49 | echo "Copying CloudFormation templates" 50 | cd ${SOURCE_DIR}/cloudformation 51 | cp *.yaml ${BUILD_DIR}/cloudformation/ 52 | 53 | echo "Copying lambda packs" 54 | cd "${SCRIPTS_DIR}/../lib/lambda" 55 | cp *.zip ${BUILD_DIR}/lambda/ 56 | 57 | echo "Creating a package: cosmosdb-migrator.tgz" 58 | cd ${BUILD_DIR} 59 | tar -czf cosmosdb-migrator.tgz * >/dev/null 2>&1 60 | rm -rf migrator-app/ cloudformation/ configure/ lambda/ -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/configure/application.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import json 3 | from time import sleep 4 | from common.logger import get_logger 5 | from boto3.dynamodb.conditions import Key 6 | from json_encoder import JSONFriendlyEncoder 7 | 8 | logger = get_logger(__name__) 9 | 10 | class Application: 11 | 12 | def __init__(self, cluster_name): 13 | self.__cluster_name = cluster_name 14 | 15 | def __update_secret_value(self, key, value): 16 | client = boto3.client('secretsmanager') 17 | try: 18 | response = client.create_secret( 19 | Name=key, 20 | SecretString=value 21 | ) 22 | return response 23 | except Exception as e: 24 | if "ResourceExistsException" in str(e): 25 | response = client.update_secret( 26 | SecretId=key, 27 | SecretString=value 28 | ) 29 | return response 30 | else: 31 | raise 32 | 33 | def set_connection_string(self, connection_string): 34 | logger.info("Setting the connection string for the cluster_name: %s.", self.__cluster_name) 35 | self.__update_secret_value("migrator-app/{}".format(self.__cluster_name), connection_string) 36 | logger.info("Successfully completed setting the connection string for the cluster_name: %s. Connection string: %s", self.__cluster_name, connection_string) 37 | self.set_event_writer("stop") 38 | 39 | def set_event_writer(self, status): 40 | logger.info("Setting the event writer status as %s", status) 41 | payload = {"cluster_name": self.__cluster_name, "component":"event_writer", "operation": status} 42 | self.__send_message("app-request-queue", payload) 43 | # TODO: What aobut the gap-watcher. It should be in configure 44 | # not in the migrator app 45 | logger.info("Successfully completed setting the event writer status as %s", status) 46 | 47 | def __send_message(self, queue_name, payload): 48 | data = json.dumps(payload) 49 | logger.info("Starting to send SQS requests to queue: %s. Payload: %s", queue_name, data) 50 | sqs_client = boto3.client('sqs') 51 | queue = sqs_client.get_queue_url(QueueName=queue_name) 52 | response = sqs_client.send_message( 53 | QueueUrl= queue['QueueUrl'], MessageBody=data) 54 | logger.info("Successfully completed sending SQS requests to queue: %s. Response: %s", 55 | queue_name, response) 56 | 57 | def print_status(self): 58 | client = boto3.resource('dynamodb') 59 | response = client.Table("migration_status").get_item(Key={ 60 | "cluster_name": self.__cluster_name }) 61 | logger.debug("Successfully completed getting the migration_status for the cluster_name: %s.", self.__cluster_name) 62 | if "Item" in response: 63 | response["Item"]["details"] = json.loads(response["Item"]["details"]) 64 | logger.info("Status: %s", json.dumps(response["Item"], cls=JSONFriendlyEncoder, indent=1)) 65 | else: 66 | logger.info("Status: Not available yet. 
Did you start the migration?") 67 | 68 | def watch_status(self): 69 | while True: 70 | self.print_status() 71 | sleep(5) -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/configure/commandline_parser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from common.logger import get_logger 4 | from common.application_exception import ApplicationException 5 | 6 | logger = get_logger(__name__) 7 | 8 | class CommandLineParser(): 9 | 10 | def get_options(self): 11 | config = self.__validate_arguments() 12 | return config 13 | 14 | def __get_parser(self): 15 | parser = argparse.ArgumentParser(description='Setup or configure the AWS services.') 16 | parser.add_argument('--cluster-name', '-n', help='Identifies the name of the cluster being migrated', required=True) 17 | parser.add_argument('--connection-string', '-c', help='Sets the connection string for the DocumentDB cluster.') 18 | parser.add_argument('--event-writer', '-e', help='Sets the status of the event writer. Values: stop or start.') 19 | parser.add_argument('--status', '-s', help='Displays the migration status and time gap details.', action='store_true') 20 | parser.add_argument('--watch-status', '-w', help='Watch the migration status and time gap details in a loop.', action='store_true') 21 | return parser 22 | 23 | 24 | def __validate_arguments(self): 25 | parser = self.__get_parser() 26 | config = vars(parser.parse_args()) 27 | logger.info("Command line arguments given: " + json.dumps(config)) 28 | 29 | # Verify necessary components are supplied in command line arguments 30 | command = [] 31 | if config["connection_string"]: 32 | command.append("connection_string") 33 | if config["status"]: 34 | command.append("status") 35 | if config["watch_status"]: 36 | command.append("watch_status") 37 | if not config["event_writer"] is None: 38 | command.append("event_writer") 39 | 40 | if len(command) == 0: 41 | raise ApplicationException("Missing input argument for command. Specify --connection-string or --event-writer.") 42 | if len(command) > 1: 43 | raise ApplicationException("Please specify only one of the commands: --connection-string, --event_writer, --status or --watch-status arguments.") 44 | 45 | config["command"] = command[0] 46 | logger.info("Validated Command line arguments are: " + json.dumps(config)) 47 | 48 | if config["command"] == "event_writer": 49 | if config["event_writer"] != "start" and config["event_writer"] != "stop": 50 | raise ApplicationException("Given value for event-writer is not valid: {}. 
Valid values are stop or start".format(config["event_writer"])) 51 | elif config["command"] == "connection_string": 52 | if config["connection_string"] == "" or config["connection_string"] == None: 53 | raise ApplicationException("Given value for connection-string is not valid: [{}].".format(config["connection_string"])) 54 | return config -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/configure/common/application_exception.py: -------------------------------------------------------------------------------- 1 | ../../migrator-app/common/application_exception.py -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/configure/common/logger.py: -------------------------------------------------------------------------------- 1 | ../../migrator-app/common/logger.py -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/configure/json_encoder.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from datetime import datetime 4 | from decimal import Decimal 5 | import json 6 | 7 | class JSONFriendlyEncoder(json.JSONEncoder): 8 | def default(self, o): 9 | if isinstance(o, Decimal): 10 | if o % 1 > 0: 11 | return float(o) 12 | else: 13 | return int(o) 14 | if isinstance(o, datetime): 15 | return o.isoformat() 16 | return super(JSONFriendlyEncoder, self).default(o) -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/configure/main.py: -------------------------------------------------------------------------------- 1 | import json 2 | from application import Application 3 | from common.logger import get_logger 4 | from commandline_parser import CommandLineParser 5 | from common.application_exception import ApplicationException 6 | from signal import signal, SIGINT 7 | from sys import exit 8 | import os 9 | 10 | logger = get_logger(__name__) 11 | 12 | def exit_handler(signal_received, frame): 13 | # Handle any cleanup here 14 | print('SIGINT or CTRL-C detected. 
Exiting gracefully') 15 | exit(0) 16 | 17 | def check_environment_variables(variables): 18 | for variable in variables: 19 | if variable not in os.environ: 20 | logger.fatal("Environment variable %s is required but not set.", variable) 21 | logger.error("The following environment variables are required: %s", json.dumps(variables, indent=2)) 22 | exit(1) 23 | 24 | # Tell Python to run the handler() function when SIGINT is recieved 25 | signal(SIGINT, exit_handler) 26 | 27 | try: 28 | # check if the required environment variables are set or not 29 | names = ["AWS_DEFAULT_REGION", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"] 30 | check_environment_variables(names) 31 | 32 | parser = CommandLineParser() 33 | commandline_options = parser.get_options() 34 | except ApplicationException as ae: 35 | logger.error("%s", ae) 36 | exit(1) 37 | except Exception as e: 38 | logger.error("Exception occurred while processing the request", exc_info=True) 39 | exit(1) 40 | 41 | logger.info("Starting to configure application components with commandline_options: %s", json.dumps(commandline_options)) 42 | app = Application(commandline_options["cluster_name"]) 43 | if commandline_options["command"] == "connection_string": 44 | app.set_connection_string(commandline_options["connection_string"]) 45 | elif commandline_options["command"] == "event_writer": 46 | app.set_event_writer(commandline_options["event_writer"]) 47 | elif commandline_options["command"] == "status": 48 | app.print_status() 49 | elif commandline_options["command"] == "watch_status": 50 | app.watch_status() 51 | logger.info("Successfully completed configuring the application components.") -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/configure/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.12.17 2 | argparse==1.4.0 -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/lambda/app-request-reader/sample_request_start.json: -------------------------------------------------------------------------------- 1 | { 2 | "Records": [ 3 | { 4 | "messageId": "19dd0b57-b21e-4ac1-bd88-01bbb068cb78", 5 | "receiptHandle": "MessageReceiptHandle", 6 | "body": "{\"cluster_name\":\"app-name\", \"component\":\"event_writer\", \"operation\":\"start\"}", 7 | "attributes": { 8 | "ApproximateReceiveCount": "1", 9 | "SentTimestamp": "1523232000000", 10 | "SenderId": "123456789012", 11 | "ApproximateFirstReceiveTimestamp": "1523232000001" 12 | }, 13 | "messageAttributes": {}, 14 | "md5OfBody": "7b270e59b47ff90a553787216d55d91d", 15 | "eventSource": "aws:sqs", 16 | "eventSourceARN": "arn:aws:sqs:us-east-2:123456789012:MyQueue", 17 | "awsRegion": "us-east-2" 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/lambda/app-request-reader/sample_request_stop.json: -------------------------------------------------------------------------------- 1 | { 2 | "Records": [ 3 | { 4 | "messageId": "19dd0b57-b21e-4ac1-bd88-01bbb068cb78", 5 | "receiptHandle": "MessageReceiptHandle", 6 | "body": "{\"cluster_name\":\"app-name\", \"component\":\"event_writer\", \"operation\":\"stop\"}", 7 | "attributes": { 8 | "ApproximateReceiveCount": "1", 9 | "SentTimestamp": "1523232000000", 10 | "SenderId": "123456789012", 11 | "ApproximateFirstReceiveTimestamp": "1523232000001" 12 | }, 13 | 
"messageAttributes": {}, 14 | "md5OfBody": "7b270e59b47ff90a553787216d55d91d", 15 | "eventSource": "aws:sqs", 16 | "eventSourceARN": "arn:aws:sqs:us-east-2:123456789012:MyQueue", 17 | "awsRegion": "us-east-2" 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/lambda/batch-request-reader/rds-combined-ca-bundle.pem: -------------------------------------------------------------------------------- 1 | ../../configure/rds-combined-ca-bundle.pem -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/lambda/batch-request-reader/sample_request.json: -------------------------------------------------------------------------------- 1 | { 2 | "Records": [ 3 | { 4 | "messageId": "19dd0b57-b21e-4ac1-bd88-01bbb068cb78", 5 | "receiptHandle": "MessageReceiptHandle", 6 | "body": "{\"cluster_name\": \"app-name\", \"namespace\": \"social.people\"}", 7 | "attributes": { 8 | "ApproximateReceiveCount": "1", 9 | "SentTimestamp": "1523232000000", 10 | "SenderId": "123456789012", 11 | "ApproximateFirstReceiveTimestamp": "1523232000001" 12 | }, 13 | "messageAttributes": {}, 14 | "md5OfBody": "7b270e59b47ff90a553787216d55d91d", 15 | "eventSource": "aws:sqs", 16 | "eventSourceARN": "arn:aws:sqs:us-east-2:123456789012:MyQueue", 17 | "awsRegion": "us-east-2" 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/lambda/gap-watch-request-reader/sample_request.json: -------------------------------------------------------------------------------- 1 | { 2 | "Records": [ 3 | { 4 | "messageId": "19dd0b57-b21e-4ac1-bd88-01bbb068cb78", 5 | "receiptHandle": "MessageReceiptHandle", 6 | "body": "{\"cluster_name\": \"app-name\"}", 7 | "attributes": { 8 | "ApproximateReceiveCount": "1", 9 | "SentTimestamp": "1523232000000", 10 | "SenderId": "123456789012", 11 | "ApproximateFirstReceiveTimestamp": "1523232000001" 12 | }, 13 | "messageAttributes": {}, 14 | "md5OfBody": "7b270e59b47ff90a553787216d55d91d", 15 | "eventSource": "aws:sqs", 16 | "eventSourceARN": "arn:aws:sqs:us-east-2:123456789012:MyQueue", 17 | "awsRegion": "us-east-2" 18 | } 19 | ] 20 | } -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/commandline_parser.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from common.logger import get_logger 4 | from common.application_exception import ApplicationException 5 | 6 | logger = get_logger(__name__) 7 | 8 | class CommandLineParser(): 9 | 10 | def get_options(self): 11 | config = self.__validate_arguments() 12 | return config 13 | 14 | def __get_parser(self): 15 | parser = argparse.ArgumentParser(description='Start watching the change events on Cosmos Cluster') 16 | parser.add_argument('--cluster-name', '-n', help='Identifies the name of the cluster being migrated', required=True) 17 | return parser 18 | 19 | 20 | def __validate_arguments(self): 21 | parser = self.__get_parser() 22 | config = vars(parser.parse_args()) 23 | logger.info("Command line arguments given: " + json.dumps(config)) 24 | return config -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/common/Singleton.py: 
-------------------------------------------------------------------------------- 1 | class Singleton(type): 2 | _instances = {} 3 | 4 | def __call__(cls, *args, **kwargs): 5 | if cls not in cls._instances: 6 | cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs) 7 | return cls._instances[cls] 8 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/common/__init__.py: -------------------------------------------------------------------------------- 1 | from .Singleton import Singleton 2 | from .logger import get_logger 3 | from .timer import RepeatedTimer 4 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/common/application_exception.py: -------------------------------------------------------------------------------- 1 | # define Python user-defined exceptions 2 | class ApplicationException(Exception): 3 | """Base class for other exceptions""" 4 | pass 5 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/common/json_encoder.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from datetime import datetime 4 | from decimal import Decimal 5 | import json 6 | 7 | class JSONFriendlyEncoder(json.JSONEncoder): 8 | def default(self, o): 9 | if isinstance(o, Decimal): 10 | if o % 1 > 0: 11 | return float(o) 12 | else: 13 | return int(o) 14 | if isinstance(o, datetime): 15 | return o.isoformat() 16 | return super(JSONFriendlyEncoder, self).default(o) -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/common/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name): 5 | logging.basicConfig( 6 | filename='console.log', 7 | format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', 8 | level=logging.DEBUG, 9 | datefmt='%Y-%m-%d %H:%M:%S') 10 | logger = logging.getLogger(name) 11 | logger.addHandler(__getConsoleHandler()) 12 | return logger 13 | 14 | def __getConsoleHandler(): 15 | # create console handler with a higher log level 16 | consoleHandler = logging.StreamHandler() 17 | consoleHandler.setLevel(logging.INFO) 18 | formatter = logging.Formatter( 19 | fmt='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', 20 | datefmt='%Y-%m-%d %H:%M:%S') 21 | consoleHandler.setFormatter(formatter) 22 | return consoleHandler 23 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/common/timer.py: -------------------------------------------------------------------------------- 1 | from threading import Timer 2 | 3 | 4 | class RepeatedTimer(object): 5 | def __init__(self, interval, function, *args, **kwargs): 6 | self._timer = None 7 | self.interval = interval 8 | self.function = function 9 | self.args = args 10 | self.kwargs = kwargs 11 | self.is_running = False 12 | self.is_cancelled = False 13 | 14 | def _run(self): 15 | self.is_running = False 16 | self.function(*self.args, **self.kwargs) 17 | self.start() 18 | 19 | def start(self): 20 | if not self.is_running and not self.is_cancelled: 21 | self._timer = Timer(self.interval, self._run) 22 | self._timer.start() 23 | self.is_running = True 24 | 25 | def 
stop(self): 26 | self.is_running = False 27 | self.is_cancelled = True 28 | self._timer.cancel() 29 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | from .tokens_manager import TokensManager 2 | from .change_manager import ChangeManager 3 | from .document_batcher import DocumentBatcher 4 | from .file_helper import FileHelper 5 | from .s3_helper import S3Helper 6 | from .dynamodb_helper import DynamodbHelper 7 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/helpers/change_manager.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | from common.logger import get_logger 4 | from .document_batcher import DocumentBatcher 5 | 6 | logger = get_logger(__name__) 7 | 8 | 9 | class ChangeManager: 10 | def __init__(self, cluster_name, dynamodb_helper, tokens): 11 | self.__managers = {} 12 | self.__cluster_name = cluster_name 13 | self.__tokens = tokens 14 | self.__lock = threading.Lock() 15 | self.__dynamodb_helper = dynamodb_helper 16 | 17 | def get_manager(self, cluster_name, database_name, collection_name): 18 | namespace = "{}.{}".format(database_name, collection_name) 19 | if namespace in self.__managers: 20 | return self.__managers[namespace] 21 | else: 22 | try: 23 | self.__lock.acquire() 24 | if namespace not in self.__managers: 25 | manager = DocumentBatcher(self.__cluster_name, namespace, database_name, collection_name, self.__dynamodb_helper) 26 | token = None 27 | if namespace in self.__tokens: 28 | token = self.__tokens[namespace] 29 | manager.initialize(token) 30 | self.__managers[namespace] = manager 31 | return self.__managers[namespace] 32 | finally: 33 | self.__lock.release() 34 | 35 | def on_change_event(self, cluster_name, database_name, collection_name, change): 36 | manager = self.get_manager(cluster_name, database_name, collection_name) 37 | # invoke the change even on the specific manager 38 | manager.on_change_event(cluster_name, database_name, collection_name, change) 39 | 40 | def close(self): 41 | logger.info("Cleaning up the Change Manager") 42 | for namespace in self.__managers: 43 | manager = self.__managers[namespace] 44 | try: 45 | manager.close() 46 | except: 47 | pass 48 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/helpers/dynamodb_helper.py: -------------------------------------------------------------------------------- 1 | from decimal import Decimal 2 | import boto3 3 | from bson.json_util import dumps 4 | import json 5 | from common.Singleton import Singleton 6 | from common.logger import get_logger 7 | from common.json_encoder import JSONFriendlyEncoder 8 | 9 | logger = get_logger(__name__) 10 | 11 | 12 | class DynamodbHelper(metaclass=Singleton): 13 | def __init__(self, cluster_name): 14 | self.__client = boto3.resource('dynamodb') 15 | self.__cluster_name = cluster_name 16 | 17 | def save_namespaces(self, database_collections): 18 | for database_name in database_collections: 19 | logger.info("database: %s, collections: %s", database_name, dumps(database_collections[database_name])) 20 | for collection_name in database_collections[database_name]: 21 | self.__client.Table("namespaces").put_item(Item={ 22 | "cluster_name": 
self.__cluster_name, 23 | "namespace": "{}.{}".format(database_name, collection_name), 24 | "database_name": database_name, 25 | "collection_name": collection_name}) 26 | 27 | def save_change_event(self, data): 28 | watcher_id = "{}::{}".format(data["cluster_name"], data["namespace"]) 29 | batch_status = "{}::{:06.0f}".format(str(data["is_processed"]).lower(), data["batch_id"]) 30 | logger.info("About to save change event. watcher_id: %s and batch_status: %s", watcher_id, batch_status) 31 | change_event = { 32 | "watcher_id": watcher_id, 33 | "batch_status": batch_status, 34 | "cluster_name": data["cluster_name"], 35 | "namespace": data["namespace"], 36 | "batch_id": Decimal(data["batch_id"]), 37 | "s3_link": data["s3_link"], 38 | "created_timestamp": data["created_timestamp"], 39 | "document_count": Decimal(data["document_count"]), 40 | "is_processed": data["is_processed"], 41 | "resume_token": data["resume_token"], 42 | "processed_timestamp": data["processed_timestamp"]} 43 | result = self.__client.Table("change_events").put_item(Item=change_event) 44 | logger.info("Successfully saved the change event. watcher_id: %s and batch_status: %s. change_event: %s", 45 | watcher_id, batch_status, json.dumps(change_event, cls=JSONFriendlyEncoder)) 46 | return result 47 | 48 | def get_watcher(self, namespace): 49 | watcher_id = "{}::{}".format(self.__cluster_name, namespace) 50 | logger.info("Getting the watcher item by id: %s", watcher_id) 51 | response = self.__client.Table("watchers").get_item( 52 | Key={"watcher_id": watcher_id}) 53 | watcher = None 54 | if "Item" in response: 55 | watcher = response['Item'] 56 | watcher["batch_id"] = float(watcher["batch_id"]) 57 | watcher["total_count"] = float(watcher["total_count"]) 58 | logger.info("Successfully found the watcher item for id: %s. Item: %s", watcher_id, dumps(watcher)) 59 | return watcher 60 | 61 | def save_watcher(self, data): 62 | result = self.__client.Table("watchers").update_item( 63 | Key={"watcher_id": data["watcher_id"]}, # <--- changed 64 | UpdateExpression="SET cluster_name = :cn, namespace = :n, resume_token = :t, validation_document = :v, batch_id = :b, #total_count = if_not_exists(#total_count, :initial) + :dc, created_timestamp = :ts", 65 | ExpressionAttributeNames={'#total_count': 'total_count'}, 66 | ExpressionAttributeValues={ 67 | ":cn": data["cluster_name"], 68 | ":n": data["namespace"], 69 | ":t": data["resume_token"], # <--- changed 70 | ":v": data["validation_document"], 71 | ":b": Decimal(data["batch_id"]), 72 | ":dc": Decimal(data["document_count"]), 73 | ":initial": 0, 74 | ":ts": data["created_timestamp"] # <--- changed 75 | }, 76 | ReturnValues="UPDATED_NEW") 77 | return result -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/helpers/file_helper.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import tempfile 4 | import traceback 5 | 6 | logger = logging.getLogger(__name__) 7 | logger.setLevel(logging.INFO) 8 | 9 | 10 | class FileHelper: 11 | """ 12 | A helper class to work with local file system. The class methods 13 | offer methods to create a temporary files and delete the files 14 | on local file system. 
15 | """ 16 | 17 | def create_file(self): 18 | """ 19 | Creates a named temporary file on local file system 20 | :rtype: str 21 | :return: Returns a file path of temporary file 22 | """ 23 | temp_file = tempfile.NamedTemporaryFile(mode='w+b', delete=False) 24 | logger.info("Successfully created a temporary file: %s", temp_file.name) 25 | return temp_file 26 | 27 | def delete_file(self, file_path): 28 | """ 29 | Deletes a file located on local file system 30 | :param file_path A file path for the file being deleted 31 | """ 32 | try: 33 | os.unlink(file_path) 34 | logger.info("Successfully deleted the file: %s", file_path) 35 | except Exception as e: 36 | stack_trace = traceback.format_stack() 37 | logger.error("Exception while deleting file: %s. Error: %s", file_path, e) 38 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/helpers/s3_helper.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import boto3 4 | 5 | from .file_helper import FileHelper 6 | 7 | logger = logging.getLogger(__name__) 8 | logger.setLevel(logging.INFO) 9 | 10 | 11 | class S3Helper: 12 | """ 13 | A helper class to work with s3. The class offers various 14 | methods to download files from s3 to local file system 15 | as well as upload local files to the s3 bucket. 16 | 17 | Assumption: The AWS_SECRET_ACCESS_KEY, AWS_ACCESS_KEY_ID are 18 | assumed to be loaded in the environment variables. 19 | """ 20 | 21 | def __init__(self): 22 | self.__fh = FileHelper() 23 | 24 | def download(self, bucket_name, key_name): 25 | """ 26 | Downloads the file from s3 to a local temporary file 27 | :param bucket_name A string representing of s3 bucket name 28 | :param key_name A string representing of s3 key name 29 | :rtype: str 30 | :return: Returns a local file path of downloaded s3 file 31 | """ 32 | temp_file = self.__fh.create_file() 33 | s3 = boto3.client('s3') 34 | logger.info("Starting to download s3 file - bucket_name: %s, key_name: %s to local file: %s", 35 | bucket_name, key_name, temp_file.name) 36 | s3.download_file(Bucket=bucket_name, Key=key_name, Filename=temp_file.name) 37 | logger.info("Successfully downloaded s3 contents into file: %s", temp_file.name) 38 | return temp_file.name 39 | 40 | def upload(self, file_path, bucket_name, key_name): 41 | """ 42 | Uploads a local file to s3 bucket 43 | :param file_path A file path for the file being uploaded 44 | :param bucket_name A string representing of s3 bucket name 45 | :param key_name A string representing of s3 key name 46 | """ 47 | s3 = boto3.client('s3') 48 | logger.info("Starting to upload file: %s to s3", file_path) 49 | s3.upload_file(file_path, bucket_name, key_name) 50 | logger.info("Successfully uploaded file contents: %s into s3", file_path) 51 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/helpers/tokens_manager.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 | import yaml 5 | from bson.json_util import dumps 6 | 7 | from common.logger import get_logger 8 | 9 | logger = get_logger(__name__) 10 | 11 | 12 | # noinspection PyBroadException 13 | class TokensManager: 14 | def __init__(self, dynamo_helper): 15 | self.__data = {} 16 | self.__file_path = "{}/tokens.yaml".format(os.getcwd()) 17 | self.__dh = dynamo_helper 18 | 19 | def load(self, namespaces): 20 
| # fetch data from data store 21 | return self.__load_from_db(namespaces) 22 | # return self.__load_from_file() 23 | 24 | def __load_from_db(self, namespaces): 25 | # for each namespace, read watcher entry 26 | tokens = {} 27 | for database_name in namespaces: 28 | collections = namespaces[database_name] 29 | for collection_name in collections: 30 | namespace = "{}.{}".format(database_name, collection_name) 31 | token = self.__dh.get_watcher(namespace) 32 | if token is not None: 33 | tokens[namespace] = token 34 | logger.info("Successfully loaded tokens from database: %s", dumps(tokens)) 35 | return tokens 36 | 37 | def __load_from_file(self): 38 | # read the data from file/remote cache 39 | try: 40 | logger.info("Opening tokens file located at %s", self.__file_path) 41 | with open(self.__file_path, 'r') as stream: 42 | try: 43 | self.__data = yaml.safe_load(stream) 44 | except yaml.YAMLError as e: 45 | logger.fatal("Error occured while reading file as YAML", exc_info=True) 46 | exit(1) 47 | except Exception as ex: 48 | logger.fatal("Error opening tokens file: %s", self.__file_path, exc_info=True) 49 | exit(1) 50 | logger.info("Successfully loaded tokens.yaml. Contents: %s", json.dumps(self.__data)) 51 | return self.__data 52 | 53 | def save(self, peek_info): 54 | # save the data into a local file/remote cache 55 | try: 56 | logger.info("Writing peek_info to tokens file located at %s. Contents: %s", self.__file_path, 57 | dumps(peek_info)) 58 | with open(self.__file_path, 'w') as stream: 59 | try: 60 | self.__data = yaml.dump(peek_info, stream) 61 | except yaml.YAMLError as e: 62 | logger.fatal("Error occured while writing contents as YAML. Content: %s", dumps(peek_info), 63 | exc_info=True) 64 | exit(1) 65 | except Exception as ex: 66 | logger.fatal("Error opening tokens file: %s", self.__file_path, exc_info=True) 67 | exit(1) 68 | logger.info("Successfully saved tokens.yaml with contents: %s", json.dumps(self.__data)) 69 | return self.__data 70 | 71 | def get_token(self): 72 | return self.__data 73 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | from common.logger import get_logger 3 | from signal import signal, SIGINT 4 | from sys import exit 5 | import json 6 | from bson.json_util import loads, dumps 7 | 8 | from helpers.change_manager import ChangeManager 9 | from helpers.dynamodb_helper import DynamodbHelper 10 | from helpers.tokens_manager import TokensManager 11 | from migrators.ClusterMigrator import ClusterMigrator 12 | from commandline_parser import CommandLineParser 13 | from common.application_exception import ApplicationException 14 | 15 | logger = get_logger(__name__) 16 | 17 | def exit_handler(signal_received, frame): 18 | # Handle any cleanup here 19 | print('SIGINT or CTRL-C detected. 
Exiting gracefully') 20 | exit(0) 21 | 22 | def check_environment_variables(variables): 23 | for variable in variables: 24 | if variable not in os.environ: 25 | logger.fatal("Environment variable %s is required but not set.", variable) 26 | logger.error("The following environment variables are required: %s", json.dumps(variables, indent=2)) 27 | exit(1) 28 | 29 | # Tell Python to run the exit_handler() function when SIGINT is received 30 | signal(SIGINT, exit_handler) 31 | 32 | migrator = None 33 | writer = None 34 | change_manager = None 35 | try: 36 | # check if the required environment variables are set or not 37 | names = ["AWS_DEFAULT_REGION", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "S3_CHANGE_FEED_BUCKET_NAME", "SOURCE_URI"] 38 | check_environment_variables(names) 39 | 40 | parser = CommandLineParser() 41 | commandline_options = parser.get_options() 42 | 43 | cluster_name = commandline_options["cluster_name"] 44 | 45 | # get the active namespaces from the cluster 46 | source_connection_string = os.environ['SOURCE_URI'] 47 | migrator = ClusterMigrator(cluster_name, source_connection_string) 48 | namespaces = migrator.get_namespaces() 49 | logger.info("Found the following namespaces on cluster_name: %s. Namespaces: %s", cluster_name, dumps(namespaces)) 50 | 51 | # load the resume tokens for the given namespaces 52 | dynamo_helper = DynamodbHelper(cluster_name) 53 | tokens_manager = TokensManager(dynamo_helper) 54 | tokens = tokens_manager.load(namespaces) 55 | 56 | # # save the list of databases being tracked 57 | dynamo_helper.save_namespaces(namespaces) 58 | 59 | change_manager = ChangeManager(cluster_name, dynamo_helper, tokens) 60 | migrator.watch(tokens, change_manager.on_change_event) 61 | except ApplicationException as ae: 62 | logger.error("%s", ae) 63 | exit(1) 64 | finally: 65 | if migrator is not None: 66 | migrator.close() 67 | if change_manager is not None: 68 | change_manager.close() 69 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/migrators/ClusterMigrator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import threading 3 | 4 | from bson.json_util import dumps 5 | from pymongo import MongoClient 6 | 7 | from common.logger import get_logger 8 | from .CollectionMigrator import CollectionMigrator 9 | from .DatabaseMigrator import DatabaseMigrator 10 | from .TokenTracker import TokenTracker 11 | 12 | logger = get_logger(__name__) 13 | 14 | 15 | class ClusterMigrator: 16 | def __init__(self, cluster_name, connection_string): 17 | self.__connection_string = connection_string 18 | self.__cluster_name = cluster_name 19 | self.__callback = None 20 | self.__client = MongoClient(self.__connection_string) 21 | self.__database_migrators = [] 22 | self.__skip_databases = ["admin", "local", "config"] 23 | self.__tracker = TokenTracker() 24 | self.__timer_threads = None 25 | logger.info("Initializing the cluster migrator with connection string: %s", self.__connection_string) 26 | 27 | def get_namespaces(self): 28 | db_collections = {} 29 | database_names = self.__client.list_database_names() 30 | for db_name in database_names: 31 | if db_name not in self.__skip_databases: 32 | db = self.__client.get_database(db_name) 33 | collection_names = db.list_collection_names() 34 | db_collections[db_name] = collection_names 35 | else: 36 | logger.info("Skipping the database: %s while fetching get_namespaces", db_name) 37
| return db_collections 38 | 39 | def peek(self, namespace): 40 | names = namespace.split(".") 41 | database_name = names[0] 42 | collection_name = ".".join(names[1::]) 43 | collection = CollectionMigrator(self.__client, database_name, collection_name) 44 | return collection.peek() 45 | 46 | def validate(self, tokens): 47 | logger.info("Validating the tokens: %s", dumps(tokens)) 48 | for namespace in tokens: 49 | logger.info("Validating the tokens: %s => %s", namespace, dumps(tokens[namespace])) 50 | token = tokens[namespace] 51 | names = namespace.split(".") 52 | database_name = names[0] 53 | collection_name = ".".join(names[1::]) 54 | collection = CollectionMigrator(self.__client, database_name, collection_name) 55 | is_valid = collection.validate(token) 56 | if not is_valid: 57 | logger.error("Validation of change stream resume token failed on collection: %s.", namespace) 58 | return False 59 | return True 60 | 61 | def watch(self, tokens, notify_callback): 62 | try: 63 | self.__callback = notify_callback 64 | logger.info("Fetching databases from the cluster: %s", self.__connection_string) 65 | database_names = self.__client.list_database_names() 66 | logger.info("Found the databases %s", json.dumps(database_names)) 67 | watch_threads = [] 68 | for database_name in database_names: 69 | if database_name not in self.__skip_databases: 70 | database_migrator = DatabaseMigrator(self.__client, self.__cluster_name, database_name) 71 | t = threading.Thread(target=database_migrator.watch, args=(tokens, notify_callback,)) 72 | t.start() 73 | watch_threads.append(t) 74 | self.__database_migrators.append(database_migrator) 75 | else: 76 | logger.info("Skipping the database: %s for watching", database_name) 77 | 78 | # wait for threads to join 79 | for watch_thread in watch_threads: 80 | watch_thread.join() 81 | except Exception as e: 82 | logger.exception(e) 83 | 84 | def __invoke_callback(self, database_name, collection_name, change): 85 | namespace = "{}.{}".format(database_name, collection_name) 86 | # self.__tracker.update_token(namespace, change) 87 | self.__callback(database_name, collection_name, change) 88 | 89 | def close(self): 90 | logger.info("Cleaning up the database migrators in the cluster.") 91 | for migrator in self.__database_migrators: 92 | migrator.close() 93 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/migrators/DatabaseMigrator.py: -------------------------------------------------------------------------------- 1 | from pymongo import MongoClient 2 | import json 3 | from common.logger import get_logger 4 | from .CollectionMigrator import CollectionMigrator 5 | import threading 6 | 7 | logger = get_logger(__name__) 8 | 9 | 10 | class DatabaseMigrator: 11 | def __init__(self, client, cluster_name, database_name): 12 | self.__client = client 13 | self.__cluster_name = cluster_name 14 | self.__database_name = database_name 15 | self.__collection_migrators = [] 16 | logger.info("Initializing database migrator for database: [%s]", self.__database_name) 17 | 18 | def watch(self, tokens, notify_callback): 19 | try: 20 | db = self.__client.get_database(self.__database_name) 21 | logger.info("Fetching collections from Database: %s", self.__database_name) 22 | collection_names = db.list_collection_names() 23 | logger.info("Found collections in database: %s; Collections: %s", self.__database_name, 24 | json.dumps(collection_names)) 25 | watch_threads = [] 26 | for
collection_name in collection_names: 27 | namespace = "{}.{}".format(self.__database_name, collection_name) 28 | token = {} 29 | if namespace in tokens: 30 | token = tokens[namespace] 31 | collection_migrator = CollectionMigrator(self.__client, self.__cluster_name, self.__database_name, collection_name) 32 | t = threading.Thread(target=collection_migrator.watch, args=(token, notify_callback,)) 33 | watch_threads.append(t) 34 | t.start() 35 | self.__collection_migrators.append(collection_migrator) 36 | logger.info("Found the collection with namespace %s.%s", self.__database_name, collection_name) 37 | # wait for threads to join 38 | for watch_thread in watch_threads: 39 | watch_thread.join() 40 | except Exception as e: 41 | logger.exception(e) 42 | finally: 43 | if self.__client is not None: 44 | self.__client.close() 45 | logger.info("Gracefully closing the connection") 46 | 47 | def close(self): 48 | logger.info("Cleaning up the database migrators in the cluster.") 49 | for migrator in self.__collection_migrators: 50 | migrator.close() 51 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/migrators/TokenTracker.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import threading 3 | from datetime import datetime 4 | 5 | from common.Singleton import Singleton 6 | from common.logger import get_logger 7 | 8 | logger = get_logger(__name__) 9 | 10 | 11 | class TokenTracker(metaclass=Singleton): 12 | def __init__(self): 13 | self.__tokens = {} 14 | self.__event = threading.Event() 15 | 16 | def update_token(self, key, change): 17 | # waits for any active read to complete 18 | self.__event.wait() 19 | # store the token value for given key 20 | self.__tokens[key] = { 21 | "last_changed_at": datetime.now().strftime("%d/%m/%Y %H:%M:%S"), 22 | "token_id": change["_id"] 23 | # , "change": change 24 | } 25 | 26 | def get_token(self): 27 | try: 28 | # explicitly lock the token object for any updates 29 | self.__event.clear() 30 | # return the token to the client 31 | return { 32 | "system_datetime": datetime.now().strftime("%d/%m/%Y %H:%M:%S"), 33 | "resume_tokens": copy.deepcopy(self.__tokens) 34 | } 35 | finally: 36 | # release any lock 37 | self.__event.set() 38 | -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/migrators/__init__.py: -------------------------------------------------------------------------------- 1 | from .ClusterMigrator import ClusterMigrator 2 | from .DatabaseMigrator import DatabaseMigrator 3 | from .CollectionMigrator import CollectionMigrator 4 | # from .DocumentWriter import DocumentWriter -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/requirements.txt: -------------------------------------------------------------------------------- 1 | pymongo==4.6.3 2 | PyYAML==5.4 3 | boto3==1.12.17 4 | argparse==1.4.0 -------------------------------------------------------------------------------- /migration/cosmos-db-migration-utility/src/migrator-app/tokens.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | tokens: [] -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/amazon-documentdb-connect-with-an-application.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/amazon-documentdb-connect-with-an-application.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/amazon-documentdb-connect-with-mongo-shell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/amazon-documentdb-connect-with-mongo-shell.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/amazon-documentdb-connectivity-and-security.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/amazon-documentdb-connectivity-and-security.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/cloudformation-couchbase-to-amazon-documentdb-output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/cloudformation-couchbase-to-amazon-documentdb-output.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/ec2-cfn-migration-security-group.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/ec2-cfn-migration-security-group.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/ec2-copy-ssh-command.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/ec2-copy-ssh-command.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/ec2-delete-inbound-rule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/ec2-delete-inbound-rule.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/ec2-edit-couchbase-security-group-inbound-rules.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/ec2-edit-couchbase-security-group-inbound-rules.png 
-------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/ec2-select-couchbase-security-group-inbound-rules.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/ec2-select-couchbase-security-group-inbound-rules.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/msk-cluster-bootstrap-servers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/msk-cluster-bootstrap-servers.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/msk-cluster-client-information.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/msk-cluster-client-information.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/s3-confirm-specified-objects.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/s3-confirm-specified-objects.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/s3-delete-objects.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/s3-delete-objects.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/images/solution-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/awslabs/amazon-documentdb-tools/dea936adb5b6c5839858b580b7ba17499357f4e6/migration/couchbase-migration-utility/static/images/solution-overview.png -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/scripts/createTruststore.sh: -------------------------------------------------------------------------------- 1 | mydir=. 
2 | truststore=${mydir}/docdb-truststore.jks 3 | storepassword=password 4 | 5 | curl -sS "https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem" > ${mydir}/global-bundle.pem 6 | awk 'split_after == 1 {n++;split_after=0} /-----END CERTIFICATE-----/ {split_after=1}{print > "rds-ca-" n ".pem"}' < ${mydir}/global-bundle.pem 7 | 8 | for CERT in rds-ca-*; do 9 | alias=$(openssl x509 -noout -text -in $CERT | perl -ne 'next unless /Subject:/; s/.*(CN=|CN = )//; print') 10 | echo "Importing $alias" >> createTruststore.log 11 | keytool -import -file ${CERT} -alias "${alias}" -storepass ${storepassword} -keystore ${truststore} -noprompt 12 | rm $CERT 13 | done 14 | 15 | echo "Trust store content is: " 16 | 17 | keytool -list -v -keystore "$truststore" -storepass ${storepassword} | grep Alias | cut -d " " -f3- | while read alias 18 | do 19 | expiry=`keytool -list -v -keystore "$truststore" -storepass ${storepassword} -alias "${alias}" | grep Valid | perl -ne 'if(/until: (.*?)\n/) { print "$1\n"; }'` 20 | echo " Certificate ${alias} expires in '$expiry'" >> createTruststore.log 21 | done 22 | -------------------------------------------------------------------------------- /migration/couchbase-migration-utility/static/scripts/setup.sh: -------------------------------------------------------------------------------- 1 | # install Java 2 | echo "installing java-21-amazon-corretto-devel ..." >> setup.log 3 | sudo yum install -y java-21-amazon-corretto-devel 4 | 5 | echo "copying cacerts to kafka_truststore.jks ..." >> setup.log 6 | cp /usr/lib/jvm/java-21-amazon-corretto.x86_64/lib/security/cacerts kafka_truststore.jks 7 | 8 | # Couchbase connector 9 | echo "downloading couchbase-kafka-connect-couchbase-4.2.8.zip ..." >> setup.log 10 | wget https://packages.couchbase.com/clients/kafka/4.2.8/couchbase-kafka-connect-couchbase-4.2.8.zip 11 | 12 | echo "copying couchbase-kafka-connect-couchbase-4.2.8.zip to s3://$1 ..." >> setup.log 13 | aws s3 cp couchbase-kafka-connect-couchbase-4.2.8.zip s3://$1 14 | 15 | # Amazon DocumentDB connector 16 | echo "create directories for Amazon DocumentDB custom plugin ..." >> setup.log 17 | cd /home/ec2-user 18 | mkdir -p docdb-custom-plugin 19 | mkdir -p docdb-custom-plugin/mongo-connector 20 | mkdir -p docdb-custom-plugin/msk-config-providers 21 | 22 | echo "downloading mongo-kafka-connect-1.15.0-all.jar ..." >> setup.log 23 | cd /home/ec2-user/docdb-custom-plugin/mongo-connector 24 | wget https://repo1.maven.org/maven2/org/mongodb/kafka/mongo-kafka-connect/1.15.0/mongo-kafka-connect-1.15.0-all.jar 25 | 26 | echo "downloading msk-config-providers-0.3.1-with-dependencies.zip ..." >> /home/ec2-user/setup.log 27 | cd /home/ec2-user/docdb-custom-plugin/msk-config-providers 28 | wget https://github.com/aws-samples/msk-config-providers/releases/download/r0.3.1/msk-config-providers-0.3.1-with-dependencies.zip 29 | 30 | echo "unzipping msk-config-providers-0.3.1-with-dependencies.zip ..." >> /home/ec2-user/setup.log 31 | unzip msk-config-providers-0.3.1-with-dependencies.zip 32 | 33 | echo "deleting msk-config-providers-0.3.1-with-dependencies.zip ..." >> /home/ec2-user/setup.log 34 | rm msk-config-providers-0.3.1-with-dependencies.zip 35 | 36 | echo "creating docdb-custom-plugin.zip ..." >> /home/ec2-user/setup.log 37 | cd /home/ec2-user 38 | zip -r docdb-custom-plugin.zip docdb-custom-plugin 39 | 40 | echo "creating docdb-custom-plugin.zip to s3://$1 ..." 
>> setup.log 41 | aws s3 cp docdb-custom-plugin.zip s3://$1 42 | 43 | # Kafka 44 | echo "downloading kafka_2.13-4.0.0.tgz ..." >> setup.log 45 | wget https://dlcdn.apache.org/kafka/4.0.0/kafka_2.13-4.0.0.tgz 46 | 47 | echo "extracting kafka_2.13-4.0.0.tgz ..." >> setup.log 48 | tar -xzf kafka_2.13-4.0.0.tgz 49 | 50 | # AWS MSK IAM auth 51 | echo "downloading aws-msk-iam-auth-2.3.2-all.jar ..." >> setup.log 52 | wget https://github.com/aws/aws-msk-iam-auth/releases/download/v2.3.2/aws-msk-iam-auth-2.3.2-all.jar 53 | 54 | echo "copying aws-msk-iam-auth-2.3.2-all.jar to kafka_2.13-4.0.0/libs/. ..." >> setup.log 55 | cp aws-msk-iam-auth-2.3.2-all.jar kafka_2.13-4.0.0/libs/. 56 | 57 | # Mongo shell 58 | echo "installing mongodb-mongosh-shared-openssl3 ..." >> setup.log 59 | echo -e "[mongodb-org-5.0] \nname=MongoDB Repository\nbaseurl=https://repo.mongodb.org/yum/amazon/2023/mongodb-org/5.0/x86_64/\ngpgcheck=1 \nenabled=1 \ngpgkey=https://pgp.mongodb.com/server-5.0.asc" | sudo tee /etc/yum.repos.d/mongodb-org-5.0.repo 60 | sudo yum install -y mongodb-mongosh-shared-openssl3 61 | 62 | # create Amazon DocumentDB trust store 63 | echo "executing createTruststore.sh ..." >> setup.log 64 | ./createTruststore.sh 65 | 66 | echo "copying docdb-truststore.jks to s3://$1 ..." >> setup.log 67 | aws s3 cp docdb-truststore.jks s3://$1 68 | 69 | # create Kafka client properties file 70 | echo "creating /home/ec2-user/kafka_2.13-4.0.0/config/client.properties ..." >> setup.log 71 | echo "ssl.truststore.location=/home/ec2-user/kafka_truststore.jks" >> kafka_2.13-4.0.0/config/client.properties 72 | echo "security.protocol=SASL_SSL" >> kafka_2.13-4.0.0/config/client.properties 73 | echo "sasl.mechanism=AWS_MSK_IAM " >> kafka_2.13-4.0.0/config/client.properties 74 | echo "sasl.jaas.config=software.amazon.msk.auth.iam.IAMLoginModule required;" >> kafka_2.13-4.0.0/config/client.properties 75 | echo "sasl.client.callback.handler.class=software.amazon.msk.auth.iam.IAMClientCallbackHandler" >> kafka_2.13-4.0.0/config/client.properties 76 | 77 | # setup complete 78 | echo "setup complete ..." >> setup.log 79 | -------------------------------------------------------------------------------- /migration/data-differ/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB DataDiffer Tool 2 | 3 | The purpose of the DataDiffer tool is to facilitate the validation of data consistency by comparing two collections, making it particularly useful in migration scenarios. 4 | This tool performs the following checks: 5 | 6 | - Document existence check: It reads documents in batches from the source collection and checks for their existence in the target collection. If there is a discrepancy, the tool will attempt to identify and report the missing documents. 7 | - Index Comparison: examines the indexes of the collections and reports any differences. 8 | - Document Comparison: each document in the collections, with the same _id, is compared using the DeepDiff library. This process can be computationally intensive, as it involves scanning all document fields. The duration of this check depends on factors such as document complexity and the CPU resources of the machine executing the script (see the sketch below).
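The document comparison step can be pictured with a short, hypothetical sketch; this is not the tool's actual code, the connection URIs, database, and collection names are placeholders, and it assumes the `pymongo` and `deepdiff` modules are installed:

```
# Hypothetical sketch of a per-document comparison -- not data-differ.py itself.
from deepdiff import DeepDiff
from pymongo import MongoClient

source = MongoClient("mongodb://localhost:27017")["mysourcedb"]["mysourcecollection"]  # placeholder source
target = MongoClient("mongodb://localhost:27017")["mytargetdb"]["mytargetcollection"]  # placeholder target

for src_doc in source.find(batch_size=100):
    tgt_doc = target.find_one({"_id": src_doc["_id"]})
    if tgt_doc is None:
        print(f"Missing in target: {src_doc['_id']}")      # document existence check
        continue
    diff = DeepDiff(src_doc, tgt_doc, ignore_order=True)   # field-by-field comparison
    if diff:
        print(f"Difference for _id {src_doc['_id']}: {diff}")
```

The actual tool layers batched reads, index comparison, sampling, and report generation on top of this basic loop.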
9 | 10 | ## Prerequisites: 11 | 12 | - Python 3 13 | - Modules: pymongo, deepdiff, tqdm 14 | ``` 15 | pip3 install pymongo deepdiff tqdm 16 | ``` 17 | Note: Refer to the DeepDiff [documentation](https://zepworks.com/deepdiff/current/optimizations.html) for potential optimizations you may try out specifically for your dataset. 18 | 19 | ## How to use 20 | 21 | 1. Clone the repository and go to the tool folder: 22 | ``` 23 | git clone https://github.com/awslabs/amazon-documentdb-tools.git 24 | cd amazon-documentdb-tools/migration/data-differ/ 25 | ``` 26 | 27 | 2. Run the data-differ.py tool, which accepts the following arguments: 28 | 29 | ``` 30 | python3 data-differ.py --help 31 | usage: data-differ.py [-h] [--batch-size BATCH_SIZE] [--output-file OUTPUT_FILE] [--check-target] --source-uri SOURCE_URI --target-uri TARGET_URI --source-db SOURCE_DB --target-db TARGET_DB --source-coll SOURCE_COLL --target-coll TARGET_COLL [--sample-size_percent SAMPLE_SIZE_PERCENT] [--sampling-timeout-ms SAMPLING_TIMEOUT_MS] 32 | 33 | Compare two collections and report differences. 34 | 35 | options: 36 | -h, --help show this help message and exit 37 | --batch-size BATCH_SIZE 38 | Batch size for bulk reads (optional, default: 100) 39 | --output-file OUTPUT_FILE 40 | Output file path (optional, default: differences.txt) 41 | --check-target 42 | optional, Check if extra documents exist in target database 43 | --source-uri SOURCE_URI 44 | Source cluster URI (required) 45 | --target-uri TARGET_URI 46 | Target cluster URI (required) 47 | --source-db SOURCE_DB 48 | Source database name (required) 49 | --target-db TARGET_DB 50 | Target database name (required) 51 | --source-coll SOURCE_COLL 52 | Source collection name (required) 53 | --target-coll TARGET_COLL 54 | Target collection name (required) 55 | --sample-size-percent SAMPLE_SIZE_PERCENT 56 | optional, if set only samples a percentage of the documents 57 | --sampling-timeout-ms SAMPLING_TIMEOUT_MS 58 | optional, override the timeout for returning a sample of documents when using the --sample-size-percent argument 59 | ``` 60 | 61 | ## Example usage: 62 | Connect to a standalone MongoDB instance as source and to a Amazon DocumentDB cluster as target. 63 | 64 | From the source uri, compare the collection *mysourcecollection* from database *mysource*, against the collection *mytargetcollection* from database *mytargetdb* in the target uri. 65 | 66 | ``` 67 | python3 data-differ.py \ 68 | --source-uri "mongodb://user:password@mongodb-instance-hostname:27017/admin?directConnection=true" \ 69 | --target-uri "mongodb://user:password@target.cluster.docdb.amazonaws.com:27017/?tls=true&tlsCAFile=rds-combined-ca-bundle.pem&replicaSet=rs0&readPreference=secondaryPreferred&retryWrites=false" \ 70 | --source-db mysourcedb \ 71 | --source-coll mysourcecollection \ 72 | --target-db mytargetdb \ 73 | --target-coll mytargetcollection 74 | ``` 75 | 76 | For more information on the connection string format, refer to the [documentation](https://www.mongodb.com/docs/manual/reference/connection-string/). 77 | 78 | ## Sampling 79 | For large databases it might be unfeasible to compare every document as: 80 | * It takes a long time to compare every document. 81 | * Reading every document from a large busy database could have a performance impact. 82 | 83 | If you use the `--sample-size-percent` option you can pass in a percentage of 84 | documents to sample and compare. 85 | 86 | E.g. 
`--sample-size-percent 1` would sample 1% of the documents in the source 87 | database and compare them to the target database. 88 | 89 | Under the hood this uses the [MongoDB `$sample` operator](https://www.mongodb.com/docs/manual/reference/operator/aggregation/sample/). 90 | You should read the documentation on how that behaves on your version of MongoDB 91 | when the percentage to sample is >= 5% before picking a percentage to sample. 92 | 93 | The default timeout for retrieving a sample of documents is `500ms`; if this is 94 | not long enough you can adjust it with the `--sampling-timeout-ms` argument. 95 | For example `--sampling-timeout-ms 600` would increase the timeout to `600ms`. 96 | -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/README.md: -------------------------------------------------------------------------------- 1 | ## To Run DataDiffer Test Scripts 2 | 1. Ensure you have met the prerequisites from the general README. 3 | 2. Create an environment file (or files) as needed based on your migration needs. The environment file should set environment variables and look something like this with each variable filled out for your use case: 4 | ``` 5 | export SOURCE_URI="" 6 | export SOURCE_DB="" 7 | export SOURCE_COLL="" 8 | export TARGET_URI="" 9 | export TARGET_DB="" 10 | export TARGET_COLL="" 11 | ``` 12 | 3. Source the environment file you built in the command line with a command such as the following: 13 | ``` 14 | source <environment-file>.sh 15 | ``` 16 | 4. Run the appropriate bash test script in the command line with a command such as the following: 17 | ``` 18 | bash <test-script>.bash 19 | ``` 20 | 5. See the output in the command line! 21 | -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/dict_id.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mongoimport --uri="$SOURCE_URI" --db="$SOURCE_DB" --collection="$SOURCE_COLL" --file=dict_id_source.json 4 | mongoimport --uri="$TARGET_URI" --db="$TARGET_DB" --collection="$TARGET_COLL" --file=dict_id_target.json 5 | 6 | python3 ../data-differ.py --source-uri "$SOURCE_URI" --target-uri "$TARGET_URI" --source-db "$SOURCE_DB" --source-coll "$SOURCE_COLL" --target-db "$TARGET_DB" --target-coll "$TARGET_COLL" --batch-size 100 --output-file dict_id_diff.txt 7 | 8 | mongosh "$SOURCE_URI" --eval "use $SOURCE_DB; db.$SOURCE_COLL.drop()" 9 | mongosh "$TARGET_URI" --eval "use $TARGET_DB; db.$TARGET_COLL.drop()" -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/dict_id_diff.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mongoimport --uri="$SOURCE_URI" --db="$SOURCE_DB" --collection="$SOURCE_COLL" --file=dict_id_diff_source.json 4 | mongoimport --uri="$TARGET_URI" --db="$TARGET_DB" --collection="$TARGET_COLL" --file=dict_id_diff_target.json 5 | 6 | python3 ../data-differ.py --source-uri "$SOURCE_URI" --target-uri "$TARGET_URI" --source-db "$SOURCE_DB" --source-coll "$SOURCE_COLL" --target-db "$TARGET_DB" --target-coll "$TARGET_COLL" --batch-size 100 --output-file dict_id_diff_result.txt 7 | 8 | mongosh "$SOURCE_URI" --eval "use $SOURCE_DB; db.$SOURCE_COLL.drop()" 9 | mongosh "$TARGET_URI" --eval "use $TARGET_DB; db.$TARGET_COLL.drop()" --------------------------------------------------------------------------------
/migration/data-differ/test-scripts/dict_id_diff_source.json: -------------------------------------------------------------------------------- 1 | {"_id": {"key1": "value1", "key2": "value2"}, "name": "Alice", "age": 25.0, "siblings": 1.0} 2 | {"_id": {"key1": "value3", "key2": "value4"}, "name": "Bob", "age": 30.0, "siblings": 2.0} 3 | {"_id": {"key1": "value5", "key2": "value6"}, "name": "Charlie", "age": 35.0, "siblings": 3.0} -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/dict_id_diff_target.json: -------------------------------------------------------------------------------- 1 | {"_id": {"key1": "value1", "key2": "value2"}, "name": "Alice", "age": 25.0, "siblings": 1.0} 2 | {"_id": {"key1": "value3", "key2": "value4"}, "name": "Bob", "age": 31.0, "siblings": 2.0} 3 | {"_id": {"key1": "value5", "key2": "value6"}, "name": "Charlie", "age": 35.0, "siblings": 3.0} -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/dict_id_source.json: -------------------------------------------------------------------------------- 1 | {"_id": {"key1": "value1", "key2": "value2"}, "name": "Alice", "age": 25.0, "siblings": 1.0} 2 | {"_id": {"key1": "value3", "key2": "value4"}, "name": "Bob", "age": 30.0, "siblings": 2.0} 3 | {"_id": {"key1": "value5", "key2": "value6"}, "name": "Charlie", "age": 35.0, "siblings": 3.0} -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/dict_id_target.json: -------------------------------------------------------------------------------- 1 | {"_id": {"key1": "value1", "key2": "value2"}, "name": "Alice", "age": 25.0, "siblings": 1.0} 2 | {"_id": {"key1": "value3", "key2": "value4"}, "name": "Bob", "age": 30.0, "siblings": 2.0} 3 | {"_id": {"key1": "value5", "key2": "value6"}, "name": "Charlie", "age": 35.0, "siblings": 3.0} -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/everything_same.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mongoimport --uri="$SOURCE_URI" --db="$SOURCE_DB" --collection="$SOURCE_COLL" --file=everything_same.json 4 | mongoimport --uri="$TARGET_URI" --db="$TARGET_DB" --collection="$TARGET_COLL" --file=everything_same.json 5 | 6 | python3 ../data-differ.py --source-uri "$SOURCE_URI" --target-uri "$TARGET_URI" --source-db "$SOURCE_DB" --source-coll "$SOURCE_COLL" --target-db "$TARGET_DB" --target-coll "$TARGET_COLL" --batch-size 100 --output-file everything_same_result.txt 7 | 8 | mongosh "$SOURCE_URI" --eval "use $SOURCE_DB; db.$SOURCE_COLL.drop()" 9 | mongosh "$TARGET_URI" --eval "use $TARGET_DB; db.$TARGET_COLL.drop()" -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/everything_same.json: -------------------------------------------------------------------------------- 1 | {"_id": 1, "name": "John", "age": 21.0, "siblings": 1.0} 2 | {"_id": 2, "name": "Frank", "age": 22.0, "siblings": 14.0} 3 | {"_id": 3, "name": "Susan", "age": 22.0, "siblings": 2.0} 4 | -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/extra_target_field.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mongoimport --uri="$SOURCE_URI" --db="$SOURCE_DB" 
--collection="$SOURCE_COLL" --file=extra_target_field_source.json 4 | mongoimport --uri="$TARGET_URI" --db="$TARGET_DB" --collection="$TARGET_COLL" --file=extra_target_field_target.json 5 | 6 | python3 ../data-differ.py --source-uri "$SOURCE_URI" --target-uri "$TARGET_URI" --source-db "$SOURCE_DB" --source-coll "$SOURCE_COLL" --target-db "$TARGET_DB" --target-coll "$TARGET_COLL" --batch-size 100 --output-file extra_target_field_result.txt 7 | 8 | mongosh "$SOURCE_URI" --eval "use $SOURCE_DB; db.$SOURCE_COLL.drop()" 9 | mongosh "$TARGET_URI" --eval "use $TARGET_DB; db.$TARGET_COLL.drop()" -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/extra_target_field_source.json: -------------------------------------------------------------------------------- 1 | {"_id": 1, "name": "John", "age": 21.0, "siblings": 1.0} 2 | {"_id": 2, "name": "Frank", "age": 22.0, "siblings": 14.0} 3 | {"_id": 3, "name": "Susan", "age": 22.0, "siblings": 2.0} 4 | -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/extra_target_field_target.json: -------------------------------------------------------------------------------- 1 | {"_id": 1, "name": "John", "age": 21.0, "siblings": 1.0} 2 | {"_id": 2, "name": "Frank", "age": 22.0, "siblings": 14.0, "pets": 12} 3 | {"_id": 3, "name": "Susan", "age": 22.0, "siblings": 2.0} 4 | -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/nested_dict_diff.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mongoimport --uri="$SOURCE_URI" --db="$SOURCE_DB" --collection="$SOURCE_COLL" --file=nested_dict_diff_source.json 4 | mongoimport --uri="$TARGET_URI" --db="$TARGET_DB" --collection="$TARGET_COLL" --file=nested_dict_diff_target.json 5 | 6 | python3 ../data-differ.py --source-uri "$SOURCE_URI" --target-uri "$TARGET_URI" --source-db "$SOURCE_DB" --source-coll "$SOURCE_COLL" --target-db "$TARGET_DB" --target-coll "$TARGET_COLL" --batch-size 100 --output-file nested_dict_diff_result.txt 7 | 8 | mongosh "$SOURCE_URI" --eval "use $SOURCE_DB; db.$SOURCE_COLL.drop()" 9 | mongosh "$TARGET_URI" --eval "use $TARGET_DB; db.$TARGET_COLL.drop()" -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/nested_dict_diff_source.json: -------------------------------------------------------------------------------- 1 | {"_id": 1, "name": "John", "age": 21.0, "siblings": 1.0, "pets":[{"cat": "Kitty", "hamster": "JJ"}]} 2 | {"_id": 2, "name": "Frank", "age": 22.0, "siblings": 14.0, "pets":[{"cat": "Sammy", "hamster": "Johnny"}]} 3 | {"_id": 3, "name": "Susan", "age": 22.0, "siblings": 2.0, "pets":[{"cat": "Al", "hamster": "Max"}]} 4 | -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/nested_dict_diff_target.json: -------------------------------------------------------------------------------- 1 | {"_id": 1, "name": "John", "age": 21.0, "siblings": 1.0, "pets":[{"cat": "Kitty", "hamster": "JJ"}]} 2 | {"_id": 2, "name": "Frank", "age": 22.0, "siblings": 14.0, "pets":[{"cat": "Sammy", "hamster": "Johnny"}]} 3 | {"_id": 3, "name": "Susan", "age": 22.0, "siblings": 2.0, "pets":[{"cat": "Alex", "hamster": "Max"}]} 4 | -------------------------------------------------------------------------------- 
/migration/data-differ/test-scripts/order_change.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mongoimport --uri="$SOURCE_URI" --db="$SOURCE_DB" --collection="$SOURCE_COLL" --file=order_change_source.json 4 | mongoimport --uri="$TARGET_URI" --db="$TARGET_DB" --collection="$TARGET_COLL" --file=order_change_target.json 5 | 6 | python3 ../data-differ.py --source-uri "$SOURCE_URI" --target-uri "$TARGET_URI" --source-db "$SOURCE_DB" --source-coll "$SOURCE_COLL" --target-db "$TARGET_DB" --target-coll "$TARGET_COLL" --batch-size 100 --output-file order_change_result.txt 7 | 8 | mongosh "$SOURCE_URI" --eval "use $SOURCE_DB; db.$SOURCE_COLL.drop()" 9 | mongosh "$TARGET_URI" --eval "use $TARGET_DB; db.$TARGET_COLL.drop()" -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/order_change_source.json: -------------------------------------------------------------------------------- 1 | {"_id": 1, "name": "John", "age": 21.0, "siblings": 1.0} 2 | {"_id": 2, "name": "Frank", "age": 22.0, "siblings": 14.0} 3 | {"_id": 3, "name": "Susan", "age": 22.0, "siblings": 2.0} 4 | -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/order_change_target.json: -------------------------------------------------------------------------------- 1 | {"_id": 1, "name": "John", "siblings": 1.0, "age": 21.0} 2 | {"_id": 2, "name": "Frank", "age": 22.0, "siblings": 14.0} 3 | {"_id": 3, "name": "Susan", "age": 22.0, "siblings": 2.0} 4 | -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/wrong_id.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mongoimport --uri="$SOURCE_URI" --db="$SOURCE_DB" --collection="$SOURCE_COLL" --file=wrong_id_source.json 4 | mongoimport --uri="$TARGET_URI" --db="$TARGET_DB" --collection="$TARGET_COLL" --file=wrong_id_target.json 5 | 6 | python3 ../data-differ.py --source-uri "$SOURCE_URI" --target-uri "$TARGET_URI" --source-db "$SOURCE_DB" --source-coll "$SOURCE_COLL" --target-db "$TARGET_DB" --target-coll "$TARGET_COLL" --batch-size 100 --output-file wrong_id_result.txt 7 | 8 | mongosh "$SOURCE_URI" --eval "use $SOURCE_DB; db.$SOURCE_COLL.drop()" 9 | mongosh "$TARGET_URI" --eval "use $TARGET_DB; db.$TARGET_COLL.drop()" -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/wrong_id_source.json: -------------------------------------------------------------------------------- 1 | {"_id": 1, "name": "John", "age": 21.0, "siblings": 1.0} 2 | {"_id": 2, "name": "Frank", "age": 22.0, "siblings": 14.0} 3 | {"_id": 3, "name": "Susan", "age": 22.0, "siblings": 2.0} 4 | -------------------------------------------------------------------------------- /migration/data-differ/test-scripts/wrong_id_target.json: -------------------------------------------------------------------------------- 1 | {"_id": 1, "name": "John", "age": 21.0, "siblings": 1.0} 2 | {"_id": 2, "name": "Frank", "age": 22.0, "siblings": 14.0} 3 | {"_id": 10, "name": "Susan", "age": 22.0, "siblings": 2.0} 4 | -------------------------------------------------------------------------------- /migration/dms-segments/README.md: -------------------------------------------------------------------------------- 1 | # Amazon Database Migration Service (DMS) Segment Analyzer 2 | 3 
| The DMS Segment Analyzer calculates the segment boundaries of MongoDB and Amazon DocumentDB collections to be used for segmenting DMS full load operations. 4 | 5 | # Requirements 6 | - Python 3.7+ 7 | - PyMongo 8 | - MongoDB 2.6 - 3.4 | pymongo 3.10 - 3.12 9 | - MongoDB 3.6 - 5.0 | pymongo 3.12 - 4.0 10 | - MongoDB 5.1+ | pymongo 4.0+ 11 | - DocumentDB | pymongo 3.10+ 12 | 13 | ## Using the DMS Segment Analyzer 14 | `python3 dms-segments.py --uri <uri> --database <database> --collection <collection> --num-segments <number-of-segments>` 15 | 16 | - Run on any instance in your MongoDB or Amazon DocumentDB cluster 17 | - Connect directly to servers, not as a replicaSet. If the driver version supports &directConnection=true, then provide it as part of the --uri 18 | - The \<uri\> options can be found at https://www.mongodb.com/docs/manual/reference/connection-string/ 19 | - For DocumentDB use the instance endpoints, not the cluster endpoint 20 | - By default the tool uses large .skip() operations to determine the boundary ObjectIds; if you experience timeouts, consider using the --single-cursor option 21 | 22 | ## License 23 | This tool is licensed under the Apache 2.0 License. 24 | -------------------------------------------------------------------------------- /migration/export-users/README.md: -------------------------------------------------------------------------------- 1 | # Export Users tool 2 | This tool will export Amazon DocumentDB or MongoDB users and custom roles to files, which can then be used to create them in another cluster. Note: Passwords are not exported. 3 | 4 | # Requirements 5 | - Python 3.7+ 6 | - PyMongo 7 | 8 | ## Using the Export Users Tool 9 | `python3 docdbExportUsers.py --users-file <users-file> --roles-file <roles-file> --uri <uri>` 10 | 11 | ## Example: 12 | `python3 docdbExportUsers.py --users-file mydocdb-users.js --roles-file mydocdb-roles.js --uri "mongodb://user:password@mydocdb.cluster-cdtjj00yfi95.eu-west-2.docdb.amazonaws.com:27017/?tls=true&tlsCAFile=rds-combined-ca-bundle.pem&replicaSet=rs0&retryWrites=false"` 13 | 14 | ## Restore custom roles 15 | Run the custom roles .js script: 16 | `mongo --ssl --host mydocdb.cluster-cdtjj00yfi95.eu-west-2.docdb.amazonaws.com:27017 --sslCAFile rds-combined-ca-bundle.pem --username <user> --password <password> mydocdb-roles.js` 17 | 18 | ## Restore users 19 | Edit the users .js script and update passwords for each user. Run the users .js script: 20 | `mongo --ssl --host mydocdb.cluster-cdtjj00yfi95.eu-west-2.docdb.amazonaws.com:27017 --sslCAFile rds-combined-ca-bundle.pem --username <user> --password <password> mydocdb-users.js` 21 | 22 | ## License 23 | This tool is licensed under the Apache 2.0 License.
-------------------------------------------------------------------------------- /migration/export-users/docdbExportUsers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | import pymongo 4 | 5 | 6 | rolesToExport = {} 7 | 8 | 9 | def exportUsers(appConfig): 10 | client = pymongo.MongoClient(host=appConfig['uri'], appname='userexp') 11 | database_names = client.list_database_names() 12 | database_names.append("$external") 13 | 14 | f = open(appConfig['usersFile'], "w+", encoding='utf-8') 15 | for database_name in database_names: 16 | print("") 17 | if (database_name == 'local'): 18 | print(f"Skipping database: {database_name}") 19 | continue 20 | 21 | print(f"Checking database: {database_name}") 22 | database = client[database_name] 23 | users = database.command('usersInfo') 24 | if len(users['users']) == 0: 25 | print(f"No users in database: {database_name}") 26 | continue 27 | 28 | use_db_printed = False 29 | for user in users['users']: 30 | """ Exclude serviceadmin user """ 31 | if user['user'] == "serviceadmin": 32 | continue 33 | 34 | if (database_name == "$external") and (user['user'].startswith("arn:aws:iam::") == False): 35 | print(f"Skipping user: {user['user']}, user must start with 'arn:aws:iam::'") 36 | continue 37 | 38 | print(f"Exporting user: {user['user']}") 39 | 40 | if (use_db_printed == False): 41 | print(f"use {database_name}", file=f) 42 | use_db_printed = True 43 | 44 | print('db.createUser({user: "' + user['user'] + '", pwd: "REPLACE_THIS_PASS",' + ' roles: ' + str(user['roles']) + '});', file=f) 45 | 46 | print(f"Checking roles for user: {user['user']}") 47 | for userRole in user['roles']: 48 | checkRole(database, userRole) 49 | 50 | f.close() 51 | print(f"Done! Users exported to {appConfig['usersFile']}") 52 | 53 | 54 | def checkRole(database, userRole): 55 | print (f"Checking role {userRole}") 56 | """ A role can be assigned to multiple users so we only want to export the role definition once """ 57 | """ Build a dictionary to keep track of all user-defined roles assigned to users being exported """ 58 | roleInfo = database.command({'rolesInfo': {'role': userRole['role'], 'db': userRole['db']}, 'showPrivileges': True, 'showBuiltinRoles': False}) 59 | 60 | if len(roleInfo['roles']) == 1: 61 | role = roleInfo['roles'][0] 62 | if (role['isBuiltin'] == False): 63 | """ Check role against list of roles supported by DocumentDB """ 64 | if not role['role'] in rolesToExport: 65 | """ If this is a user-defined role not already marked for export, mark it for export """ 66 | rolesToExport[role['role']] = role 67 | 68 | 69 | def exportRoles(appConfig): 70 | with open(appConfig['rolesFile'], "w+", encoding='utf-8') as f: 71 | print("use admin", file=f) 72 | for role in rolesToExport: 73 | print(f"Exporting role: {role}") 74 | privileges = str(rolesToExport[role]['privileges']) 75 | """ convert Python True/False to JSON true/false """ 76 | privileges = privileges.replace(": True}", ": true}") 77 | privileges = privileges.replace(": False}", ": false}") 78 | print('db.createRole({role: "' + rolesToExport[role]['role'] + '", privileges: ' + privileges + ', roles: ' + str(rolesToExport[role]['roles']) + '});', file=f) 79 | 80 | f.close() 81 | print(f"Done! 
Roles exported to {appConfig['rolesFile']}") 82 | 83 | 84 | def main(): 85 | """ v1: Initial script, export users to a file """ 86 | 87 | parser = argparse.ArgumentParser(description='Export Amazon DocumentDB users and user defined roles to user_output.js file, can be used to import them to other instance. Note: Passwords are not exported.') 88 | 89 | parser.add_argument('--skip-python-version-check', 90 | required=False, 91 | action='store_true', 92 | help='Permit execution on Python 3.6 and prior') 93 | 94 | parser.add_argument('--uri', 95 | required=True, 96 | type=str, 97 | help='MongoDB Connection URI') 98 | 99 | parser.add_argument('--users-file', 100 | required=True, 101 | type=str, 102 | help='The users output file') 103 | 104 | parser.add_argument('--roles-file', 105 | required=True, 106 | type=str, 107 | help='The roles output file') 108 | 109 | args = parser.parse_args() 110 | 111 | MIN_PYTHON = (3, 7) 112 | if (not args.skip_python_version_check) and (sys.version_info < MIN_PYTHON): 113 | sys.exit("\nPython %s.%s or later is required.\n" % MIN_PYTHON) 114 | 115 | appConfig = {} 116 | appConfig['uri'] = args.uri 117 | appConfig['usersFile'] = args.users_file 118 | appConfig['rolesFile'] = args.roles_file 119 | 120 | exportUsers(appConfig) 121 | exportRoles(appConfig) 122 | 123 | 124 | if __name__ == "__main__": 125 | main() -------------------------------------------------------------------------------- /migration/json-import/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB JSON Import Tool 2 | 3 | The purpose of the JSON Import Tool is to load JSON formatted data from a single file into DocumentDB or MongoDB in parallel. Input file must contain one JSON document per line. 4 | 5 | ## Prerequisites: 6 | 7 | - Python 3 8 | - Modules: pymongo 9 | ``` 10 | pip3 install pymongo 11 | ``` 12 | ## How to use 13 | 14 | 1. Clone the repository and go to the tool folder: 15 | ``` 16 | git clone https://github.com/awslabs/amazon-documentdb-tools.git 17 | cd amazon-documentdb-tools/migration/json-import/ 18 | ``` 19 | 20 | 2. Run the json-import.py tool, which accepts the following arguments: 21 | 22 | ``` 23 | python3 json-import.py --help 24 | usage: json-import.py [-h] --uri URI --file-name FILE_NAME --operations-per-batch OPERATIONS_PER_BATCH --workers WORKERS --database DATABASE --collection COLLECTION --log-file-name LOG_FILE_NAME 25 | [--skip-python-version-check] [--lines-per-chunk LINES_PER_CHUNK] [--debug-level DEBUG_LEVEL] --mode {insert,replace,update} [--drop-collection] 26 | 27 | Bulk/Concurrent JSON file import utility. 28 | 29 | optional arguments: 30 | -h, --help show this help message and exit 31 | --uri URI URI 32 | --file-name FILE_NAME 33 | Name of JSON file to load 34 | --operations-per-batch OPERATIONS_PER_BATCH 35 | Number of operations per batch 36 | --workers WORKERS Number of parallel workers 37 | --database DATABASE Database name 38 | --collection COLLECTION 39 | Collection name 40 | --log-file-name LOG_FILE_NAME 41 | Log file name 42 | --skip-python-version-check 43 | Permit execution on Python 3.6 and prior 44 | --lines-per-chunk LINES_PER_CHUNK 45 | Number of lines each worker reserves before jumping ahead in the file to the next chunk 46 | --debug-level DEBUG_LEVEL 47 | Debug output level. 
48 | --mode {insert,replace,update} 49 | Mode - insert, replace, or update 50 | --drop-collection Drop the collection prior to loading data 51 | 52 | ``` 53 | 54 | ## Example usage: 55 | Load data (as inserts) from the JSON formatted file load-me.json 56 | 57 | ``` 58 | python3 json-import.py \ 59 | --uri "mongodb://user:password@target.cluster.docdb.amazonaws.com:27017/?tls=true&tlsCAFile=rds-combined-ca-bundle.pem&replicaSet=rs0&readPreference=secondaryPreferred&retryWrites=false" \ 60 | --file-name load-me.json \ 61 | --operations-per-batch 100 \ 62 | --workers 4 \ 63 | --database jsonimport \ 64 | --collection coll1 \ 65 | --log-file-name json-import-log-file.log \ 66 | --lines-per-chunk 1000 \ 67 | --mode insert \ 68 | --drop-collection 69 | ``` 70 | 71 | For more information on the connection string format, refer to the [documentation](https://www.mongodb.com/docs/manual/reference/connection-string/). 72 | -------------------------------------------------------------------------------- /migration/migrator/.gitignore: -------------------------------------------------------------------------------- 1 | rds-combined-ca-bundle.pem 2 | doit.bash 3 | doit-fl.bash 4 | doit-cdc.bash 5 | -------------------------------------------------------------------------------- /migration/migrator/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Full Load and Change Data Capture (CDC) Synchronization Tool 2 | This synchronization tool enables high-speed Full Load and CDC from a MongoDB/DocumentDB source database to an Amazon DocumentDB target database. 3 | 4 | The full load script requires "boundaries" for parallelism; you can run the [dms-segments tool](https://github.com/awslabs/amazon-documentdb-tools/tree/master/migration/dms-segments) to calculate them. 5 | 6 | ## Installation 7 | Clone the repository. 8 | 9 | ## Requirements 10 | * Python 3.7+ 11 | * PyMongo, boto3 12 | * IAM permission "cloudwatch:PutMetricData" is required to create CloudWatch metrics 13 | 14 | 15 | ## Using the change data capture tool 16 | ``` 17 | python3 cdc-multiprocess.py --source-uri <source-uri> --target-uri <target-uri> --source-namespace <source-namespace> --start-position [0 or YYYY-MM-DD+HH:MM:SS in UTC] --use-[oplog|change-stream] [--create-cloudwatch-metrics] [--cluster-name <cluster-name>] 18 | ``` 19 | 20 | * source-uri and target-uri follow the [MongoDB Connection String URI Format](https://www.mongodb.com/docs/manual/reference/connection-string/) 21 | * source-namespace in database.collection format (i.e. "database1.collection2") 22 | * start-position either 0 (process entire oplog) or specific oplog position as YYYY-MM-DD+HH:MM:SS in UTC 23 | * must pass either --use-oplog for oplog to be source (MongoDB only) or --use-change-stream to use change streams for source (MongoDB or DocumentDB) 24 | * optionally pass 2+ for the --threads option to process the oplog with concurrent processes 25 | * several other optional parameters are supported, execute the script with -h for a full listing 26 | * include --create-cloudwatch-metrics to create metrics for the number of CDC operations per second and the number of seconds behind current 27 | * CloudWatch metrics are captured in namespace "CustomDocDB" as "MigratorCDCOperationsPerSecond" and "MigratorCDCNumSecondsBehind" 28 | * include --cluster-name if capturing CloudWatch metrics via --create-cloudwatch-metrics 29 | 30 | ## Using the full load tool 31 | ``` 32 | python3 fl-multiprocess.py --source-uri <source-uri> --target-uri <target-uri> --source-namespace <source-namespace> --boundaries <boundary-list> [--create-cloudwatch-metrics] [--cluster-name <cluster-name>] 33 | ``` 34 | 35 | * source-uri and target-uri follow the [MongoDB Connection String URI Format](https://www.mongodb.com/docs/manual/reference/connection-string/) 36 | * source-namespace and target-namespace in database.collection format (i.e. "database1.collection2") 37 | * pass --boundary-datatype for string or int for _id boundaries that are not objectid type 38 | * several other optional parameters are supported, execute the script with -h for a full listing 39 | * include --create-cloudwatch-metrics to create metrics for the number of inserts per second and the approximate number of seconds until done 40 | * CloudWatch metrics are captured in namespace "CustomDocDB" as "MigratorFLInsertsPerSecond" and "MigratorFLRemainingSeconds" 41 | * include --cluster-name if capturing CloudWatch metrics via --create-cloudwatch-metrics 42 | -------------------------------------------------------------------------------- /migration/mongodb-changestream-review/README.md: -------------------------------------------------------------------------------- 1 | # MongoDB Changestream Review Tool 2 | 3 | The MongoDB changestream review tool connects to any instance in a MongoDB replicaset (primary or secondary), reads the changestream, and produces a log file containing counters for insert/update/delete operations by collection. 4 | 5 | # Requirements 6 | - Python 3.7+ 7 | - If using Snappy wire protocol compression and MongoDB, "apt install python-snappy" 8 | - PyMongo 9 | - MongoDB 2.6 - 3.4 | pymongo 3.10 - 3.12 10 | - MongoDB 3.6 - 5.0 | pymongo 3.12 - 4.0 11 | - MongoDB 5.1+ | pymongo 4.0+ 12 | - DocumentDB | pymongo 3.10 - 4.0 13 | 14 | ## Using the MongoDB Changestream Review Tool 15 | `python3 mongodb-changestream-review.py --server-alias <server-alias> --uri <uri> --stop-when-changestream-current --start-position 2025-04-02+12:00:00` 16 | 17 | - Run on any instance in the replicaset (the larger the changestream the better) 18 | - Use a different \<server-alias\> for each execution 19 | - If sharded, run on one instance in each shard 20 | - Avoid running the tool from the server itself if possible, as it consumes disk space for the output files 21 | - Each execution creates a file starting with \<server-alias\> and ending with .log 22 | - The \<uri\> options can be found at https://www.mongodb.com/docs/manual/reference/connection-string/ 23 | - Consider adding "&compressor=snappy" to your \<uri\> if your MongoDB server supports it 24 | 25 | ## License 26 | This tool is licensed under the Apache 2.0 License.
27 | -------------------------------------------------------------------------------- /migration/mongodb-oplog-review/README.md: -------------------------------------------------------------------------------- 1 | # MongoDB Oplog Review Tool 2 | 3 | The MongoDB oplog review tool connects to any instance in a MongoDB replicaset (primary or secondary), reads the entire oplog, and produces a log file containing counters for insert/update/delete operations by collection. 4 | 5 | # Requirements 6 | - Python 3.7+ 7 | - If using Snappy wire protocol compression and MongoDB, "apt install python-snappy" 8 | - PyMongo 9 | - MongoDB 2.6 - 3.4 | pymongo 3.10 - 3.12 10 | - MongoDB 3.6 - 5.0 | pymongo 3.12 - 4.0 11 | - MongoDB 5.1+ | pymongo 4.0+ 12 | - DocumentDB | pymongo 3.10 - 4.0 13 | 14 | ## Using the MongoDB Oplog Review Tool 15 | `python3 mongodb-oplog-review.py --server-alias <server-alias> --uri <uri> --stop-when-oplog-current` 16 | Optionally add the arguments `--output-to-csv --file-name <file-name>` if you want to save the output to a CSV file 17 | 18 | - Run on any instance in the replicaset (the larger the oplog the better) 19 | - Use a different \<server-alias\> for each execution 20 | - If sharded, run on one instance in each shard 21 | - Avoid running the tool from the server itself if possible, as it consumes disk space for the output files 22 | - Each execution creates a file starting with \<server-alias\> and ending with .log 23 | - The \<uri\> options can be found at https://www.mongodb.com/docs/manual/reference/connection-string/ 24 | - Consider adding "&compressor=snappy" to your \<uri\> if your MongoDB server supports it 25 | 26 | ## License 27 | This tool is licensed under the Apache 2.0 License. 28 | -------------------------------------------------------------------------------- /migration/mongodb-ops/README.md: -------------------------------------------------------------------------------- 1 | # MongoDB Ops Tool 2 | 3 | The MongoDB Ops tool gathers collection level query/insert/update/delete counters to assist in the process of sizing. 4 | 5 | # Requirements 6 | - Python 3.7+ 7 | - PyMongo 8 | - MongoDB 2.6 - 3.4 | pymongo 3.10 - 3.12 9 | - MongoDB 3.6 - 5.0 | pymongo 3.12 - 4.0 10 | - MongoDB 5.1+ | pymongo 4.0+ 11 | - DocumentDB | pymongo 3.10 - 4.0 12 | 13 | ## Using the MongoDB Ops Tool 14 | `python3 mongodb-ops.py --uri <uri> --server-alias <server-alias> --collect` 15 | - Produces an output file for comparison 16 | 17 | `python3 mongodb-ops.py --compare --file1 <file1> --file2 <file2>` 18 | - Compares the results of two executions to estimate the number of queries, inserts, updates, and deletes per second at the collection level. 19 | 20 | ## Notes 21 | - Run on any instance in the replicaset (the larger the oplog the better) 22 | - If sharded, run on one instance in each shard 23 | - Each execution creates a file starting with \<server-alias\> and ending with .json 24 | - The \<uri\> options can be found at https://www.mongodb.com/docs/manual/reference/connection-string/ 25 | - Use &directConnection=true 26 | 27 | ## License 28 | This tool is licensed under the Apache 2.0 License. 29 | -------------------------------------------------------------------------------- /migration/mvu-tool/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB MVU CDC Migrator Tool 2 | 3 | The purpose of the MVU CDC migrator tool is to migrate cluster-wide changes from a source Amazon DocumentDB cluster to a target Amazon DocumentDB cluster. 4 | 5 | It enables a near-zero-downtime Major Version Upgrade (MVU) from Amazon DocumentDB 3.6 to Amazon DocumentDB 5.0.
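For orientation, the underlying mechanism is a cluster-wide change stream; a minimal, hypothetical pymongo sketch of watching it and capturing a resume token is shown below (the URI is a placeholder and this is not the tool's implementation):

```
# Hedged sketch of reading a cluster-wide change stream and its resume token -- not mvu-cdc-migrator.py itself.
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017/?replicaSet=rs0")  # placeholder URI

with client.watch() as stream:                 # cluster-wide change stream
    for change in stream:
        resume_token = change["_id"]           # token a later run could pass as its start position
        print(change["operationType"], change["ns"], resume_token)
```

The tool builds batching, threading, and target-side application of each change on top of this idea.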
6 | 7 | This tool is only recommended for performing MVU from Amazon DocumentDB 3.6. If you are performing MVU from Amazon DocumentDB 4.0 to 5.0, we recommend using the AWS Database Migration Service CDC approach. 8 | 9 | ## Prerequisites: 10 | 11 | - Python 3 12 | - Modules: pymongo 13 | ``` 14 | pip3 install pymongo 15 | ``` 16 | ## How to use 17 | 18 | 1. Clone the repository and go to the tool folder: 19 | ``` 20 | git clone https://github.com/awslabs/amazon-documentdb-tools.git 21 | cd amazon-documentdb-tools/migration/mvu-tool/ 22 | ``` 23 | 24 | 2. Run the mvu-cdc-migrator.py tool to capture the cluster-wide change stream token and migrate the changes. It accepts the following arguments: 25 | ``` 26 | python3 mvu-cdc-migrator.py --help 27 | usage: mvu-cdc-migrator.py [-h] [--skip-python-version-check] --source-uri SOURCE_URI [--target-uri TARGET_URI] 28 | [--source-database SOURCE_DATABASE] 29 | [--duration-seconds DURATION_SECONDS] 30 | [--feedback-seconds FEEDBACK_SECONDS] [--threads THREADS] 31 | [--max-seconds-between-batches MAX_SECONDS_BETWEEN_BATCHES] 32 | [--max-operations-per-batch MAX_OPERATIONS_PER_BATCH] 33 | [--dry-run] --start-position START_POSITION 34 | [--verbose] [--get-resume-token] 35 | 36 | MVU CDC Migrator Tool. 37 | 38 | options: 39 | -h, --help show this help message and exit 40 | --skip-python-version-check 41 | Permit execution on Python 3.6 and prior 42 | --source-uri SOURCE_URI 43 | Source URI 44 | --target-uri TARGET_URI 45 | Target URI you can skip if you run with get-resume-token 46 | --source-database SOURCE_DATABASE 47 | Source database name if you skip it will replicate all the databases 48 | --duration-seconds DURATION_SECONDS 49 | Number of seconds to run before exiting, 0 = run forever 50 | --feedback-seconds FEEDBACK_SECONDS 51 | Number of seconds between feedback output 52 | --threads THREADS Number of threads (parallel processing) 53 | --max-seconds-between-batches MAX_SECONDS_BETWEEN_BATCHES 54 | Maximum number of seconds to await full batch 55 | --max-operations-per-batch MAX_OPERATIONS_PER_BATCH 56 | Maximum number of operations to include in a single batch 57 | --dry-run Read source changes only, do not apply to target 58 | --start-position START_POSITION 59 | Starting position - 0 to get change stream resume token, or change stream resume token 60 | --verbose Enable verbose logging 61 | --get-resume-token Display the current change stream resume token 62 | ``` 63 | ## Example usage: 64 | 65 | * To get the cluster-wide change stream token 66 | ``` 67 | python3 mvu-cdc-migrator.py --source-uri <source-uri> --start-position 0 --verbose --get-resume-token 68 | ``` 69 | * To migrate the CDC changes during MVU 70 | ``` 71 | python3 mvu-cdc-migrator.py --source-uri <source-uri> --target-uri <target-uri> --start-position <resume-token> --verbose 72 | ``` 73 | -------------------------------------------------------------------------------- /monitoring/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Monitoring Tools 2 | 3 | * [docdb-dashboarder](./docdb-dashboarder) - create a "starter" dashboard for a DocumentDB cluster. 4 | * [docdb-stat](./docdb-stat) - display high level DocumentDB instance statistics. 5 | * [documentdb-top](./documentdb-top) - display detailed DocumentDB collection level statistics.
6 | * [gc-watchdog](./gc-watchdog) - track garbage collection activity to a file or CloudWatch metrics 7 | -------------------------------------------------------------------------------- /monitoring/custom-metrics/README.md: -------------------------------------------------------------------------------- 1 | # Custom Metrics Tool 2 | There are Amazon DocumentDB cluster limits that are not currently exposed as Amazon CloudWatch metrics. The **custom-metrics** tool connects to an Amazon DocumentDB cluster, collects the specified metrics, and publishes them as custom CloudWatch metrics. The following metrics can be collected by the **custom-metrics** tool: 3 | 4 | 1. collection count (per cluster) 5 | 2. collection size (per collection) 6 | 3. database count (per cluster) 7 | 4. index count (per collection) 8 | 5. index size (per index) 9 | 6. user count (per cluster) 10 | 11 | CloudWatch metrics will be published to the following dimensions in the **CustomDocDB** namespace: 12 | 13 | 1. **Cluster, Collection, Database, Index** - index size 14 | 2. **Cluster, Collection, Database** - collection size and index count 15 | 3. **Database** - collection count, database count, and user count 16 | 17 | 18 | 19 | ------------------------------------------------------------------------------------------------------------------------ 20 | ## Requirements 21 | 22 | Python 3.x with modules: 23 | 24 | * boto3 - AWS SDK that allows management of AWS resources through Python 25 | * pymongo - MongoDB driver for Python applications 26 | 27 | ``` 28 | pip install boto3 29 | pip install pymongo 30 | ``` 31 | 32 | Download the Amazon DocumentDB Certificate Authority (CA) certificate required to authenticate to your cluster: 33 | ``` 34 | wget https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem 35 | ``` 36 | 37 | ------------------------------------------------------------------------------------------------------------------------ 38 | ## Usage 39 | 40 | The tool accepts the following arguments: 41 | 42 | ``` 43 | python3 custom-metrics.py --help 44 | usage: custom-metrics.py [-h] [--skip-python-version-check] --cluster_name 45 | CLUSTER_NAME --uri URI --namespaces NAMESPACES 46 | [--collection_count] [--database_count] 47 | [--user_count] [--collection_size] [--index_count] 48 | [--index_size] 49 | 50 | optional arguments: 51 | -h, --help show this help message and exit 52 | --skip-python-version-check 53 | Permit execution on Python 3.6 and prior 54 | --cluster_name CLUSTER_NAME 55 | Name of cluster for Amazon CloudWatch custom metric 56 | --uri URI Amazon DocumentDB Connection URI 57 | --namespaces NAMESPACES 58 | comma separated list of namespaces to monitor 59 | --collection_count log cluster collection count 60 | --database_count log cluster database count 61 | --user_count log cluster user count 62 | --collection_size log collection size 63 | --index_count log collection index count 64 | --index_size log collection index size 65 | ``` 66 | 67 | Examples of ```namespaces``` parameter: 68 | 69 | 1. Specific namespace: ```"."``` 70 | 2. All collections in specific database: ```".*"``` 71 | 3. Specific collection in any database: ```"*."``` 72 | 4. All namespaces: ```"*.*"``` 73 | 5. 
Multiple namespaces: ```".*, *., ."``` 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /monitoring/docdb-dashboarder/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | -------------------------------------------------------------------------------- /monitoring/docdb-dashboarder/README.md: -------------------------------------------------------------------------------- 1 | # DocumentDB Dashboarder Tool 2 | DocumentDB Dashboarder creates a CloudWatch monitoring dashboard for a DocumentDB cluster. 3 | 4 | ------------------------------------------------------------------------------------------------------------------------ 5 | ## Package Requirements 6 | 7 | * boto3 - AWS SDK that allows management of aws resources through python 8 | * awscli - Command line tools that allow access public APIs to manage AWS services 9 | 10 | ------------------------------------------------------------------------------------------------------------------------ 11 | ## Installing Packages 12 | 13 | 1. In your terminal, install the boto3, awscli, and argparse in your terminal 14 | ``` 15 | pip install boto3 16 | pip install awscli 17 | ``` 18 | ------------------------------------------------------------------------------------------------------------------------ 19 | ## IAM User Creation and Setup 20 | 21 | **Note: If you already have an existing IAM user for DocDB, associate the roles in step 4 and can move on to the next 22 | section "Configure your AWS Credentials"** 23 | 24 | 1. Open IAM Service in your AWS Management Console 25 | 2. Select the "Users" tab using the toolbar on the left side of your screen 26 | 3. Create a new user and under "Select AWS Access Type" choose "Access Key - Programmatic Access" and click next. Be sure to save this access key for later on. 27 | 4. Associate the following permissions for your IAM User - CloudWatchFullAccess, AmazonDocDBReadOnlyAccess 28 | 5. Complete the user creation and save the csv file with your access key and secret access key in a safe place 29 | 30 | _Congratulations you have successfully set up your IAM User to interact with CloudWatch and DocumentDB!_ 31 | 32 | ------------------------------------------------------------------------------------------------------------------------ 33 | ## Configure Your AWS Credentials 34 | 35 | 1. In your terminal use the following command: 36 | ``` 37 | aws configure 38 | ``` 39 | 2. You will be prompted to fill out four categories: 40 | 41 | **Note: Access key can be found in IAM -> Users -> User Name -> Security Credentials -> Access Keys** 42 | ``` 43 | AWS Access Key: 44 | AWS Secret Access Key: 45 | Default region: 46 | Default output format: 47 | ``` 48 | 3. To view your credentials, use the following command in your terminal: 49 | ``` 50 | cat ~/.aws/credentials 51 | ``` 52 | 4. To test your aws credentials by returning your account information use the following command in your terminal: 53 | ``` 54 | aws sts get-caller-identity | tee 55 | ``` 56 | 57 | _Congratulations you have successfully configured your AWS Credentials!_ 58 | 59 | ------------------------------------------------------------------------------------------------------------------------ 60 | ## How to Run 61 | 62 | Note: If you want to add additional instances to your cluster in the future, you must run the script again. 
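For context, creating a dashboard ultimately comes down to a CloudWatch `PutDashboard` call; the hedged boto3 sketch below illustrates the shape of that call (the dashboard name, region, cluster identifier, and widget layout are hypothetical, not the tool's actual output):

```
# Hedged sketch of publishing a CloudWatch dashboard with boto3 -- not create-docdb-dashboard.py itself.
import json
import boto3

cloudwatch = boto3.client("cloudwatch", region_name="us-east-1")   # placeholder region

dashboard_body = {
    "widgets": [
        {
            "type": "metric",
            "x": 0, "y": 0, "width": 12, "height": 6,
            "properties": {
                "metrics": [["AWS/DocDB", "CPUUtilization", "DBClusterIdentifier", "my-cluster"]],  # placeholder cluster
                "period": 300,
                "stat": "Average",
                "region": "us-east-1",
                "title": "CPUUtilization"
            }
        }
    ]
}

cloudwatch.put_dashboard(
    DashboardName="my-docdb-dashboard",        # placeholder name
    DashboardBody=json.dumps(dashboard_body)
)
```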
63 | 64 | ### To Run in IDE 65 | In your IDE, click edit your configurations and set your parameters as follows, then run in your IDE: 66 | ``` 67 | --name <dashboard-name> --region <region> --clusterID <cluster-id> 68 | ``` 69 | ### To Run in Terminal 70 | Open your terminal and run the following commands: 71 | ``` 72 | cd <path-to-docdb-dashboarder> 73 | ``` 74 | ``` 75 | python create-docdb-dashboard.py --name <dashboard-name> --region <region> --clusterID <cluster-id> 76 | 77 | optionally include --include-nvme to add NVMe metrics to the dashboard 78 | ``` 79 | ### Only include one of the below metrics. If migrating using the migrator script, include --monitor-migration. If migrating using DMS, include --monitor-dms followed by --dms-task-id 80 | ``` 81 | optionally include --monitor-migration to add full load / cdc metrics from migrator tool to the dashboard 82 | 83 | optionally include --monitor-dms --dms-task-id (dms task identifier) to monitor DMS task during migration 84 | ``` 85 | -------------------------------------------------------------------------------- /monitoring/docdb-stat/README.md: -------------------------------------------------------------------------------- 1 | # Real-time Amazon DocumentDB server stats monitoring tool. 2 | 3 | The **docdbstat** tool connects to a compute instance and continuously fetches real-time metrics by polling `db.serverStatus()` at a configurable interval (defaults to 1 sec). 4 | 5 | 6 | ## Requirements 7 | 8 | - Python 3.x with modules: 9 | - Pymongo 10 | - Pandas 11 | ``` 12 | pip3 install pymongo pandas 13 | ``` 14 | 15 | - Download the Amazon DocumentDB Certificate Authority (CA) certificate required to authenticate to your instance 16 | ``` 17 | wget https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem 18 | ``` 19 | 20 | ## Usage 21 | The tool accepts the following arguments: 22 | 23 | ``` 24 | # python3 docdbstat.py --help 25 | usage: docdbstat.py [-h] --uri URI [-i INTERVAL] [-hi HEADER_INTERVAL] [-f FIELD] 26 | 27 | Real-time Amazon DocumentDB server stats monitoring tool. 28 | 29 | options: 30 | -h, --help show this help message and exit 31 | --uri URI DocumentDB connection URI. 32 | -i INTERVAL, --interval INTERVAL 33 | Polling interval in seconds (Default: 1s). 34 | -hi HEADER_INTERVAL, --header-interval HEADER_INTERVAL 35 | Interval to display the header in iterations (Default: 10). 36 | -f FIELD, --field FIELD 37 | Comma-separated fields to display in the output. 38 | ``` 39 | 40 | ## Example 41 | 42 | Get stats every 5 seconds: 43 | 44 | ``` 45 | python3 docdbstat.py --uri "mongodb://<user>:<password>@<host>:27017/?tls=true&tlsCAFile=global-bundle.pem&retryWrites=false" -i 5 46 | ``` 47 | 48 | Get specific stats, for example to output just write operations: 49 | 50 | ``` 51 | python3 docdbstat.py --uri "mongodb://<user>:<password>@<host>:27017/?tls=true&tlsCAFile=global-bundle.pem&retryWrites=false" -f inserts,updates,deletes 52 | ``` 53 | -------------------------------------------------------------------------------- /monitoring/documentdb-top/README.md: -------------------------------------------------------------------------------- 1 | # Real-time Amazon DocumentDB collection level monitoring tool. 2 | 3 | The **documentdb-top** tool connects to a DocumentDB instance and continuously fetches real-time collection level metrics by polling `db.<collection>.stats()` at a configurable interval (defaults to 60 seconds).
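The polling idea can be pictured with a short, hypothetical pymongo sketch (the URI and database name are placeholders; this is not the tool's actual code):

```
# Hypothetical illustration of polling collection statistics on an interval -- not documentdb-top.py itself.
import time
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017/")    # placeholder URI
db = client["db1"]                                    # placeholder database

while True:
    for coll_name in db.list_collection_names():
        stats = db.command("collStats", coll_name)    # server-side equivalent of db.<collection>.stats()
        print(coll_name, "docs:", stats.get("count"), "size:", stats.get("size"))
    time.sleep(60)                                    # default 60-second update frequency
```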
4 | 5 | 6 | ## Requirements 7 | 8 | - Python 3.x with modules: 9 | - Pymongo 10 | ``` 11 | pip3 install pymongo 12 | ``` 13 | 14 | - Download the Amazon DocumentDB Certificate Authority (CA) certificate required to authenticate to your instance 15 | ``` 16 | wget https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem 17 | ``` 18 | 19 | ## Usage 20 | The tools accepts the following arguments: 21 | 22 | ``` 23 | # python3 documentdb-top.py --help 24 | 25 | usage: documentdb-top.py [-h] --uri URI --database DATABASE [--update-frequency-seconds UPDATE_FREQUENCY_SECONDS] [--must-crud] --log-file-name LOG_FILE_NAME [--skip-python-version-check] [--show-per-second] 26 | 27 | DocumentDB Top 28 | 29 | optional arguments: 30 | -h, --help show this help message and exit 31 | --uri URI URI 32 | --database DATABASE Database name 33 | --update-frequency-seconds UPDATE_FREQUENCY_SECONDS Number of seconds before update 34 | --must-crud Only display when insert/update/delete occurred 35 | --log-file-name LOG_FILE_NAME Log file name 36 | --skip-python-version-check Permit execution on Python 3.6 and prior 37 | --show-per-second Show operations as "per second" 38 | ``` 39 | 40 | ## Example 41 | 42 | Get collection stats every 15 seconds, only if insert/update/delete has occurred: 43 | 44 | ``` 45 | python3 documentdb-top.py --uri "mongodb://:@:27017/?tls=true&tlsCAFile=global-bundle.pem&retryWrites=false&directConnection=true" --database db1 --update-frequency-seconds 15 --log-file-name my-log-file.log --must-crud 46 | ``` 47 | 48 | -------------------------------------------------------------------------------- /monitoring/gc-watchdog/.gitignore: -------------------------------------------------------------------------------- 1 | doit-gc.bash 2 | clean.bash 3 | -------------------------------------------------------------------------------- /monitoring/gc-watchdog/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Garbage Collection Watchdog 2 | This tool monitors a DocumentDB cluster for garbage collection activity. It displays the start and end of each garbage collection to a log file and can optionally create 3 CloudWatch metrics for monitoring and alerting purposes. 3 | 4 | ## Installation 5 | Clone the repository 6 | 7 | ## Requirements 8 | * Python 3.7+ 9 | * PyMongo, boto3 10 | * IAM permission "cloudwatch:PutMetricData" is required to create CloudWatch metrics 11 | 12 | ## Using the garbage collection watchdog 13 | ``` 14 | python3 gc-watchdog.py --uri --log-file-name [--create-cloudwatch-metrics] [--cluster-name ] 15 | ``` 16 | 17 | * \ follows the [MongoDB Connection String URI Format](https://www.mongodb.com/docs/manual/reference/connection-string/) 18 | * \ is the name of the log file created by the tool 19 | * include --create-cloudwatch-metrics to create metrics for the number of ongoing garbage collections, maximum time of an ongoing garbage collection in seconds, and total time of all ongoing garbage collections in seconds 20 | * CloudWatch metrics are captured in namespace "CustomDocDB" as "GCCount", "GCTotalSeconds", and "GCMaxSeconds" 21 | * include --cluster-name \ if capturing CloudWatch metrics via --create-cloudwatch-metrics 22 | * NOTE - The default frequency to check for garbage collection activity is every 5 seconds. Garbage collections requiring less than 5 seconds might not be recorded by this tool. 
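For reference, publishing the three metrics named above requires only the CloudWatch PutMetricData API. A minimal boto3 sketch is shown below (the region, cluster name, and dimension name are illustrative assumptions, not necessarily what gc-watchdog emits):

```
import boto3

cloudwatch = boto3.client("cloudwatch", region_name="us-east-1")  # region is a placeholder

# In the watchdog these values would come from the garbage collection polling loop
gc_count = 2
gc_total_seconds = 75.0
gc_max_seconds = 50.0

dimensions = [{"Name": "ClusterName", "Value": "my-cluster"}]  # assumed dimension name/value

cloudwatch.put_metric_data(
    Namespace="CustomDocDB",
    MetricData=[
        {"MetricName": "GCCount", "Value": gc_count, "Unit": "Count", "Dimensions": dimensions},
        {"MetricName": "GCTotalSeconds", "Value": gc_total_seconds, "Unit": "Seconds", "Dimensions": dimensions},
        {"MetricName": "GCMaxSeconds", "Value": gc_max_seconds, "Unit": "Seconds", "Dimensions": dimensions},
    ],
)
```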
23 | -------------------------------------------------------------------------------- /operations/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Operational Tools 2 | 3 | * [index-creator](./index-creator) - create indexes and monitor the progress 4 | -------------------------------------------------------------------------------- /operations/document-compression-updater/.gitignore: -------------------------------------------------------------------------------- 1 | doit*.bash 2 | -------------------------------------------------------------------------------- /operations/document-compression-updater/README.md: -------------------------------------------------------------------------------- 1 | # Python Updater tool 2 | This sample application compresses pre-existing documents in a collection after compression is turned on for that collection. 3 | 4 | Single-threaded application - issues **5000** (controlled by argument --batch-size) updates serially in a _round_, and sleeps for **60** (controlled by argument --wait-period) seconds before starting the next _round_. 5 | 6 | Status of the updates is maintained in database **tracker_db** - for each collection there is a tracker collection named **<< collection >>__tracker_col**. 7 | 8 | The application can be restarted if it crashes and it will pick up from the last successful _round_ based on data in **<< collection >>__tracker_col**. 9 | 10 | The update statements use field **6nh63** (controlled by argument --update-field) for triggering compression on existing records. 11 | 12 | The application uses the **_id** field for tracking and updating existing documents. If you are using a custom _id value, the value should be sortable. 13 | 14 | ## Requirements 15 | Python 3.7 or later, pymongo 16 | 17 | ## Installation 18 | Clone the repository and go to the application folder: 19 | ``` 20 | git clone https://github.com/awslabs/amazon-documentdb-tools.git 21 | cd amazon-documentdb-tools/operations/document-compression-updater 22 | ``` 23 | 24 | ## Usage/Examples 25 | 26 | ``` 27 | python3 update_apply_compression.py --uri "<>" --database <> --collection <> --update-field << field_name >> --wait-period << int >> --batch-size << int >> 28 | ``` 29 | 30 | The application has the following arguments: 31 | 32 | ``` 33 | Required parameters 34 | --uri URI URI (connection string) 35 | --database DATABASE Database 36 | --collection COLLECTION Collection 37 | 38 | Optional parameters 39 | --file-name Starting name of the created log files 40 | --update-field Field used for updating an existing document.
This should not conflict with any fieldname you are already using 41 | --wait-period Number of seconds to wait between each batch 42 | --batch-size Number of documents to update in a single batch 43 | ``` 44 | -------------------------------------------------------------------------------- /operations/document-compression-updater/requirements.txt: -------------------------------------------------------------------------------- 1 | pymongo 2 | 3 | -------------------------------------------------------------------------------- /operations/index-creator/.gitignore: -------------------------------------------------------------------------------- 1 | *.bash 2 | -------------------------------------------------------------------------------- /operations/index-creator/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Index Creator 2 | 3 | Index Creator enables the creation of indexes while viewing the status and progress from the command line. 4 | 5 | ## Features 6 | - Create single key and compound indexes from the command line, including multi-key indexes 7 | - **NOTE** - does not currently support creation of partial, geospatial, text, or vector indexes 8 | - During index creation the status of the index creation process as well as estimated time to complete the current stage is displayed 9 | 10 | ## Requirements 11 | Python 3.7 or greater, Pymongo. 12 | 13 | ## Usage/Examples 14 | Index Creator accepts the following arguments: 15 | 16 | ``` 17 | --uri URI URI to connect to Amazon DocumentDB (required) 18 | --workers WORKERS Number of worker processes for heap scan stage of index creation (required) 19 | --database DATABASE Database containing collection for index creation (required) 20 | --collection COLLECTION Collection to create index (required) 21 | --index-name INDEX_NAME Name of index to create (required) 22 | --index-keys INDEX_KEYS Comma separated list of index key(s), append :-1 after key for descending (required) 23 | --unique Create unique index 24 | --foreground Create index in the foreground (must provide this or --background) 25 | --background Create index in the background (must provide this or --foreground) 26 | --drop-index Drop index (if exists) 27 | --update-frequency-seconds SECONDS Number of seconds between progress updates (default 15) 28 | --log-file-name LOG_FILE_NAME Name of file for output logging (default index-creator.log) 29 | ``` 30 | 31 | ### Create a compound index with 4 workers on testdb.testcoll on fields f1 and f2 32 | ``` 33 | python3 index-creator.py --uri $DOCDB_URI --workers 4 --database testdb --collection testcoll --index-name test_idx --index-keys f1,f2 --background 34 | ``` 35 | 36 | ## License 37 | [Apache 2.0](http://www.apache.org/licenses/LICENSE-2.0) 38 | -------------------------------------------------------------------------------- /operations/large-doc-finder/README.md: -------------------------------------------------------------------------------- 1 | # Large Document Finder for DocumentDB 2 | 3 | This tool scans an Amazon DocumentDB collection to identify documents that exceed a specified size threshold. It processes documents in parallel using multiple threads and outputs results exceeding the threshold to a CSV file. 
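The measurement at the heart of such a scan is the BSON-encoded size of each document. A minimal single-threaded sketch is shown below (placeholder connection string and names; the actual tool adds multi-threading, batching, and CSV output):

```
import bson
import pymongo

client = pymongo.MongoClient("mongodb://<user>:<password>@<cluster-endpoint>:27017/?tls=true&tlsCAFile=global-bundle.pem&retryWrites=false")
coll = client["mydb"]["mycollection"]

threshold_bytes = 8388608  # 8MB default threshold

for doc in coll.find({}, batch_size=1000):
    size_bytes = len(bson.encode(doc))  # size of the document in its BSON representation
    if size_bytes > threshold_bytes:
        print(doc["_id"], size_bytes, round(size_bytes / 1048576, 2))
```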
4 | 5 | # Requirements 6 | - Python 3.9+ 7 | - pymongo Python package - tested versions 8 | - DocumentDB | pymongo 4.10.1 9 | - If not installed - "$ pip3 install pymongo" 10 | 11 | ## Example usage: 12 | Basic usage: 13 | 14 | python large-docs.py --uri "mongodb://..." \ 15 | --processes 8 \ 16 | --batch-size 1000 \ 17 | --database mydb \ 18 | --collection mycollection \ 19 | --csv "mydb_mycollection_" \ 20 | --large-doc-size 10485760 21 | 22 | ## Parameters: 23 | `--uri` : str 24 | - Required 25 | - DocumentDB connection string 26 | - Example: `mongodb://user:password@name.cluster.region.docdb.amazonaws.com:27017/?tls=true&tlsCAFile=global-bundle.pem&replicaSet=rs0&readPreference=secondaryPreferred&retryWrites=false` 27 | 28 | `--processes` : int 29 | - Required 30 | - Number of parallel threads to use 31 | - Example: 8 32 | 33 | `--batch-size` : int 34 | - Required 35 | - Number of documents to process in each batch 36 | - Example: 1000 37 | 38 | `--database` : str 39 | - Required 40 | - Name of the database to scan 41 | - Example: `mydb` 42 | 43 | `--collection` : str 44 | - Required 45 | - Name of the collection to scan 46 | - Example: `mycollection` 47 | 48 | `--csv` : str 49 | - Prefix for the CSV output filename 50 | - Default: `large_doc_` 51 | - Example: `large_docs_prod` 52 | 53 | `--large-doc-size` : int 54 | - Size threshold in bytes 55 | - Default: 8388608 (8MB) 56 | - Example: 10485760 (10MB) 57 | 58 | ## Example output: 59 | ---------------- 60 | The output CSV contains: 61 | - Scan details (database, collection, threshold, etc.) 62 | - Document details (ID, size in bytes, size in MB) 63 | 64 | ```Database,mydb 65 | Collection,mycollection 66 | Batch size,50000 67 | Number of threads,4 68 | Total documents,3156003 69 | Large document threshold (bytes),8388608 70 | Large document threshold (MB),8.00 71 | Scan Start Time,2025-03-02T22:17:04.761870 72 | Scan completion time,2025-03-02T22:17:36.291172 73 | Total scan time,00:00:31 74 | Large documents found,3 75 | 76 | Document _id,Size (bytes),Size (MB) 77 | 65e8f2a1b3e8d97531abcdef,9437247,9.00 78 | 65e8f2a2b3e8d97531abcd01,9437247,9.00 79 | 65e8f2a3b3e8d97531abcd02,9437247,9.00 80 | ``` 81 | ## Performance Considerations: 82 | 1. Thread count: Start with 2x CPU cores, adjust based on monitoring 83 | 2. Batch size: Larger batches = more memory but fewer DB round trips 84 | 3. Run during off-peak hours and monitor cluster performance metrics 85 | 4. Use `secondaryPreferred` read preference 86 | 87 | ## License 88 | This tool is licensed under the Apache 2.0 License. -------------------------------------------------------------------------------- /operations/large-doc-finder/requirements.txt: -------------------------------------------------------------------------------- 1 | pymongo -------------------------------------------------------------------------------- /operations/server-certificate-check/.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | -------------------------------------------------------------------------------- /operations/server-certificate-check/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Server Certificate Check 2 | The server certificate check returns a list of all instances in a region including the expiration of the server's certificate and the maintenance window. 3 | 4 | ## Features 5 | - Output may be filtered using case-insensitive matching on cluster name and/or instance name.
6 | 7 | ## Requirements 8 | - Python 3.7 or greater, boto3, urllib3 9 | - IAM privileges in https://github.com/awslabs/amazon-documentdb-tools/blob/master/operations/server-certificate-check/iam-policy.json 10 | 11 | ## Installation 12 | Clone the repository and install the requirements: 13 | 14 | ``` 15 | git clone https://github.com/awslabs/amazon-documentdb-tools.git 16 | cd amazon-documentdb-tools/performance/server-certificate-check 17 | python3 -m pip install -r requirements.txt 18 | ``` 19 | 20 | ## Usage/Examples 21 | The utility accepts the following arguments: 22 | 23 | ``` 24 | --region AWS region for scan 25 | --log-file-name Name of log file to capture all output 26 | --cluster-filter [optional] Case-insensitive string to use for filtering clusters to include in output 27 | --instance-filter [optional] Case-insensitive string to use for filtering instances to include in output 28 | 29 | ``` 30 | 31 | ### Report all Amazon DocumentDB instances in us-east-1 32 | ``` 33 | python3 server-certificate-check.py --log-file-name certs.log --region us-east-1 34 | ``` 35 | 36 | ### Report all Amazon DocumentDB instances in us-east-1 containing "ddb5" in instance name 37 | ``` 38 | python3 server-certificate-check.py --log-file-name certs.log --region us-east-1 --instance-filter ddb5 39 | ``` 40 | 41 | ## License 42 | [Apache 2.0](http://www.apache.org/licenses/LICENSE-2.0) 43 | 44 | ## Contributing 45 | Contributions are always welcome! See the [contributing](https://github.com/awslabs/amazon-documentdb-tools/blob/master/CONTRIBUTING.md) page for ways to get involved. 46 | -------------------------------------------------------------------------------- /operations/server-certificate-check/iam-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "rds:DescribeDBClusters", 8 | "rds:DescribeDBInstances" 9 | ], 10 | "Resource": [ 11 | "arn:aws:rds:{region}:{account}:*:*" 12 | ] 13 | } 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /operations/server-certificate-check/requirements.txt: -------------------------------------------------------------------------------- 1 | botocore>=1.33.6 2 | boto3>=1.33.6 3 | requests 4 | urllib3<2 5 | -------------------------------------------------------------------------------- /operations/server-certificate-check/server-certificate-check.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import boto3 4 | import datetime 5 | import argparse 6 | import requests 7 | import json 8 | import sys 9 | import os 10 | 11 | 12 | def deleteLog(appConfig): 13 | if os.path.exists(appConfig['logFileName']): 14 | os.remove(appConfig['logFileName']) 15 | 16 | 17 | def printLog(thisMessage,appConfig): 18 | print("{}".format(thisMessage)) 19 | with open(appConfig['logFileName'], 'a') as fp: 20 | fp.write("{}\n".format(thisMessage)) 21 | 22 | 23 | def scan_clusters(appConfig): 24 | client = boto3.client('docdb',region_name=appConfig['region']) 25 | 26 | response = client.describe_db_clusters(Filters=[{'Name': 'engine','Values': ['docdb']}]) 27 | 28 | printLog("{:<30} | {:<30} | {:<25} | {:<20}".format("cluster-name","instance-name","server-cert-expire","server-maint-window"),appConfig) 29 | 30 | for thisCluster in response['DBClusters']: 31 | thisClusterName = thisCluster['DBClusterIdentifier'] 32 | if 
appConfig['clusterFilter'] is None or appConfig['clusterFilter'].upper() in thisClusterName.upper(): 33 | for thisInstance in thisCluster['DBClusterMembers']: 34 | thisInstanceName = thisInstance['DBInstanceIdentifier'] 35 | if appConfig['instanceFilter'] is None or appConfig['instanceFilter'].upper() in thisInstanceName.upper(): 36 | responseInstance = client.describe_db_instances(DBInstanceIdentifier=thisInstanceName) 37 | validTill = responseInstance['DBInstances'][0]['CertificateDetails']['ValidTill'] 38 | preferredMaintenanceWindow = responseInstance['DBInstances'][0]['PreferredMaintenanceWindow'] 39 | printLog("{:<30} | {:<30} | {} | {:<20}".format(thisClusterName,thisInstanceName,validTill,preferredMaintenanceWindow),appConfig) 40 | 41 | client.close() 42 | 43 | 44 | def main(): 45 | parser = argparse.ArgumentParser(description='DocumentDB Server Certificate Checker') 46 | 47 | parser.add_argument('--region',required=True,type=str,help='AWS Region') 48 | parser.add_argument('--cluster-filter',required=False,type=str,help='Cluster name filter (substring match)') 49 | parser.add_argument('--instance-filter',required=False,type=str,help='Instance name filter (substring match)') 50 | parser.add_argument('--log-file-name',required=True,type=str,help='Log file name') 51 | 52 | args = parser.parse_args() 53 | 54 | appConfig = {} 55 | appConfig['region'] = args.region 56 | appConfig['logFileName'] = args.log_file_name 57 | appConfig['clusterFilter'] = args.cluster_filter 58 | appConfig['instanceFilter'] = args.instance_filter 59 | 60 | deleteLog(appConfig) 61 | scan_clusters(appConfig) 62 | 63 | print("") 64 | print("Created {} with results".format(appConfig['logFileName'])) 65 | print("") 66 | 67 | 68 | if __name__ == "__main__": 69 | main() 70 | -------------------------------------------------------------------------------- /performance/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Performance Tools 2 | 3 | * [compression-review](./compression-review) - calculate compressibility of each collection by samping random documents. 4 | * [deployment-scanner](./deployment-scanner) - scan all clusters in one account/region and provide potential cost savings suggestions. 5 | * [index-cardinality-detection](./index-cardinality-detection) - sample random documents to estimate index cardinality and selectivity. 6 | * [index-review](./index-review) - provide analysis of all collections and indexes including sizing, index usage, and redundant indexes. 7 | * [metric-collector](./metric-collector) - collect and summarize major cluster and instance level metrics in a single view. 8 | * [metric-analyzer](./metric-analyzer) - process the output of metric-collector for size, cost, and performance recommendations. -------------------------------------------------------------------------------- /performance/compression-review/.gitignore: -------------------------------------------------------------------------------- 1 | doit*.bash 2 | -------------------------------------------------------------------------------- /performance/compression-review/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Compression Review Tool 2 | 3 | The compression review tool samples 1000 documents in each collection to determine the average compressibility of the data. A larger number of documents can be sampled via the --sample-size parameter. 
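As a concrete illustration of what "compressibility" means here, the sketch below estimates a ratio for a random sample of documents using the default lz4 fast/level 1 settings (a simplified example, not the tool's code; the use of $sample and the connection details are assumptions):

```
import bson
import lz4.frame
import pymongo

client = pymongo.MongoClient("mongodb://<user>:<password>@<cluster-endpoint>:27017/?tls=true&tlsCAFile=global-bundle.pem&retryWrites=false")
coll = client["mydb"]["mycollection"]

raw_bytes = 0
compressed_bytes = 0

# One way to pull a random sample of documents
for doc in coll.aggregate([{"$sample": {"size": 1000}}]):
    data = bson.encode(doc)
    raw_bytes += len(data)
    compressed_bytes += len(lz4.frame.compress(data, compression_level=1))

if compressed_bytes > 0:
    print("approximate compression ratio: {:.2f}x".format(raw_bytes / compressed_bytes))
```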
4 | 5 | # Requirements 6 | - Python 3.7+ 7 | - pymongo Python package - tested versions 8 | - MongoDB 2.6 - 3.4 | pymongo 3.10 - 3.12 9 | - MongoDB 3.6 - 5.0 | pymongo 3.12 - 4.0 10 | - MongoDB 5.1+ | pymongo 4.0+ 11 | - DocumentDB | pymongo 3.10+ 12 | - If not installed - "$ pip3 install pymongo" 13 | - lz4 Python package 14 | - If not installed - "$ pip3 install lz4" 15 | - zstandard Python package 16 | - If not installed - "$ pip3 install zstandard" 17 | 18 | ## Using the Compression Review Tool 19 | `python3 compression-review.py --uri --server-alias ` 20 | 21 | - Default compression tested is lz4/fast/level 1 22 | - To test other compression techniques provide --compressor \ with one of the following for \ 23 | 24 | | compression | description | 25 | | ----------- | ----------- | 26 | | lz4-fast | lz4/fast/level 1 | 27 | | lz4-fast-dict | lz4/fast/level 1/dictionary-provided (trained by sampling documents) | 28 | | lz4-high | lz4/high/level 1 | 29 | | lz4-high-dict | lz4/high/level 1/dictionary-provided (trained by sampling documents) | 30 | | zstd-1 | zstandard/level 1 | 31 | | zstd-1-dict | zstandard/level 1/dictionary-provided (trained by sampling documents) | 32 | | zstd-5 | zstandard/level 5 | 33 | | zstd-5-dict | zstandard/level 5/dictionary-provided (trained by sampling documents) | 34 | | bz2-1 | bzip/level 1 | 35 | | lzma-0 | lzma/level 0 | 36 | | zlib-1 | zlib/level 1 | 37 | 38 | - Run on any instance in the replica set 39 | - Use a different \ for each server analyzed, output file is named using \ as the starting portion 40 | - Creates a single CSV file per execution 41 | - The \ options can be found at https://www.mongodb.com/docs/manual/reference/connection-string/ 42 | - If your URI contains ampersand (&) characters they must be escaped with the backslash or enclosed your URI in double quotes 43 | - For DocumentDB use either the cluster endpoint or any of the instance endpoints 44 | 45 | ## License 46 | This tool is licensed under the Apache 2.0 License. 47 | -------------------------------------------------------------------------------- /performance/compression-review/requirements.txt: -------------------------------------------------------------------------------- 1 | pymongo 2 | lz4 3 | zstandard 4 | -------------------------------------------------------------------------------- /performance/deployment-scanner/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Deployment Scanner 2 | The deployment scanner reviews DocumentDB clusters for possible cost optimization and utilization. 
3 | 4 | ## Features 5 | - Estimate the monthly cost for each cluster in a region in both standard storage and IO optimized storage configurations 6 | 7 | ## Requirements 8 | - Python 3.7 or greater, boto3, urllib3 9 | - IAM privileges in https://github.com/awslabs/amazon-documentdb-tools/blob/master/performance/deployment-scanner/iam-policy.json 10 | 11 | ## Installation 12 | Clone the repository and install the requirements: 13 | 14 | ``` 15 | git clone https://github.com/awslabs/amazon-documentdb-tools.git 16 | cd amazon-documentdb-tools/performance/deployment-scanner 17 | python3 -m pip install -r requirements.txt 18 | ``` 19 | 20 | ## Usage/Examples 21 | The deployment scanner accepts the following arguments: 22 | 23 | ``` 24 | --region AWS region for scan 25 | --log-file-name Name of file write CSV data to 26 | --start-date [optional] Starting date in YYYYMMDD for historical review of cluster resource usage 27 | --end-date [optional] Ending date in YYYYMMDD for historical review of cluster resource usage 28 | 29 | If --start-date and --end-date are not provided, the last 30 days are used for historical cluster resource usage. 30 | ``` 31 | 32 | ### Review Amazon DocumentDB clusters in us-east-1 for November 2023: 33 | ``` 34 | python3 deployment-scanner.py --log-file-name nov-23-us-east-1 --start-date 20231101 --end-date 20231130 --region us-east-1 35 | ``` 36 | 37 | 38 | ## License 39 | [Apache 2.0](http://www.apache.org/licenses/LICENSE-2.0) 40 | 41 | ## Contributing 42 | Contributions are always welcome! See the [contributing](https://github.com/awslabs/amazon-documentdb-tools/blob/master/CONTRIBUTING.md) page for ways to get involved. 43 | -------------------------------------------------------------------------------- /performance/deployment-scanner/deployment-scanner-debug.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import boto3 4 | import datetime 5 | import argparse 6 | import requests 7 | import json 8 | import sys 9 | import os 10 | 11 | 12 | def get_docdb_instance_based_clusters(appConfig): 13 | client = boto3.client('docdb',region_name=appConfig['region']) 14 | 15 | response = client.describe_db_clusters(Filters=[{'Name': 'engine','Values': ['docdb']}]) 16 | 17 | for thisCluster in response['DBClusters']: 18 | if thisCluster['DBClusterIdentifier'] == appConfig['clusterName']: 19 | if 'StorageType' in thisCluster: 20 | print("StorageType is {}".format(thisCluster['StorageType'])) 21 | else: 22 | print("StorageType not present in describe_db_clusters() output") 23 | 24 | client.close() 25 | 26 | 27 | def main(): 28 | parser = argparse.ArgumentParser(description='DocumentDB Deployment Scanner') 29 | 30 | parser.add_argument('--region',required=True,type=str,help='AWS Region') 31 | parser.add_argument('--cluster-name',required=True,type=str,help='name of the cluster') 32 | 33 | args = parser.parse_args() 34 | 35 | appConfig = {} 36 | appConfig['region'] = args.region 37 | appConfig['clusterName'] = args.cluster_name 38 | 39 | clusterList = get_docdb_instance_based_clusters(appConfig) 40 | 41 | 42 | if __name__ == "__main__": 43 | main() 44 | -------------------------------------------------------------------------------- /performance/deployment-scanner/iam-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": [ 7 | "rds:DescribeDBClusters", 8 | 
"rds:DescribeDBInstances" 9 | ], 10 | "Resource": [ 11 | "arn:aws:rds:{region}:{account}:*:*" 12 | ] 13 | }, 14 | { 15 | "Effect": "Allow", 16 | "Action": [ 17 | "cloudwatch:GetMetricStatistics" 18 | ], 19 | "Resource": "*" 20 | } 21 | ] 22 | } -------------------------------------------------------------------------------- /performance/deployment-scanner/requirements.txt: -------------------------------------------------------------------------------- 1 | botocore>=1.33.6 2 | boto3>=1.33.6 3 | requests 4 | urllib3<2 5 | -------------------------------------------------------------------------------- /performance/index-cardinality-detection/.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store -------------------------------------------------------------------------------- /performance/index-cardinality-detection/requirements.txt: -------------------------------------------------------------------------------- 1 | termtables==0.2.4 2 | pymongo==4.6.3 3 | pandas==2.1.0 -------------------------------------------------------------------------------- /performance/index-review/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Index Review Tool 2 | 3 | The index review tool catalogs all collections and their indexes (structure and usage). It outputs a JSON file containing all collected information, a listing of unused and/or redundant indexes, and a pair of CSV files containing collection and index details. 4 | 5 | *NOTE: indexes should never be dropped without discussing with all interested parties and testing performance*. 6 | 7 | # Requirements 8 | - Python 3.7+ 9 | - PyMongo 10 | 11 | # Access Control 12 | The account executing to this script requires the following permissions - 13 | - collStats 14 | - indexStats 15 | - listCollections 16 | - listDatabases 17 | - serverStatus 18 | 19 | ## Using the Index Review Tool 20 | `python3 index-review.py --server-alias --uri ` 21 | 22 | - Execute on all instances (primary and all secondaries/read-replicas), this is critical to give a complete review of index usage. 23 | - Use a different `` for each server, output files are named using `` as the starting portion of the filename 24 | - All `` options can be found at https://www.mongodb.com/docs/manual/reference/connection-string/ 25 | - For DocumentDB use the individual instance endpoints, not the cluster endpoint 26 | 27 | ## License 28 | This tool is licensed under the Apache 2.0 License. 29 | -------------------------------------------------------------------------------- /performance/index-review/index-review-testing.txt: -------------------------------------------------------------------------------- 1 | # test on mongoDB and documentDB 2 | # identify unused indexes 3 | # identify redundant indexes 4 | # check all servers in replica-set 5 | # what about sharded clusters? 
6 | 7 | use idxtest 8 | 9 | db.idxwiz.drop(); 10 | 11 | db.idxwiz.insertOne({"one":1,"two":1,"three":1,"four":1,"five":1}); 12 | 13 | db.idxwiz.createIndex({"one":1},{"name":"one-pos"}); 14 | db.idxwiz.createIndex({"two":1},{"name":"two-pos"}); 15 | 16 | db.idxwiz.createIndex({"one":1,"two":1},{"name":"one-pos_two-pos"}); 17 | db.idxwiz.createIndex({"five":1,"three":-1},{"name":"five-pos_three-pos"}); 18 | 19 | db.idxwiz.createIndex({"two":-1,"one":1,"four":1},{"name":"two-neg_one-pos_four-pos"}); 20 | db.idxwiz.createIndex({"two":1,"one":1,"four":1},{"name":"two-pos_one-pos_four-pos"}); 21 | db.idxwiz.createIndex({"five":1,"three":-1,"four":1},{"name":"five-pos_three-neg_four-pos"}); 22 | db.idxwiz.createIndex({"one":1,"four":1,"two":1},{"name":"one-pos_four-pos_two-pos"}); 23 | 24 | db.idxwiz.createIndex({"three":1,"one":1,"four":1},{"name":"three-pos_one-pos_four-pos_idx1"}); 25 | db.idxwiz.createIndex({"three":1,"one":1,"four":1},{"name":"three-pos_one-pos_four-pos_idx2"}); 26 | 27 | db.idxwiz.createIndex({"five":1,"three":-1,"four":1,"one":1,"two":1},{"name":"five-pos_three-neg_four-pos_one-pos_two-pos"}); 28 | 29 | -------------------------------------------------------------------------------- /performance/metric-analyzer/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Metric Analyzer 2 | 3 | This tool analyzes the output of the [Amazon DocumentDB Metric Collector Tool](https://github.com/awslabs/amazon-documentdb-tools/tree/master/performance/metric-collector) to provide recommendations for optimizing performance, cost, and availability. 4 | 5 | ## Features 6 | 7 | - Analyzes CPU utilization, cache hit ratios, connection limits, and more 8 | - Provides specific recommendations based on best practices 9 | - Includes detailed context for each recommendation type 10 | - Generates CSV output for easy review 11 | - Creates interactive HTML reports with recommendation context details 12 | 13 | ## Usage 14 | 15 | ```bash 16 | python metric-analyzer.py --metrics-file \ 17 | --region \ 18 | --output \ 19 | --log-level \ 20 | [--no-html] 21 | ``` 22 | 23 | ### Parameters 24 | 25 | - `--metrics-file`: Path to the metrics CSV file to analyze (required) 26 | - `--region`: AWS Region (default: us-east-1) 27 | - `--output`: Base name for output files (default: metric-analyzer) 28 | - `--log-level`: Log level for logging (choices: DEBUG, INFO, WARNING, ERROR, CRITICAL, default: WARNING) 29 | - `--no-html`: Disable HTML output generation (HTML output is enabled by default) 30 | 31 | ## Recommendation Context 32 | 33 | Each recommendation includes a link to a context file in the `context/` directory that provides additional information about: 34 | 35 | - Considerations before implementing the recommendation 36 | - Potential impacts (positive and negative) 37 | - Alternative approaches 38 | - Implementation guidance 39 | 40 | These context files supplement the AWS documentation references and provide more nuanced guidance for decision-making. 
41 | 42 | ## Output Format 43 | 44 | ### CSV Output 45 | 46 | The tool generates a CSV file with the following columns: 47 | 48 | - ClusterName: Name of the DocumentDB cluster 49 | - InstanceName: Name of the instance (if applicable) 50 | - InstanceType: Instance type (e.g., db.r6g.large) 51 | - InstanceRole: PRIMARY or SECONDARY 52 | - Category: Instance or Cluster level recommendation 53 | - ModifyInstance: Action to take (INCREASE, DECREASE, UPGRADE) 54 | - Finding: Specific finding with metrics 55 | - Recommendation: Recommended action 56 | - Reference: Link to AWS documentation 57 | 58 | ### HTML Output 59 | 60 | The tool also generates an interactive HTML report that includes: 61 | 62 | - All information from the CSV output 63 | - Interactive "View Context" buttons that display detailed guidance for each recommendation 64 | - Responsive design for better readability 65 | 66 | ## Requirements 67 | 68 | - Python 3.6+ 69 | - boto3>=1.26.0 70 | - pandas>=1.3.0 71 | - markdown>=3.3.0 72 | -------------------------------------------------------------------------------- /performance/metric-analyzer/context/buffer_cache_low.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 12 | 13 | 14 |

Low Buffer Cache Hit Ratio Considerations

When BufferCacheHitRatio is low (below 90%) for an extended period of time, your instances may be under-provisioned for your working dataset, which can lead to performance issues and higher I/O costs.

Considerations before taking action:

  1. Data access patterns: Analyze if your application is accessing data in a way that could be optimized to improve cache utilization.
  2. Working dataset size: Determine if your working dataset has grown beyond the memory capacity of your current instance.
  3. Instance sizing impact: Increasing instance size will increase costs but may improve performance significantly if memory is the bottleneck.

Additional considerations:

  1. Enable the profiler and review the CloudWatch logs to identify and optimize slow queries
  2. Consider using Performance Insights to identify specific resource bottlenecks
  3. Implement BufferCacheHitRatio isolation by directing operational queries to the primary instance and analytic queries only to the replica instances
  4. Alternatively, achieve partial isolation by directing analytic queries to a specific replica instance with the understanding that some percentage of regular queries will also run on that replica and could potentially be affected

-------------------------------------------------------------------------------- /performance/metric-analyzer/context/connection_limit.html: --------------------------------------------------------------------------------

Connection Limits Approaching Maximum

Each instance has a connection limit that scales with instance size. Additionally, each open connection consumes memory and CPU resources on the Amazon DocumentDB instance. After the connection limit has been reached, Amazon DocumentDB rejects any further connection attempts and the application will encounter connection exceptions.

Considerations before taking action:

  1. Connection pooling: Verify if your application is using connection pooling effectively. Improper connection management can lead to unnecessary connection proliferation.
  2. Connection distribution: Verify that you are connecting to your cluster as a replica set, distributing reads to replica instances using the built-in read preference capabilities of your driver.
  3. Instance sizing impact: Increasing instance size will increase costs but provides higher connection limits and additional resources.

Additional considerations:

  1. Review application code for connection leaks or improper connection handling
  2. Consider implementing a connection proxy layer to manage connection distribution
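As an illustration only (a sketch, not part of the AWS guidance above), with pymongo the connection pool is configured on the client, for example:

    import pymongo

    # Reuse a single MongoClient per process; it maintains an internal connection pool.
    client = pymongo.MongoClient(
        "mongodb://<user>:<password>@<cluster-endpoint>:27017/?tls=true&tlsCAFile=global-bundle.pem&retryWrites=false",
        maxPoolSize=50,        # cap the number of connections this client opens
        maxIdleTimeMS=60000,   # close pooled connections idle for more than 60 seconds
    )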
-------------------------------------------------------------------------------- /performance/metric-analyzer/context/cpu_overutilized.html: --------------------------------------------------------------------------------

High CPU Usage Considerations

When CPU utilization is consistently high (above 90%), your instances may be under-provisioned, which can lead to performance issues, increased latency, and potential outages.

Considerations before taking action:

  1. Query optimization: High CPU might indicate inefficient queries. Review and optimize your queries before scaling up.
  2. Workload patterns: Determine if high CPU is consistent or occurs during specific peak periods.
  3. Instance sizing impact: Increasing instance size will increase costs but may be necessary for consistent performance.

Additional considerations:

  1. Review the slow query profiler to identify and optimize problematic queries
  2. Check for common causes of high CPU utilization
  3. Consider using Performance Insights to identify specific resource bottlenecks
  4. Distribute read operations to replica instances if using a read-heavy workload
  5. Implement or optimize connection pooling to reduce connection overhead
  6. Consider implementing a caching layer for frequently accessed data

-------------------------------------------------------------------------------- /performance/metric-analyzer/context/cpu_underutilized.html: --------------------------------------------------------------------------------

Low CPU Usage Considerations

When CPU utilization is consistently low (below 30%), your instances may be over-provisioned, which can lead to unnecessary costs without providing performance benefits.

Considerations before downsizing:

  1. Workload patterns: Verify if the low CPU usage is consistent or if there are periodic spikes that require the current capacity.
  2. Future growth: Consider if you're anticipating workload growth that would justify the current instance size.
  3. Memory requirements: Even with low CPU, your workload may require the current memory capacity for optimal cache performance. For example, moving from an r6g.xlarge to r6g.large decreases your buffer cache space by 50%, potentially increasing your I/O costs.
  4. Instance type selection: Consider if a different instance family might better match your workload characteristics, such as NVMe-backed instances.
  5. Network bandwidth impact: In addition to vCPU and RAM, decreasing the instance size may also affect network bandwidth. This can affect your I/O performance.

Additional considerations:

  1. If running multiple clusters that are over-provisioned, evaluate if their workloads can be consolidated
  2. Monitor other metrics like memory usage and cache hit ratios before making sizing decisions

-------------------------------------------------------------------------------- /performance/metric-analyzer/context/graviton_upgrade.html: --------------------------------------------------------------------------------

Upgrading Instances to Graviton2

AWS Graviton2 processors are custom built by AWS using 64-bit Arm Neoverse cores and provide significant performance and cost benefits over previous generation instances. R6g instances offer up to 30% better price/performance compared to R5/R4 instances, with r6g instances 5% less expensive than their r5 counterparts.

Considerations before upgrading to Graviton2:

  1. Performance benefits: Graviton2 instances provide better CPU performance and memory encryption, which can improve overall database performance.
  2. Application compatibility: No application changes are required when migrating from Intel to Graviton2 instances on Amazon DocumentDB.
  3. Instance size mapping: Ensure you select the appropriate Graviton2 instance size that matches or exceeds your current workload requirements.

Additional considerations:

  1. Identify the appropriate Graviton2 instance type (r6g) that corresponds to your current instance
  2. To minimize downtime, consider modifying your replicas first, then promoting the replicas before modifying the primary instance
  3. Monitor performance after the upgrade to ensure expected improvements are realized
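As a sketch of how an instance class change can be issued with the AWS SDK (the identifiers and instance class below are placeholders; follow the AWS documentation for the full procedure):

    import boto3

    docdb = boto3.client("docdb", region_name="us-east-1")

    # Modify a replica first; repeat per instance, then fail over before changing the old primary.
    docdb.modify_db_instance(
        DBInstanceIdentifier="my-replica-1",
        DBInstanceClass="db.r6g.large",
        ApplyImmediately=True,
    )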
-------------------------------------------------------------------------------- /performance/metric-analyzer/context/index_cache_low.html: --------------------------------------------------------------------------------

Low Index Buffer Cache Hit Ratio Considerations

When IndexBufferCacheHitRatio is low (below 90%) for an extended period of time, your instance may have too many indexes for its memory capacity, which can lead to performance degradation.

Considerations before taking action:

  1. Index usage analysis: Use the Index Review Tool to identify which indexes are rarely or never used before removing them.
  2. Query impact: Removing indexes may negatively impact some queries that depend on them. Test your workload before removing indexes from your production clusters.
  3. Instance sizing trade-offs: Increasing instance size will increase costs but may be necessary if all indexes are required.
  4. Review your queries for optimization: Utilize the explain command to view your query execution plan. You can use the $hint operator to enforce selection of a preferred index.

Additional considerations:

  1. Consider consolidating multiple indexes where possible
  2. Evaluate if your application can be modified to require fewer indexes
  3. Utilize the index cardinality detection tool to identify indexes with a high number of duplicate values. It is recommended to limit the creation of indexes to fields where the number of duplicate values is less than 1% of the total number of documents in the collection.
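As an illustration of the explain/hint workflow mentioned above (a sketch; the connection string, collection, filter, and index names are placeholders):

    import pymongo

    client = pymongo.MongoClient("mongodb://<user>:<password>@<cluster-endpoint>:27017/?tls=true&tlsCAFile=global-bundle.pem&retryWrites=false")
    coll = client["mydb"]["mycollection"]

    # Inspect the plan chosen for a query
    print(coll.find({"customerId": 12345}).explain())

    # Force a specific index if the planner is not choosing the preferred one
    for doc in coll.find({"customerId": 12345}).hint("customerId_1"):
        print(doc["_id"])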
-------------------------------------------------------------------------------- /performance/metric-analyzer/context/read_preference.html: --------------------------------------------------------------------------------

Read Preference Driver Settings

When the primary instance is handling the majority of read operations, your cluster is not efficiently utilizing read replicas. This can lead to unnecessary load on the primary instance and underutilization of replica resources.

Considerations before changing read preference:

  1. Consistency requirements: Secondary reads provide eventual consistency. Ensure your application can tolerate this for read operations.
  2. Driver configuration: Different drivers have different methods for setting read preferences. Review the documentation for your driver.
  3. Replica availability: With secondaryPreferred, reads will fall back to the primary if no replicas are available.
  4. Monitoring impact: Track metrics after changing read preference to ensure proper load distribution.

Additional considerations:

  1. Node.js driver:

        const client = new MongoClient(uri, {
          readPreference: 'secondaryPreferred'
        });

  2. Python driver:

        client = pymongo.MongoClient(uri,
          read_preference=pymongo.ReadPreference.SECONDARY_PREFERRED)

  3. Java driver:

        MongoClientSettings settings = MongoClientSettings.builder()
          .readPreference(ReadPreference.secondaryPreferred())
          .build();

-------------------------------------------------------------------------------- /performance/metric-analyzer/context/remove_instances.html: --------------------------------------------------------------------------------

Excessive Instance Deployment Considerations

Amazon DocumentDB is designed for 99.99% availability when deployed across two or more AZs. Replica instances work well for read scaling because they are fully dedicated to read operations on your cluster volume. However, running an Amazon DocumentDB cluster with more than three instances (one primary and two replicas) does not increase the availability beyond 99.99% and can increase costs.

Considerations before removing instances:

  1. Read scaling needs: Determine if your current read workload actually requires the additional replica instances.
  2. Regional distribution: Verify that the current instances are distributed across multiple Availability Zones for maximum resilience.
  3. Maintenance strategy: Consider how instance removal might affect your maintenance procedures.
  4. Future growth plans: Assess if anticipated workload growth justifies keeping additional instances.

Additional considerations:

  1. For production workloads, it is recommended to run a cluster with three instances (one primary, two replicas), but no less than two (one primary, one replica)
  2. Ensure instances are distributed across different Availability Zones
  3. Configure read preference to secondaryPreferred to maximize read scaling
  4. Consider increasing instance size instead of adding more instances if additional capacity is needed

-------------------------------------------------------------------------------- /performance/metric-analyzer/context/single_az.html: --------------------------------------------------------------------------------

Single Instance Deployment Considerations

Running a DocumentDB cluster with only a single instance provides no high availability and no read scaling capabilities. For production workloads, it is recommended to deploy a cluster with at least one replica instance.

Considerations before adding replica instances:

  1. High availability needs: Single-instance clusters have no automatic failover capability, resulting in longer downtime during instance failures.
  2. Read scaling requirements: Without replicas, all read operations must be processed by the primary instance.
  3. Maintenance impact: With replicas, maintenance operations can be performed with minimal downtime.

Additional considerations:

  1. For production workloads, deploy at least one replica instance (two total instances)
  2. For critical workloads, consider deploying two replica instances (three total instances)
  3. Amazon DocumentDB clusters can be stopped and started, helping to manage costs for development and test environments
  4. Configure read preference to secondaryPreferred to utilize replica instances for read operations
  5. Monitor replica lag to ensure acceptable data consistency
-------------------------------------------------------------------------------- /performance/metric-analyzer/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.26.0 2 | pandas>=1.3.0 3 | markdown>=3.3.0 -------------------------------------------------------------------------------- /performance/metric-collector/IAM-policy.json: -------------------------------------------------------------------------------- 1 | { 2 | "Version": "2012-10-17", 3 | "Statement": [ 4 | { 5 | "Effect": "Allow", 6 | "Action": "rds:DescribeDBInstances", 7 | "Resource": "*", 8 | "Condition": { 9 | "StringEquals": { 10 | "rds:DatabaseEngine": "docdb" 11 | } 12 | } 13 | }, 14 | { 15 | "Effect": "Allow", 16 | "Action": [ 17 | "rds:DescribeDBClusterParameters", 18 | "cloudwatch:GetMetricData", 19 | "cloudwatch:GetMetricStatistics", 20 | "rds:DescribeDBClusters", 21 | "rds:DescribeDBClusterParameterGroups" 22 | ], 23 | "Resource": "*" 24 | } 25 | ] 26 | } -------------------------------------------------------------------------------- /performance/metric-collector/README.md: -------------------------------------------------------------------------------- 1 | # Amazon DocumentDB Metric Collector Tool 2 | 3 | The metric collector tool provides a CSV output consolidating metrics for all DocumentDB clusters within a defined region. In addition to metadata such as cluster name, engine version, multi-AZ configuration, TLS status, and instance types, the script captures the Min, Max, Mean, p99, and Std values for a chosen time period. These can be compared against [Best Practices for Amazon DocumentDB](https://docs.aws.amazon.com/documentdb/latest/developerguide/best_practices.html) to ensure your cluster and instances are correctly sized for performance, resiliency, and cost. 4 | 5 | ## Requirements 6 | - Python 3.9+ 7 | - boto3 1.24.49+ 8 | - pandas 2.2.1+ 9 | 10 | ``` 11 | pip3 install boto3 pandas 12 | ``` 13 | 14 | - This script reads DocumentDB instance and cluster metrics from [Amazon CloudWatch](https://aws.amazon.com/cloudwatch/), as well as DocumentDB cluster details including parameter group information. The required IAM permissions can be found in `IAM-policy.json`. 15 | 16 | ## Usage parameters 17 | Usage: 18 | 19 | ``` 20 | python3 metric-collector.py --region <region> \\ 21 | --log-file-name <log-file-name> \\ 22 | --start-date <YYYYMMDD> \\ 23 | --end-date <YYYYMMDD> 24 | ``` 25 | 26 | Script Parameters: 27 | 28 | - region: str 29 | AWS Region 30 | - start-date: str 31 | Start date for CloudWatch logs, format=YYYYMMDD 32 | - end-date: str 33 | End date for CloudWatch logs, format=YYYYMMDD 34 | - log-file-name: str 35 | Log file for CSV output 36 | - log-level: str 37 | Log level for logging, default=INFO 38 | 39 | ## License 40 | This tool is licensed under the Apache 2.0 License. 41 | -------------------------------------------------------------------------------- /performance/metric-collector/requirements.txt: -------------------------------------------------------------------------------- 1 | boto3>=1.34.49 2 | pandas>=2.2.1 --------------------------------------------------------------------------------