├── .devcontainer ├── Dockerfile └── devcontainer.json ├── .dockerignore ├── .flake8 ├── .github └── workflows │ ├── build.yml │ ├── codeql-analysis.yml │ ├── dependency-review.yml │ ├── docker.yml │ ├── linting.yml │ ├── stale.yml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .sonarcloud.properties ├── Dockerfile ├── LICENSE ├── README.md ├── docker-compose.yml ├── dynamodump ├── __init__.py └── dynamodump.py ├── pyproject.toml ├── renovate.json ├── requirements-dev.txt ├── requirements.txt ├── setup.py ├── test.sh └── tests ├── __init__.py ├── test.py └── testTable ├── data └── 0001.json └── schema.json /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | # See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.191.1/containers/python-3/.devcontainer/base.Dockerfile 2 | 3 | # [Choice] Python version 4 | ARG VARIANT="3.13" 5 | FROM mcr.microsoft.com/vscode/devcontainers/python:${VARIANT} 6 | 7 | # [Choice] Node.js version: none, lts/*, 16, 14, 12, 10 8 | ARG NODE_VERSION="none" 9 | RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/local/share/nvm/nvm.sh && nvm install ${NODE_VERSION} 2>&1"; fi 10 | 11 | # [Optional] If your pip requirements rarely change, uncomment this section to add them to the image. 12 | # COPY requirements.txt /tmp/pip-tmp/ 13 | # RUN pip3 --disable-pip-version-check --no-cache-dir install -r /tmp/pip-tmp/requirements.txt \ 14 | # && rm -rf /tmp/pip-tmp 15 | 16 | # [Optional] Uncomment this section to install additional OS packages. 17 | # RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ 18 | # && apt-get -y install --no-install-recommends 19 | 20 | # [Optional] Uncomment this line to install global node packages. 21 | # RUN su vscode -c "source /usr/local/share/nvm/nvm.sh && npm install -g " 2>&1 -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: 2 | // https://github.com/microsoft/vscode-dev-containers/tree/v0.191.1/containers/python-3 3 | { 4 | "name": "Python 3", 5 | "build": { 6 | "dockerfile": "Dockerfile", 7 | "context": "..", 8 | "args": { 9 | // Update 'VARIANT' to pick a Python version 10 | "VARIANT": "3.13", 11 | // Options 12 | "NODE_VERSION": "none" 13 | } 14 | }, 15 | 16 | // Set *default* container specific settings.json values on container create. 17 | "settings": { 18 | "python.pythonPath": "/usr/local/bin/python", 19 | "python.languageServer": "Pylance", 20 | "python.linting.enabled": true, 21 | "python.linting.pylintEnabled": true, 22 | "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", 23 | "python.formatting.blackPath": "/usr/local/py-utils/bin/black", 24 | "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", 25 | "python.linting.banditPath": "/usr/local/py-utils/bin/bandit", 26 | "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", 27 | "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", 28 | "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", 29 | "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", 30 | "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint" 31 | }, 32 | 33 | // Add the IDs of extensions you want installed when the container is created. 
34 | "extensions": [ 35 | "ms-python.python", 36 | "ms-python.vscode-pylance" 37 | ], 38 | 39 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 40 | // "forwardPorts": [], 41 | 42 | // Use 'postCreateCommand' to run commands after the container is created. 43 | "postCreateCommand": "pip3 install --user -r requirements-dev.txt", 44 | 45 | // Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. 46 | "remoteUser": "vscode" 47 | } 48 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *.pyo 4 | *.pyd 5 | .Python 6 | env 7 | pip-log.txt 8 | .git 9 | .DS_Store 10 | README.md 11 | Dockerfile 12 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | extend-ignore = C901, E203, E501, W503 3 | max-line-length = 88 4 | select = B,C,E,F,W,B950 5 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build and Publish (PyPI) 2 | 3 | on: 4 | push: 5 | pull_request: 6 | branches: [master] 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: ["3.13"] 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - name: Install build dependencies 22 | run: | 23 | pip install --upgrade build 24 | - name: Generate distribution package 25 | run: | 26 | python -m build 27 | - name: Publish package to TestPyPI 28 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 29 | uses: pypa/gh-action-pypi-publish@release/v1 30 | with: 31 | user: __token__ 32 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 33 | repository_url: https://test.pypi.org/legacy/ 34 | - name: Publish package to PyPI 35 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 36 | uses: pypa/gh-action-pypi-publish@release/v1 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '17 10 * * 3' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 37 | # Learn more: 38 | # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed 39 | 40 | steps: 41 | - name: Checkout repository 42 | uses: actions/checkout@v4 43 | 44 | # Initializes the CodeQL tools for scanning. 45 | - name: Initialize CodeQL 46 | uses: github/codeql-action/init@v3 47 | with: 48 | languages: ${{ matrix.language }} 49 | # If you wish to specify custom queries, you can do so here or in a config file. 50 | # By default, queries listed here will override any specified in a config file. 51 | # Prefix the list here with "+" to use these queries and those in the config file. 52 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 53 | 54 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 55 | # If this step fails, then you should remove it and run the build manually (see below) 56 | - name: Autobuild 57 | uses: github/codeql-action/autobuild@v3 58 | 59 | # ℹ️ Command-line programs to run using the OS shell. 60 | # 📚 https://git.io/JvXDl 61 | 62 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 63 | # and modify them (or add more) to build your code if your project 64 | # uses a compiled language 65 | 66 | #- run: | 67 | # make bootstrap 68 | # make release 69 | 70 | - name: Perform CodeQL Analysis 71 | uses: github/codeql-action/analyze@v3 72 | -------------------------------------------------------------------------------- /.github/workflows/dependency-review.yml: -------------------------------------------------------------------------------- 1 | name: 'Dependency Review' 2 | on: [pull_request] 3 | 4 | permissions: 5 | contents: read 6 | 7 | jobs: 8 | dependency-review: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: 'Checkout Repository' 12 | uses: actions/checkout@v4 13 | - name: 'Dependency Review' 14 | uses: actions/dependency-review-action@v4 15 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docker image 2 | 3 | on: 4 | push: 5 | branches: 6 | - 'master' 7 | tags: 8 | - 'v*' 9 | 10 | jobs: 11 | push_to_registries: 12 | name: Push Docker image to Docker Hub, ECR Public, GitHub Container Registry 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Check out the repo 16 | uses: actions/checkout@v4 17 | 18 | - name: Set up QEMU 19 | uses: docker/setup-qemu-action@v3 20 | 21 | - name: Set up Docker Buildx 22 | uses: docker/setup-buildx-action@v3 23 | 24 | - name: Log in to Docker Hub 25 | uses: docker/login-action@v3 26 | with: 27 | username: ${{ secrets.DOCKER_HUB_USERNAME }} 28 | password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }} 29 | 30 | - name: Login to ECR Public 31 | uses: docker/login-action@v3 32 | with: 33 | registry: public.ecr.aws 34 | username: ${{ 
secrets.AWS_ACCESS_KEY_ID }} 35 | password: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 36 | env: 37 | AWS_REGION: us-east-1 38 | 39 | - name: Login to GitHub Container Registry 40 | uses: docker/login-action@v3 41 | with: 42 | registry: ghcr.io 43 | username: ${{ github.actor }} 44 | password: ${{ secrets.GITHUB_TOKEN }} 45 | 46 | - name: Extract metadata (tags, labels) for Docker 47 | id: meta 48 | uses: docker/metadata-action@v5 49 | with: 50 | images: | 51 | ${{ github.repository }} 52 | public.ecr.aws/${{ github.repository }} 53 | ghcr.io/${{ github.repository }} 54 | 55 | - name: Build and push Docker image 56 | uses: docker/build-push-action@v6 57 | with: 58 | context: . 59 | platforms: linux/amd64,linux/arm64 60 | push: true 61 | tags: ${{ steps.meta.outputs.tags }} 62 | labels: ${{ steps.meta.outputs.labels }} 63 | -------------------------------------------------------------------------------- /.github/workflows/linting.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | 3 | on: 4 | push: 5 | pull_request: 6 | branches: [master] 7 | 8 | jobs: 9 | linting: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: ["3.13"] 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - name: Install dev dependencies 22 | run: | 23 | pip install -r requirements-dev.txt 24 | - name: Lint with Black 25 | run: | 26 | black --check . 27 | - name: Lint with flake8 28 | run: | 29 | flake8 . --count --show-source --statistics 30 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: 'Close stale issues and PRs' 2 | on: 3 | schedule: 4 | - cron: '30 1 * * *' 5 | 6 | permissions: 7 | issues: write 8 | pull-requests: write 9 | 10 | jobs: 11 | stale: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/stale@v9 15 | with: 16 | exempt-issue-labels: 'renovate' 17 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | pull_request: 6 | branches: [master] 7 | 8 | jobs: 9 | test-default-dump-path: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v5 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | - name: Set up DynamoDB Local 21 | run: | 22 | mkdir /tmp/dynamodb_local 23 | wget -O - https://s3-us-west-2.amazonaws.com/dynamodb-local/dynamodb_local_latest.tar.gz \ 24 | | tar xz --directory /tmp/dynamodb_local 25 | java -Djava.library.path=/tmp/dynamodb_local/DynamoDBLocal_lib -jar /tmp/dynamodb_local/DynamoDBLocal.jar \ 26 | -sharedDb -inMemory & 27 | - name: Install dependencies 28 | run: | 29 | pip install -r requirements.txt 30 | - name: Test basic restore and backup 31 | run: | 32 | mkdir dump && cp -a tests/testTable dump 33 | python dynamodump/dynamodump.py -m restore --noConfirm -r local -s testTable -d testRestoredTable \ 34 | --host localhost --port 8000 --accessKey a --secretKey a 35 | python dynamodump/dynamodump.py -m backup -r local -s 
testRestoredTable --host localhost --port 8000 \ 36 | --accessKey a --secretKey a 37 | python tests/test.py 38 | - name: Test wildcard restore and backup 39 | run: | 40 | python dynamodump/dynamodump.py -m restore --noConfirm -r local -s "*" --host localhost --port 8000 \ 41 | --accessKey a --secretKey a 42 | rm -rf dump/test* 43 | python dynamodump/dynamodump.py -m backup -r local -s "*" --host localhost --port 8000 --accessKey a \ 44 | --secretKey a 45 | python tests/test.py 46 | - name: Test prefixed wildcard restore and backup 47 | run: | 48 | python dynamodump/dynamodump.py -m restore --noConfirm -r local -s "test*" --host localhost --port 8000 \ 49 | --accessKey a --secretKey a --prefixSeparator "" 50 | rm -rf dump/test* 51 | python dynamodump/dynamodump.py -m backup -r local -s "test*" --host localhost --port 8000 --accessKey a \ 52 | --secretKey a --prefixSeparator "" 53 | python tests/test.py 54 | 55 | test-non-default-dump-path: 56 | runs-on: ubuntu-latest 57 | strategy: 58 | matrix: 59 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 60 | env: 61 | DUMP_PATH: abc 62 | steps: 63 | - uses: actions/checkout@v4 64 | - name: Set up Python ${{ matrix.python-version }} 65 | uses: actions/setup-python@v5 66 | with: 67 | python-version: ${{ matrix.python-version }} 68 | - name: Set up DynamoDB Local 69 | run: | 70 | mkdir /tmp/dynamodb_local 71 | wget -O - https://s3-us-west-2.amazonaws.com/dynamodb-local/dynamodb_local_latest.tar.gz \ 72 | | tar xz --directory /tmp/dynamodb_local 73 | java -Djava.library.path=/tmp/dynamodb_local/DynamoDBLocal_lib -jar /tmp/dynamodb_local/DynamoDBLocal.jar \ 74 | -sharedDb -inMemory & 75 | - name: Install dependencies 76 | run: | 77 | pip install -r requirements.txt 78 | - name: Test non default dump path basic restore and backup 79 | run: | 80 | mkdir ${{ env.DUMP_PATH }} && cp -a tests/testTable ${{ env.DUMP_PATH }} 81 | python dynamodump/dynamodump.py -m restore --noConfirm -r local -s testTable -d testRestoredTable \ 82 | --host localhost --port 8000 --accessKey a --secretKey a --dumpPath ${{ env.DUMP_PATH }} 83 | rm -rf ${{ env.DUMP_PATH }} 84 | python dynamodump/dynamodump.py -m backup -r local -s testRestoredTable --host localhost --port 8000 \ 85 | --accessKey a --secretKey a --dumpPath ${{ env.DUMP_PATH }} 86 | DUMP_DATA_DIR=${{ env.DUMP_PATH }} python tests/test.py 87 | - name: Test non default dump path wildcard restore and backup 88 | run: | 89 | python dynamodump/dynamodump.py -m restore --noConfirm -r local -s "*" --host localhost --port 8000 \ 90 | --accessKey a --secretKey a --dumpPath ${{ env.DUMP_PATH }} 91 | rm -rf ${{ env.DUMP_PATH }}/test* 92 | python dynamodump/dynamodump.py -m backup -r local -s "*" --host localhost --port 8000 --accessKey a \ 93 | --secretKey a --dumpPath ${{ env.DUMP_PATH }} 94 | DUMP_DATA_DIR=${{ env.DUMP_PATH }} python tests/test.py 95 | - name: Test non default dump path prefixed wildcard restore and backup 96 | run: | 97 | python dynamodump/dynamodump.py -m restore --noConfirm -r local -s "test*" --host localhost --port 8000 \ 98 | --accessKey a --secretKey a --prefixSeparator "" --dumpPath ${{ env.DUMP_PATH }} 99 | rm -rf ${{ env.DUMP_PATH }}/test* 100 | python dynamodump/dynamodump.py -m backup -r local -s "test*" --host localhost --port 8000 --accessKey a \ 101 | --secretKey a --prefixSeparator "" --dumpPath ${{ env.DUMP_PATH }} 102 | DUMP_DATA_DIR=${{ env.DUMP_PATH }} python tests/test.py 103 | 104 | test-absolute-dump-path: 105 | runs-on: ubuntu-latest 106 | strategy: 107 | matrix: 108 | 
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 109 | env: 110 | DUMP_PATH: /tmp/abs 111 | steps: 112 | - uses: actions/checkout@v4 113 | - name: Set up Python ${{ matrix.python-version }} 114 | uses: actions/setup-python@v5 115 | with: 116 | python-version: ${{ matrix.python-version }} 117 | - name: Set up DynamoDB Local 118 | run: | 119 | mkdir /tmp/dynamodb_local 120 | wget -O - https://s3-us-west-2.amazonaws.com/dynamodb-local/dynamodb_local_latest.tar.gz \ 121 | | tar xz --directory /tmp/dynamodb_local 122 | java -Djava.library.path=/tmp/dynamodb_local/DynamoDBLocal_lib -jar /tmp/dynamodb_local/DynamoDBLocal.jar \ 123 | -sharedDb -inMemory & 124 | - name: Install dependencies 125 | run: | 126 | pip install -r requirements.txt 127 | - name: Test absolute dump path basic restore and backup 128 | run: | 129 | mkdir ${{ env.DUMP_PATH }} && cp -a tests/testTable ${{ env.DUMP_PATH }} 130 | python dynamodump/dynamodump.py -m restore --noConfirm -r local -s testTable -d testRestoredTable \ 131 | --host localhost --port 8000 --accessKey a --secretKey a --dumpPath ${{ env.DUMP_PATH }} 132 | rm -rf ${{ env.DUMP_PATH }} 133 | python dynamodump/dynamodump.py -m backup -r local -s testRestoredTable --host localhost --port 8000 \ 134 | --accessKey a --secretKey a --dumpPath ${{ env.DUMP_PATH }} 135 | DUMP_DATA_DIR=${{ env.DUMP_PATH }} python tests/test.py 136 | - name: Test absolute dump path wildcard restore and backup 137 | run: | 138 | python dynamodump/dynamodump.py -m restore --noConfirm -r local -s "*" --host localhost --port 8000 \ 139 | --accessKey a --secretKey a --dumpPath ${{ env.DUMP_PATH }} 140 | rm -rf ${{ env.DUMP_PATH }}/test* 141 | python dynamodump/dynamodump.py -m backup -r local -s "*" --host localhost --port 8000 --accessKey a \ 142 | --secretKey a --dumpPath ${{ env.DUMP_PATH }} 143 | DUMP_DATA_DIR=${{ env.DUMP_PATH }} python tests/test.py 144 | - name: Test absolute dump path prefixed wildcard restore and backup 145 | run: | 146 | python dynamodump/dynamodump.py -m restore --noConfirm -r local -s "test*" --host localhost --port 8000 \ 147 | --accessKey a --secretKey a --prefixSeparator "" --dumpPath ${{ env.DUMP_PATH }} 148 | rm -rf ${{ env.DUMP_PATH }}/test* 149 | python dynamodump/dynamodump.py -m backup -r local -s "test*" --host localhost --port 8000 --accessKey a \ 150 | --secretKey a --prefixSeparator "" --dumpPath ${{ env.DUMP_PATH }} 151 | DUMP_DATA_DIR=${{ env.DUMP_PATH }} python tests/test.py 152 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | 38 | dump 39 | dynamodump.iml 40 | env 41 | .vscode 42 | .idea 43 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/psf/black-pre-commit-mirror 3 | rev: 25.1.0 4 | hooks: 5 | - id: black 6 | language_version: python3.13 7 | - repo: 
https://github.com/PyCQA/flake8 8 | rev: 7.1.2 9 | hooks: 10 | - id: flake8 11 | -------------------------------------------------------------------------------- /.sonarcloud.properties: -------------------------------------------------------------------------------- 1 | sonar.python.version=3 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.13.2-alpine 2 | 3 | COPY ./requirements.txt /mnt/dynamodump/requirements.txt 4 | COPY ./dynamodump/dynamodump.py /usr/local/bin/dynamodump 5 | 6 | RUN pip install -r /mnt/dynamodump/requirements.txt 7 | 8 | ENTRYPOINT ["dynamodump"] 9 | CMD ["-h"] 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2013 Benny Chew and dynamodump contributors 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dynamodump 2 | 3 | ![Build Status](https://github.com/bchew/dynamodump/actions/workflows/build.yml/badge.svg) 4 | ![CodeQL Status](https://github.com/bchew/dynamodump/actions/workflows/codeql-analysis.yml/badge.svg) 5 | [![Docker Status](https://github.com/bchew/dynamodump/actions/workflows/docker.yml/badge.svg)](https://hub.docker.com/r/bchew/dynamodump) 6 | ![Linting Status](https://github.com/bchew/dynamodump/actions/workflows/linting.yml/badge.svg) 7 | ![Test Status](https://github.com/bchew/dynamodump/actions/workflows/test.yml/badge.svg) 8 | [![PyPI version](https://img.shields.io/pypi/v/dynamodump)](https://pypi.org/project/dynamodump) 9 | [![PyPI pyversions](https://img.shields.io/pypi/pyversions/dynamodump.svg)](https://pypi.org/project/dynamodump) 10 | ![Code Style](https://img.shields.io/badge/code%20style-black-black) 11 | 12 | Simple backup and restore script for Amazon DynamoDB using AWS SDK for Python (boto3) to work similarly to mysqldump. 13 | 14 | Suitable for DynamoDB usages of smaller data volume which do not warrant the usage of AWS Data Pipeline for backup/restores/empty. 15 | 16 | dynamodump supports local DynamoDB instances as well (tested with [DynamoDB Local](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/DynamoDBLocal.html)). 
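For a quick start, a typical single-table backup and restore against AWS looks like the following (the region and table name here are placeholders; see the [AWS example](#aws-example) and [Local example](#local-example) sections below for the full set of options):

```
pip install dynamodump

dynamodump -m backup -r us-west-1 -s testTable

dynamodump -m restore -r us-west-1 -s testTable
```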
17 | 18 | ## Table of Contents 19 | 20 | - [Installation](#installation) 21 | - [Usage](#usage) 22 | - [Script (unattended) usage](#script-unattended-usage) 23 | - [Docker CLI usage](#docker-cli-usage) 24 | - [AWS example](#aws-example) 25 | - [Local example](#local-example) 26 | - [Development](#development) 27 | 28 | ## Installation 29 | 30 | ``` 31 | pip install dynamodump 32 | ``` 33 | 34 | ## Usage 35 | 36 | ``` 37 | usage: dynamodump.py [-h] [-a {zip,tar}] [-b BUCKET] [-m {backup,restore,empty}] [-r REGION] [--host HOST] [--port PORT] [--accessKey ACCESSKEY] [--secretKey SECRETKEY] [-p PROFILE] [-s SRCTABLE] [-d DESTTABLE] 38 | [--prefixSeparator PREFIXSEPARATOR] [--noSeparator] [--readCapacity READCAPACITY] [-t TAG] [--writeCapacity WRITECAPACITY] [--schemaOnly] [--dataOnly] [--noConfirm] [--skipThroughputUpdate] 39 | [--dumpPath DUMPPATH] [--billingMode {PROVISIONED,PAY_PER_REQUEST}] [--log LOG] [--limit LIMIT] [-f FILTEROPTION] 40 | 41 | Simple DynamoDB backup/restore/empty. 42 | 43 | options: 44 | -h, --help show this help message and exit 45 | -a {zip,tar}, --archive {zip,tar} 46 | Type of compressed archive to create. If unset, don't create archive 47 | -b BUCKET, --bucket BUCKET 48 | S3 bucket in which to store or retrieve backups. [must already exist] 49 | -m {backup,restore,empty}, --mode {backup,restore,empty} 50 | Operation to perform 51 | -r REGION, --region REGION 52 | AWS region to use, e.g. 'us-west-1'. Can use any region for local testing 53 | --host HOST Host of local DynamoDB. This parameter initialises dynamodump for local DynamoDB testing [required only for local] 54 | --port PORT Port of local DynamoDB [required only for local] 55 | --accessKey ACCESSKEY 56 | Access key of local DynamoDB [required only for local] 57 | --secretKey SECRETKEY 58 | Secret key of local DynamoDB [required only for local] 59 | -p PROFILE, --profile PROFILE 60 | AWS credentials file profile to use. Allows you to use a profile instead accessKey, secretKey authentication 61 | -s SRCTABLE, --srcTable SRCTABLE 62 | Source DynamoDB table name to backup or restore from, use 'tablename*' for wildcard prefix selection or '*' for all tables. Mutually exclusive with --tag 63 | -d DESTTABLE, --destTable DESTTABLE 64 | Destination DynamoDB table name to backup or restore to, use 'tablename*' for wildcard prefix selection (defaults to use '-' separator) [optional, defaults to source] 65 | --prefixSeparator PREFIXSEPARATOR 66 | Specify a different prefix separator, e.g. '.' [optional] 67 | --noSeparator Overrides the use of a prefix separator for backup wildcard searches [optional] 68 | --readCapacity READCAPACITY 69 | Change the temp read capacity of the DynamoDB table to backup from [optional] 70 | -t TAG, --tag TAG Tag to use for identifying tables to back up. Mutually exclusive with srcTable. Provided as KEY=VALUE 71 | --writeCapacity WRITECAPACITY 72 | Change the temp write capacity of the DynamoDB table to restore to [defaults to 25, optional] 73 | --schemaOnly Backup or restore the schema only. Do not backup/restore data. Can be used with both backup and restore modes. Cannot be used with the --dataOnly [optional] 74 | --dataOnly Restore data only. Do not delete/recreate schema [optional for restore] 75 | --noConfirm Don't ask for confirmation before deleting existing schemas. 
76 | --skipThroughputUpdate 77 | Skip updating throughput values across tables [optional] 78 | --dumpPath DUMPPATH Directory to place and search for DynamoDB table backups (defaults to use 'dump') [optional] 79 | --billingMode {PROVISIONED,PAY_PER_REQUEST} 80 | Set billing mode between PROVISIONED|PAY_PER_REQUEST (defaults to use 'PROVISIONED') [optional] 81 | --log LOG Logging level - DEBUG|INFO|WARNING|ERROR|CRITICAL [optional] 82 | --limit LIMIT Limit option for backup, will stop the back up process after number of backed up items reaches the limit [optional] 83 | -f FILTEROPTION, --filterOption FILTEROPTION 84 | Filter option for backup, JSON file of which keys are ['FilterExpression', 'ExpressionAttributeNames', 'ExpressionAttributeValues'] 85 | ``` 86 | 87 | Backup files are stored in a 'dump' subdirectory, and are restored from there as well by default. 88 | 89 | ## Script (unattended) usage 90 | 91 | As of v1.2.0, note that `--noConfirm` is required to perform data restores involving deletions without any confirmation. 92 | 93 | ## Docker CLI usage 94 | 95 | ``` 96 | docker run --rm -it bchew/dynamodump -h 97 | ``` 98 | 99 | dynamodump container images are also published to Amazon ECR Public and GitHub Packages: 100 | 101 | ``` 102 | public.ecr.aws/bchew/dynamodump 103 | ghcr.io/bchew/dynamodump 104 | ``` 105 | 106 | Links to the registries specified: 107 | - [Docker Hub](https://hub.docker.com/r/bchew/dynamodump) 108 | - [Amazon ECR Public Gallery](https://gallery.ecr.aws/bchew/dynamodump) 109 | - [GitHub Packages](https://github.com/bchew/dynamodump/pkgs/container/dynamodump) 110 | 111 | ## AWS example 112 | 113 | Single table backup/restore: 114 | 115 | ``` 116 | dynamodump -m backup -r us-west-1 -s testTable 117 | 118 | dynamodump -m restore -r us-west-1 -s testTable 119 | ``` 120 | 121 | Multiple table backup/restore (assumes prefix of 'production-' in table names, use --prefixSeparator to specify a 122 | different separator): 123 | 124 | ``` 125 | dynamodump -m backup -r us-west-1 -s production* 126 | 127 | dynamodump -m restore -r us-west-1 -s production* 128 | ``` 129 | 130 | The above, but between different environments (e.g. `production-*` tables to `development-*` tables): 131 | 132 | ``` 133 | dynamodump -m backup -r us-west-1 -s production* 134 | 135 | dynamodump -m restore -r us-west-1 -s production* -d development* 136 | ``` 137 | 138 | Backup all tables and restore only data (will not delete and recreate schema): 139 | 140 | ``` 141 | dynamodump -m backup -r us-west-1 -s "*" 142 | 143 | dynamodump -m restore -r us-west-1 -s "*" --dataOnly 144 | ``` 145 | 146 | Dump all table schemas and create the schemas (e.g. creating blank tables in a different AWS account): 147 | 148 | ``` 149 | dynamodump -m backup -r us-west-1 -p source_credentials -s "*" --schemaOnly 150 | 151 | dynamodump -m restore -r us-west-1 -p destination_credentials -s "*" --schemaOnly 152 | ``` 153 | 154 | Backup all tables based on AWS tag `key=value` 155 | 156 | ``` 157 | dynamodump -p profile -r us-east-1 -m backup -t KEY=VALUE 158 | ``` 159 | 160 | Backup all tables based on AWS tag, compress and store in specified S3 bucket.
161 | 162 | ``` 163 | dynamodump -p profile -r us-east-1 -m backup -a tar -b some_s3_bucket -t TAG_KEY=TAG_VALUE 164 | 165 | dynamodump -p profile -r us-east-1 -m backup -a zip -b some_s3_bucket -t TAG_KEY=TAG_VALUE 166 | ``` 167 | 168 | Restore from S3 bucket to specified destination table 169 | 170 | ``` 171 | ## source_table identifies archive file in S3 bucket from which backup data is restored 172 | dynamodump -a tar -b some_s3_bucket -m restore -r us-east-1 -p profile -d destination_table -s source_table 173 | ``` 174 | 175 | ## Local example 176 | 177 | The following assumes your local DynamoDB is running on localhost:8000 and is accessible via 'a' as access/secret keys. 178 | You must specify the host to get local behavior. 179 | 180 | ``` 181 | dynamodump -m backup -r local -s testTable --host localhost --port 8000 --accessKey a --secretKey a 182 | 183 | dynamodump -m restore -r local -s testTable --host localhost --port 8000 --accessKey a --secretKey a 184 | ``` 185 | 186 | Multiple table backup/restore as stated in the AWS examples are also available for local. 187 | 188 | ## Development 189 | 190 | ``` 191 | python3 -m venv env 192 | source env/bin/activate 193 | 194 | # install dev requirements 195 | pip3 install -r requirements-dev.txt 196 | 197 | # one-time install of pre-commit hooks 198 | pre-commit install 199 | ``` 200 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | services: 3 | dynamodb-local: 4 | command: "-jar DynamoDBLocal.jar -sharedDb -inMemory" 5 | image: "amazon/dynamodb-local:latest" 6 | container_name: dynamodb-local 7 | ports: 8 | - "8000:8000" 9 | -------------------------------------------------------------------------------- /dynamodump/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bchew/dynamodump/4546edbe57c5e4062215f9ad5cdf7e676e4f6376/dynamodump/__init__.py -------------------------------------------------------------------------------- /dynamodump/dynamodump.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Simple backup and restore script for Amazon DynamoDB using boto to work similarly to mysqldump. 4 | 5 | Suitable for DynamoDB usages of smaller data volume which do not warrant the usage of AWS 6 | Data Pipeline for backup/restores/empty. 7 | 8 | dynamodump supports local DynamoDB instances as well (tested with DynamoDB Local). 
9 | """ 10 | 11 | import argparse 12 | import base64 13 | import boto3 14 | import datetime 15 | import errno 16 | import fnmatch 17 | import json 18 | import logging 19 | import os 20 | import re 21 | import shutil 22 | import sys 23 | import tarfile 24 | import threading 25 | import time 26 | import zipfile 27 | from queue import Queue 28 | from six.moves import input 29 | from urllib.error import URLError, HTTPError 30 | from urllib.request import urlopen 31 | 32 | 33 | AWS_SLEEP_INTERVAL = 10 # seconds 34 | BATCH_WRITE_SLEEP_INTERVAL = 0.15 # seconds 35 | DATA_DIR = "data" 36 | DATA_DUMP = "dump" 37 | DEFAULT_PREFIX_SEPARATOR = "-" 38 | CURRENT_WORKING_DIR = os.getcwd() 39 | JSON_INDENT = 2 40 | LOCAL_SLEEP_INTERVAL = 1 # seconds 41 | LOG_LEVEL = "INFO" 42 | MAX_BATCH_WRITE = 25 # DynamoDB limit 43 | MAX_NUMBER_BACKUP_WORKERS = 25 44 | MAX_RETRY = 6 45 | METADATA_URL = "http://169.254.169.254/latest/meta-data/" 46 | PAY_PER_REQUEST_BILLING_MODE = "PAY_PER_REQUEST" 47 | PROVISIONED_BILLING_MODE = "PROVISIONED" 48 | RESTORE_WRITE_CAPACITY = 25 49 | RESTORE_READ_CAPACITY = 25 50 | SCHEMA_FILE = "schema.json" 51 | THREAD_START_DELAY = 1 # seconds 52 | 53 | 54 | def encoder(self, obj): 55 | if isinstance(obj, datetime.datetime): 56 | return obj.isoformat() 57 | 58 | if isinstance(obj, bytes): 59 | return base64.b64encode(obj).decode("utf-8") 60 | 61 | return json.JSONEncoder.encoder(self, obj) 62 | 63 | 64 | json.JSONEncoder.default = encoder 65 | 66 | 67 | def process_item_types(dct): 68 | for item in dct["Items"]: 69 | for key in item: 70 | val = item[key] 71 | if "B" in val: 72 | item[key]["B"] = base64.b64decode(val["B"].encode("utf-8")) 73 | 74 | 75 | def _get_aws_client( 76 | service: str, 77 | profile: str = None, 78 | region: str = None, 79 | secret_key: str = None, 80 | access_key: str = None, 81 | endpoint_url: str = None, 82 | ): 83 | """ 84 | Build connection to some AWS service. 85 | """ 86 | 87 | if region: 88 | aws_region = region 89 | else: 90 | aws_region = os.getenv("AWS_DEFAULT_REGION") 91 | 92 | # Fallback to querying metadata for region 93 | if not aws_region: 94 | try: 95 | azone = ( 96 | urlopen( 97 | METADATA_URL + "placement/availability-zone", data=None, timeout=5 98 | ) 99 | .read() 100 | .decode() 101 | ) 102 | aws_region = azone[:-1] 103 | except HTTPError as e: 104 | logging.exception( 105 | "Error determining region used for AWS client. Typo in code?\n\n" 106 | + str(e) 107 | ) 108 | sys.exit(1) 109 | except URLError: 110 | logging.exception("Timed out connecting to metadata service.\n\n") 111 | sys.exit(1) 112 | 113 | if profile: 114 | session = boto3.Session( 115 | profile_name=profile, 116 | aws_access_key_id=access_key, 117 | aws_secret_access_key=secret_key, 118 | ) 119 | client = session.client(service, region_name=aws_region) 120 | else: 121 | client = boto3.client( 122 | service, 123 | region_name=aws_region, 124 | aws_access_key_id=access_key, 125 | aws_secret_access_key=secret_key, 126 | endpoint_url=endpoint_url, 127 | ) 128 | return client 129 | 130 | 131 | def get_table_name_by_tag(profile, region, tag): 132 | """ 133 | Using provided connection to dynamodb and tag, get all tables that have provided tag 134 | 135 | Profile provided and, if needed, used to build connection to STS. 
136 | """ 137 | 138 | matching_tables = [] 139 | all_tables = [] 140 | sts = _get_aws_client(profile=profile, region=region, service="sts") 141 | dynamo = _get_aws_client(profile=profile, region=region, service="dynamodb") 142 | account_number = sts.get_caller_identity().get("Account") 143 | paginator = dynamo.get_paginator(operation_name="list_tables") 144 | tag_key = tag.split("=")[0] 145 | tag_value = tag.split("=")[1] 146 | 147 | get_all_tables = paginator.paginate() 148 | for page in get_all_tables: 149 | for table in page["TableNames"]: 150 | all_tables.append(table) 151 | logging.debug("Found table " + table) 152 | 153 | for table in all_tables: 154 | table_arn = "arn:aws:dynamodb:{}:{}:table/{}".format( 155 | region, account_number, table 156 | ) 157 | table_tags = dynamo.list_tags_of_resource(ResourceArn=table_arn) 158 | for found_tag in table_tags["Tags"]: 159 | if found_tag["Key"] == tag_key: 160 | logging.debug("Checking table " + table + " tag " + found_tag["Key"]) 161 | if found_tag["Value"] == tag_value: 162 | matching_tables.append(table) 163 | logging.info("Matched table " + table) 164 | 165 | return matching_tables 166 | 167 | 168 | def do_put_bucket_object(profile, region, bucket, bucket_object): 169 | """ 170 | Put object into bucket. Only called if we've also created an archive file with do_archive() 171 | 172 | Bucket must exist prior to running this function. 173 | profile could be None. 174 | bucket_object is file to be uploaded 175 | """ 176 | 177 | s3 = _get_aws_client(profile=profile, region=region, service="s3") 178 | logging.info("Uploading backup to S3 bucket " + bucket) 179 | try: 180 | s3.upload_file( 181 | bucket_object, 182 | bucket, 183 | bucket_object, 184 | ExtraArgs={"ServerSideEncryption": "AES256"}, 185 | ) 186 | except s3.exceptions.ClientError as e: 187 | logging.exception("Failed to put file to S3 bucket\n\n" + str(e)) 188 | sys.exit(1) 189 | 190 | 191 | def do_get_s3_archive(profile, region, bucket, table, archive): 192 | """ 193 | Fetch latest file named filename from S3 194 | 195 | Bucket must exist prior to running this function. 196 | filename is args.dumpPath. File would be "args.dumpPath" with suffix .tar.bz2 or .zip 197 | """ 198 | 199 | s3 = _get_aws_client(profile=profile, region=region, service="s3") 200 | 201 | if archive: 202 | if archive == "tar": 203 | archive_type = "tar.bz2" 204 | else: 205 | archive_type = "zip" 206 | 207 | # Make sure bucket exists before continuing 208 | try: 209 | s3.head_bucket(Bucket=bucket) 210 | except s3.exceptions.ClientError as e: 211 | logging.exception( 212 | "S3 bucket " + bucket + " does not exist. " 213 | "Can't get backup file\n\n" + str(e) 214 | ) 215 | sys.exit(1) 216 | 217 | try: 218 | contents = s3.list_objects_v2(Bucket=bucket, Prefix=args.dumpPath) 219 | except s3.exceptions.ClientError as e: 220 | logging.exception( 221 | "Issue listing contents of bucket " + bucket + "\n\n" + str(e) 222 | ) 223 | sys.exit(1) 224 | 225 | # Script will always overwrite older backup. Bucket versioning stores multiple backups. 226 | # Therefore, just get item from bucket based on table name since that's what we name the files. 227 | filename = None 228 | for d in contents["Contents"]: 229 | if d["Key"] == "{}/{}.{}".format(args.dumpPath, table, archive_type): 230 | filename = d["Key"] 231 | 232 | if not filename: 233 | logging.exception( 234 | "Unable to find file to restore from. " 235 | "Confirm the name of the table you're restoring." 
236 | ) 237 | sys.exit(1) 238 | 239 | output_file = "/tmp/" + os.path.basename(filename) 240 | logging.info("Downloading file " + filename + " to " + output_file) 241 | s3.download_file(bucket, filename, output_file) 242 | 243 | # Extract archive based on suffix 244 | if tarfile.is_tarfile(output_file): 245 | try: 246 | logging.info("Extracting tar file...") 247 | with tarfile.open(name=output_file, mode="r:bz2") as a: 248 | a.extractall(path=".") 249 | except tarfile.ReadError as e: 250 | logging.exception("Error reading downloaded archive\n\n" + str(e)) 251 | sys.exit(1) 252 | except tarfile.ExtractError as e: 253 | # ExtractError is raised for non-fatal errors on extract method 254 | logging.error("Error during extraction: " + str(e)) 255 | 256 | # Assuming zip file here since we're only supporting tar and zip at this time 257 | else: 258 | try: 259 | logging.info("Extracting zip file...") 260 | with zipfile.ZipFile(output_file, "r") as z: 261 | z.extractall(path=".") 262 | except zipfile.BadZipFile as e: 263 | logging.exception("Problem extracting zip file\n\n" + str(e)) 264 | sys.exit(1) 265 | 266 | 267 | def do_archive(archive_type, dump_path): 268 | """ 269 | Create compressed archive of dump_path. 270 | 271 | Accepts archive_type of zip or tar and requires dump_path, directory added to archive 272 | """ 273 | 274 | archive_base = dump_path 275 | 276 | if archive_type.lower() == "tar": 277 | archive = archive_base + ".tar.bz2" 278 | try: 279 | logging.info("Creating tar file " + archive + "...") 280 | with tarfile.open(name=archive, mode="w:bz2") as a: 281 | for root, dirs, files in os.walk(archive_base): 282 | for file in files: 283 | a.add(os.path.join(root, file)) 284 | return True, archive 285 | except tarfile.CompressionError as e: 286 | logging.exception( 287 | "compression method is not supported or the data cannot be" 288 | " decoded properly.\n\n" + str(e) 289 | ) 290 | sys.exit(1) 291 | except tarfile.TarError as e: 292 | logging.exception("Error creating tarfile archive.\n\n" + str(e)) 293 | sys.exit(1) 294 | 295 | elif archive_type.lower() == "zip": 296 | try: 297 | logging.info("Creating zip file...") 298 | archive = archive_base + ".zip" 299 | with zipfile.ZipFile(archive, "w") as z: 300 | for root, dirs, files in os.walk(archive_base): 301 | for file in files: 302 | z.write(os.path.join(root, file)) 303 | return True, archive 304 | except zipfile.BadZipFile as e: 305 | logging.exception("Problem creating zip file\n\n" + str(e)) 306 | sys.exit(1) 307 | except zipfile.LargeZipFile: 308 | logging.exception( 309 | "Zip file would be too large. Update code to use Zip64 to continue." 310 | ) 311 | sys.exit(1) 312 | 313 | else: 314 | logging.error( 315 | "Unsupported archive format received. Probably shouldn't have " 316 | "made it to this code path. 
Skipping attempt at creating archive file" 317 | ) 318 | return False, None 319 | 320 | 321 | def get_table_name_matches(conn, table_name_wildcard): 322 | """ 323 | Find tables to backup 324 | """ 325 | 326 | all_tables = [] 327 | last_evaluated_table_name = None 328 | 329 | while True: 330 | optional_args = {} 331 | if last_evaluated_table_name is not None: 332 | optional_args["ExclusiveStartTableName"] = last_evaluated_table_name 333 | table_list = conn.list_tables(**optional_args) 334 | all_tables.extend(table_list["TableNames"]) 335 | 336 | try: 337 | last_evaluated_table_name = table_list["LastEvaluatedTableName"] 338 | except KeyError: 339 | break 340 | 341 | matching_tables = [] 342 | for table_name in all_tables: 343 | if fnmatch.fnmatch(table_name, table_name_wildcard): 344 | logging.info("Adding %s", table_name) 345 | matching_tables.append(table_name) 346 | 347 | return matching_tables 348 | 349 | 350 | def get_restore_table_matches(table_name_wildcard, separator): 351 | """ 352 | Find tables to restore 353 | """ 354 | 355 | matching_tables = [] 356 | try: 357 | dir_list = os.listdir("./" + args.dumpPath) 358 | except OSError: 359 | logging.info( 360 | 'Cannot find "./%s", Now trying user provided absolute dump path..' 361 | % args.dumpPath 362 | ) 363 | try: 364 | dir_list = os.listdir(args.dumpPath) 365 | except OSError: 366 | logging.info( 367 | 'Cannot find "%s", Now trying current working directory..' 368 | % args.dumpPath 369 | ) 370 | dump_data_path = CURRENT_WORKING_DIR 371 | try: 372 | dir_list = os.listdir(dump_data_path) 373 | except OSError: 374 | logging.info( 375 | 'Cannot find "%s" directory containing dump files!' % dump_data_path 376 | ) 377 | sys.exit(1) 378 | 379 | for dir_name in dir_list: 380 | if table_name_wildcard == "*": 381 | matching_tables.append(dir_name) 382 | elif separator == "": 383 | if dir_name.startswith( 384 | re.sub( 385 | r"([A-Z])", r" \1", table_name_wildcard.split("*", 1)[0] 386 | ).split()[0] 387 | ): 388 | matching_tables.append(dir_name) 389 | elif dir_name.split(separator, 1)[0] == table_name_wildcard.split("*", 1)[0]: 390 | matching_tables.append(dir_name) 391 | 392 | return matching_tables 393 | 394 | 395 | def change_prefix(source_table_name, source_wildcard, destination_wildcard, separator): 396 | """ 397 | Update prefix used for searching tables 398 | """ 399 | 400 | source_prefix = source_wildcard.split("*", 1)[0] 401 | destination_prefix = destination_wildcard.split("*", 1)[0] 402 | if separator == "": 403 | if re.sub(r"([A-Z])", r" \1", source_table_name).split()[0] == source_prefix: 404 | return destination_prefix + re.sub( 405 | r"([A-Z])", r" \1", source_table_name 406 | ).split(" ", 1)[1].replace(" ", "") 407 | if source_table_name.split(separator, 1)[0] == source_prefix: 408 | return destination_prefix + separator + source_table_name.split(separator, 1)[1] 409 | 410 | 411 | def delete_table(conn, sleep_interval: int, table_name: str): 412 | """ 413 | Delete table table_name 414 | """ 415 | 416 | if not args.dataOnly: 417 | if not args.noConfirm: 418 | confirmation = input( 419 | "About to delete table {}. Type 'yes' to continue: ".format(table_name) 420 | ) 421 | if confirmation != "yes": 422 | logging.warn("Confirmation not received. 
Stopping.") 423 | sys.exit(1) 424 | while True: 425 | # delete table if exists 426 | table_exist = True 427 | try: 428 | conn.delete_table(TableName=table_name) 429 | except conn.exceptions.ResourceNotFoundException: 430 | table_exist = False 431 | logging.info(table_name + " table deleted!") 432 | break 433 | except conn.exceptions.LimitExceededException: 434 | logging.info( 435 | "Limit exceeded, retrying deletion of " + table_name + ".." 436 | ) 437 | time.sleep(sleep_interval) 438 | except conn.exceptions.ProvisionedThroughputExceededException: 439 | logging.info( 440 | "Control plane limit exceeded, retrying deletion of " 441 | + table_name 442 | + ".." 443 | ) 444 | time.sleep(sleep_interval) 445 | except conn.exceptions.ResourceInUseException: 446 | logging.info(table_name + " table is being deleted..") 447 | time.sleep(sleep_interval) 448 | except conn.exceptions.ClientError as e: 449 | logging.exception(e) 450 | sys.exit(1) 451 | 452 | # if table exists, wait till deleted 453 | if table_exist: 454 | try: 455 | while True: 456 | logging.info( 457 | "Waiting for " 458 | + table_name 459 | + " table to be deleted.. [" 460 | + conn.describe_table(table_name)["Table"]["TableStatus"] 461 | + "]" 462 | ) 463 | time.sleep(sleep_interval) 464 | except conn.exceptions.ResourceNotFoundException: 465 | logging.info(table_name + " table deleted.") 466 | except conn.exceptions.ClientError as e: 467 | logging.exception(e) 468 | sys.exit(1) 469 | 470 | 471 | def mkdir_p(path): 472 | """ 473 | Create directory to hold dump 474 | """ 475 | 476 | try: 477 | os.makedirs(path) 478 | except OSError as exc: 479 | if not (exc.errno == errno.EEXIST and os.path.isdir(path)): 480 | raise 481 | 482 | 483 | def batch_write(conn, sleep_interval, table_name, put_requests): 484 | """ 485 | Write data to table_name 486 | """ 487 | 488 | request_items = {table_name: put_requests} 489 | i = 1 490 | sleep = sleep_interval 491 | while True: 492 | response = conn.batch_write_item(RequestItems=request_items) 493 | unprocessed_items = response["UnprocessedItems"] 494 | 495 | if len(unprocessed_items) == 0: 496 | break 497 | if len(unprocessed_items) > 0 and i <= MAX_RETRY: 498 | logging.debug( 499 | str(len(unprocessed_items)) 500 | + " unprocessed items, retrying after %s seconds.. [%s/%s]" 501 | % (str(sleep), str(i), str(MAX_RETRY)) 502 | ) 503 | request_items = unprocessed_items 504 | time.sleep(sleep) 505 | sleep += sleep_interval 506 | i += 1 507 | else: 508 | logging.info( 509 | "Max retries reached, failed to processed batch write: " 510 | + json.dumps(unprocessed_items, indent=JSON_INDENT) 511 | ) 512 | logging.info("Ignoring and continuing..") 513 | break 514 | 515 | 516 | def wait_for_active_table(conn, table_name, verb): 517 | """ 518 | Wait for table to be indesired state 519 | """ 520 | 521 | while True: 522 | if ( 523 | conn.describe_table(TableName=table_name)["Table"]["TableStatus"] 524 | != "ACTIVE" 525 | ): 526 | logging.info( 527 | "Waiting for " 528 | + table_name 529 | + " table to be " 530 | + verb 531 | + ".. 
[" 532 | + conn.describe_table(TableName=table_name)["Table"]["TableStatus"] 533 | + "]" 534 | ) 535 | time.sleep(sleep_interval) 536 | else: 537 | logging.info(table_name + " " + verb + ".") 538 | break 539 | 540 | 541 | def update_provisioned_throughput( 542 | conn, table_name, read_capacity, write_capacity, wait=True 543 | ): 544 | """ 545 | Update provisioned throughput on the table to provided values 546 | """ 547 | 548 | logging.info( 549 | "Updating " 550 | + table_name 551 | + " table read capacity to: " 552 | + str(read_capacity) 553 | + ", write capacity to: " 554 | + str(write_capacity) 555 | ) 556 | while True: 557 | try: 558 | conn.update_table( 559 | TableName=table_name, 560 | ProvisionedThroughput={ 561 | "ReadCapacityUnits": int(read_capacity), 562 | "WriteCapacityUnits": int(write_capacity), 563 | }, 564 | ) 565 | break 566 | except conn.exceptions.ResourceNotFoundException: 567 | logging.info( 568 | "Limit exceeded, retrying updating throughput of " + table_name + ".." 569 | ) 570 | time.sleep(sleep_interval) 571 | except conn.exceptions.ProvisionedThroughputExceededException: 572 | logging.info( 573 | "Control plane limit exceeded, retrying updating throughput" 574 | "of " + table_name + ".." 575 | ) 576 | time.sleep(sleep_interval) 577 | 578 | # wait for provisioned throughput update completion 579 | if wait: 580 | wait_for_active_table(conn, table_name, "updated") 581 | 582 | 583 | def do_empty(dynamo, table_name, billing_mode): 584 | """ 585 | Empty table named table_name 586 | """ 587 | 588 | logging.info("Starting Empty for " + table_name + "..") 589 | 590 | # get table schema 591 | logging.info("Fetching table schema for " + table_name) 592 | table_data = dynamo.describe_table(TableName=table_name) 593 | 594 | table_desc = table_data["Table"] 595 | table_attribute_definitions = table_desc["AttributeDefinitions"] 596 | table_key_schema = table_desc["KeySchema"] 597 | original_read_capacity = table_desc["ProvisionedThroughput"]["ReadCapacityUnits"] 598 | original_write_capacity = table_desc["ProvisionedThroughput"]["WriteCapacityUnits"] 599 | table_local_secondary_indexes = table_desc.get("LocalSecondaryIndexes") 600 | table_global_secondary_indexes = table_desc.get("GlobalSecondaryIndexes") 601 | 602 | optional_args = {} 603 | if billing_mode == PROVISIONED_BILLING_MODE: 604 | table_provisioned_throughput = { 605 | "ReadCapacityUnits": int(original_read_capacity), 606 | "WriteCapacityUnits": int(original_write_capacity), 607 | } 608 | optional_args["ProvisionedThroughput"] = table_provisioned_throughput 609 | 610 | if table_local_secondary_indexes is not None: 611 | optional_args["LocalSecondaryIndexes"] = table_local_secondary_indexes 612 | 613 | if table_global_secondary_indexes is not None: 614 | optional_args["GlobalSecondaryIndexes"] = table_global_secondary_indexes 615 | 616 | table_provisioned_throughput = { 617 | "ReadCapacityUnits": int(original_read_capacity), 618 | "WriteCapacityUnits": int(original_write_capacity), 619 | } 620 | 621 | logging.info("Deleting Table " + table_name) 622 | 623 | delete_table(dynamo, sleep_interval, table_name) 624 | 625 | logging.info("Creating Table " + table_name) 626 | 627 | while True: 628 | try: 629 | dynamo.create_table( 630 | AttributeDefinitions=table_attribute_definitions, 631 | TableName=table_name, 632 | KeySchema=table_key_schema, 633 | BillingMode=billing_mode, 634 | **optional_args 635 | ) 636 | break 637 | except dynamo.exceptions.LimitExceededException: 638 | logging.info("Limit exceeded, retrying creation of 
" + table_name + "..") 639 | time.sleep(sleep_interval) 640 | except dynamo.exceptions.ProvisionedThroughputExceededException: 641 | logging.info( 642 | "Control plane limit exceeded, retrying creation of " 643 | + table_name 644 | + ".." 645 | ) 646 | time.sleep(sleep_interval) 647 | except dynamo.exceptions.ClientError as e: 648 | logging.exception(e) 649 | sys.exit(1) 650 | 651 | # wait for table creation completion 652 | wait_for_active_table(dynamo, table_name, "created") 653 | 654 | logging.info( 655 | "Recreation of " 656 | + table_name 657 | + " completed. Time taken: " 658 | + str(datetime.datetime.now().replace(microsecond=0) - start_time) 659 | ) 660 | 661 | 662 | def do_backup( 663 | dynamo, 664 | read_capacity, 665 | table_queue=None, 666 | src_table=None, 667 | filter_option=None, 668 | limit=None, 669 | ): 670 | """ 671 | Connect to DynamoDB and perform the backup for src_table or each table in table_queue 672 | """ 673 | 674 | if src_table: 675 | table_name = src_table 676 | 677 | if table_queue: 678 | while True: 679 | table_name = table_queue.get() 680 | if table_name is None: 681 | break 682 | 683 | logging.info("Starting backup for " + table_name + "..") 684 | 685 | # trash data, re-create subdir 686 | if os.path.exists(args.dumpPath + os.sep + table_name): 687 | shutil.rmtree(args.dumpPath + os.sep + table_name) 688 | mkdir_p(args.dumpPath + os.sep + table_name) 689 | 690 | # get table schema 691 | logging.info("Dumping table schema for " + table_name) 692 | f = open(args.dumpPath + os.sep + table_name + os.sep + SCHEMA_FILE, "w+") 693 | table_desc = dynamo.describe_table(TableName=table_name) 694 | f.write(json.dumps(table_desc, indent=JSON_INDENT)) 695 | f.close() 696 | 697 | if not args.schemaOnly: 698 | original_read_capacity = table_desc["Table"]["ProvisionedThroughput"][ 699 | "ReadCapacityUnits" 700 | ] 701 | original_write_capacity = table_desc["Table"]["ProvisionedThroughput"][ 702 | "WriteCapacityUnits" 703 | ] 704 | 705 | # override table read capacity if specified 706 | if ( 707 | read_capacity is not None 708 | and read_capacity != original_read_capacity 709 | ): 710 | update_provisioned_throughput( 711 | dynamo, table_name, read_capacity, original_write_capacity 712 | ) 713 | 714 | # get table data 715 | logging.info("Dumping table items for " + table_name) 716 | mkdir_p(args.dumpPath + os.sep + table_name + os.sep + DATA_DIR) 717 | 718 | i = 1 719 | num_items = 0 720 | last_evaluated_key = None 721 | 722 | while True: 723 | try: 724 | optional_args = {} 725 | if last_evaluated_key is not None: 726 | optional_args["ExclusiveStartKey"] = last_evaluated_key 727 | if filter_option is not None: 728 | optional_args.update(filter_option) 729 | scanned_table = dynamo.scan( 730 | TableName=table_name, **optional_args 731 | ) 732 | except dynamo.exceptions.ProvisionedThroughputExceededException: 733 | logging.error( 734 | "EXCEEDED THROUGHPUT ON TABLE " 735 | + table_name 736 | + ". BACKUP FOR IT IS USELESS." 
737 | ) 738 | table_queue.task_done() 739 | 740 | f = open( 741 | args.dumpPath 742 | + os.sep 743 | + table_name 744 | + os.sep 745 | + DATA_DIR 746 | + os.sep 747 | + str(i).zfill(4) 748 | + ".json", 749 | "w+", 750 | ) 751 | del scanned_table["ResponseMetadata"] 752 | 753 | f.write(json.dumps(scanned_table, indent=JSON_INDENT)) 754 | f.close() 755 | 756 | i += 1 757 | 758 | num_items += len(scanned_table["Items"]) 759 | if limit and num_items > limit: 760 | break 761 | 762 | try: 763 | last_evaluated_key = scanned_table["LastEvaluatedKey"] 764 | except KeyError: 765 | break 766 | 767 | # revert back to original table read capacity if specified 768 | if ( 769 | read_capacity is not None 770 | and read_capacity != original_read_capacity 771 | ): 772 | update_provisioned_throughput( 773 | dynamo, 774 | table_name, 775 | original_read_capacity, 776 | original_write_capacity, 777 | False, 778 | ) 779 | 780 | logging.info( 781 | "Backup for " 782 | + table_name 783 | + " table completed. Time taken: " 784 | + str(datetime.datetime.now().replace(microsecond=0) - start_time) 785 | ) 786 | 787 | table_queue.task_done() 788 | 789 | 790 | def prepare_provisioned_throughput_for_restore(provisioned_throughput): 791 | """ 792 | This function makes sure that the payload returned for the boto3 API call create_table is compatible 793 | with the provisioned throughput attribute 794 | See: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb.html 795 | """ 796 | return { 797 | "ReadCapacityUnits": provisioned_throughput["ReadCapacityUnits"], 798 | "WriteCapacityUnits": provisioned_throughput["WriteCapacityUnits"], 799 | } 800 | 801 | 802 | def prepare_lsi_for_restore(lsi): 803 | """ 804 | This function makes sure that the payload returned for the boto3 API call create_table is compatible 805 | See: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb.html#DynamoDB.Client.create_table 806 | """ 807 | return { 808 | "IndexName": lsi["IndexName"], 809 | "KeySchema": lsi["KeySchema"], 810 | "Projection": lsi["Projection"], 811 | } 812 | 813 | 814 | def prepare_gsi_for_restore(gsi, billing_mode): 815 | """ 816 | This function makes sure that the payload returned for the boto3 API call create_table is compatible 817 | See: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/dynamodb.html 818 | """ 819 | result = { 820 | "IndexName": gsi["IndexName"], 821 | "KeySchema": gsi["KeySchema"], 822 | "Projection": gsi["Projection"], 823 | } 824 | 825 | if billing_mode != PAY_PER_REQUEST_BILLING_MODE: 826 | result["ProvisionedThroughput"] = prepare_provisioned_throughput_for_restore( 827 | gsi["ProvisionedThroughput"] 828 | ) 829 | 830 | return result 831 | 832 | 833 | def do_restore( 834 | dynamo, 835 | sleep_interval, 836 | source_table, 837 | destination_table, 838 | write_capacity, 839 | billing_mode, 840 | ): 841 | """ 842 | Restore table 843 | """ 844 | logging.info( 845 | "Starting restore for " + source_table + " to " + destination_table + ".." 846 | ) 847 | 848 | # create table using schema 849 | # restore source_table from dump directory if it exists else try current working directory 850 | if os.path.exists("%s/%s" % (args.dumpPath, source_table)): 851 | dump_data_path = args.dumpPath 852 | else: 853 | logging.info( 854 | 'Cannot find "./%s/%s", Now trying current working directory..' 
855 |             % (args.dumpPath, source_table)
856 |         )
857 |         if os.path.exists("%s/%s" % (CURRENT_WORKING_DIR, source_table)):
858 |             dump_data_path = CURRENT_WORKING_DIR
859 |         else:
860 |             logging.info(
861 |                 'Cannot find "%s/%s" directory containing dump files!'
862 |                 % (CURRENT_WORKING_DIR, source_table)
863 |             )
864 |             sys.exit(1)
865 |     table_data = json.load(
866 |         open(dump_data_path + os.sep + source_table + os.sep + SCHEMA_FILE)
867 |     )
868 |     table = table_data["Table"]
869 |     table_attribute_definitions = table["AttributeDefinitions"]
870 |     table_table_name = destination_table
871 |     table_key_schema = table["KeySchema"]
872 |     original_read_capacity = table["ProvisionedThroughput"]["ReadCapacityUnits"]
873 |     original_write_capacity = table["ProvisionedThroughput"]["WriteCapacityUnits"]
874 |     table_local_secondary_indexes = table.get("LocalSecondaryIndexes")
875 |     table_global_secondary_indexes = table.get("GlobalSecondaryIndexes")
876 |
877 |     # override table write capacity if specified, else use RESTORE_WRITE_CAPACITY if original
878 |     # write capacity is lower
879 |     if write_capacity is None:
880 |         if original_write_capacity < RESTORE_WRITE_CAPACITY:
881 |             write_capacity = RESTORE_WRITE_CAPACITY
882 |         else:
883 |             write_capacity = original_write_capacity
884 |
885 |     if original_write_capacity == 0:
886 |         original_write_capacity = RESTORE_WRITE_CAPACITY
887 |
888 |     # ensure that read capacity is at least RESTORE_READ_CAPACITY
889 |     if original_read_capacity < RESTORE_READ_CAPACITY:
890 |         read_capacity = RESTORE_READ_CAPACITY
891 |     else:
892 |         read_capacity = original_read_capacity
893 |
894 |     if original_read_capacity == 0:
895 |         original_read_capacity = RESTORE_READ_CAPACITY
896 |
897 |     # override GSI write capacities if specified, else use RESTORE_WRITE_CAPACITY if original
898 |     # write capacity is lower
899 |     original_gsi_write_capacities = []
900 |     original_gsi_read_capacities = []
901 |     if table_global_secondary_indexes is not None:
902 |         for gsi in table_global_secondary_indexes:
903 |             # keeps track of original gsi write capacity units. If provisioned capacity is 0, set to
904 |             # RESTORE_WRITE_CAPACITY as fallback given that 0 is not allowed for write capacities
905 |             original_gsi_write_capacity = gsi["ProvisionedThroughput"][
906 |                 "WriteCapacityUnits"
907 |             ]
908 |             if original_gsi_write_capacity == 0:
909 |                 original_gsi_write_capacity = RESTORE_WRITE_CAPACITY
910 |
911 |             original_gsi_write_capacities.append(original_gsi_write_capacity)
912 |
913 |             if gsi["ProvisionedThroughput"]["WriteCapacityUnits"] < int(write_capacity):
914 |                 gsi["ProvisionedThroughput"]["WriteCapacityUnits"] = int(write_capacity)
915 |
916 |             # keeps track of original gsi read capacity units. If provisioned capacity is 0, set to
917 |             # RESTORE_READ_CAPACITY as fallback given that 0 is not allowed for read capacities
918 |             original_gsi_read_capacity = gsi["ProvisionedThroughput"][
919 |                 "ReadCapacityUnits"
920 |             ]
921 |             if original_gsi_read_capacity == 0:
922 |                 original_gsi_read_capacity = RESTORE_READ_CAPACITY
923 |
924 |             original_gsi_read_capacities.append(original_gsi_read_capacity)
925 |
926 |             if (
927 |                 gsi["ProvisionedThroughput"]["ReadCapacityUnits"]
928 |                 < RESTORE_READ_CAPACITY
929 |             ):
930 |                 gsi["ProvisionedThroughput"][
931 |                     "ReadCapacityUnits"
932 |                 ] = RESTORE_READ_CAPACITY
933 |
934 |     # temp provisioned throughput for restore
935 |     table_provisioned_throughput = {
936 |         "ReadCapacityUnits": int(read_capacity),
937 |         "WriteCapacityUnits": int(write_capacity),
938 |     }
939 |
940 |     optional_args = {}
941 |     if billing_mode == PROVISIONED_BILLING_MODE:
942 |         optional_args["ProvisionedThroughput"] = table_provisioned_throughput
943 |
944 |     if not args.dataOnly:
945 |         logging.info(
946 |             "Creating "
947 |             + destination_table
948 |             + " table with temp write capacity of "
949 |             + str(write_capacity)
950 |         )
951 |
952 |         if table_local_secondary_indexes is not None:
953 |             optional_args["LocalSecondaryIndexes"] = [
954 |                 prepare_lsi_for_restore(lsi) for lsi in table_local_secondary_indexes
955 |             ]
956 |
957 |         if table_global_secondary_indexes is not None:
958 |             optional_args["GlobalSecondaryIndexes"] = [
959 |                 prepare_gsi_for_restore(gsi, billing_mode)
960 |                 for gsi in table_global_secondary_indexes
961 |             ]
962 |
963 |         while True:
964 |             try:
965 |                 dynamo.create_table(
966 |                     AttributeDefinitions=table_attribute_definitions,
967 |                     TableName=table_table_name,
968 |                     KeySchema=table_key_schema,
969 |                     BillingMode=billing_mode,
970 |                     **optional_args
971 |                 )
972 |                 break
973 |             except dynamo.exceptions.LimitExceededException:
974 |                 logging.info(
975 |                     "Limit exceeded, retrying creation of " + destination_table + ".."
976 |                 )
977 |                 time.sleep(sleep_interval)
978 |             except dynamo.exceptions.ProvisionedThroughputExceededException:
979 |                 logging.info(
980 |                     "Control plane limit exceeded, "
981 |                     "retrying creation of " + destination_table + ".."
982 | ) 983 | time.sleep(sleep_interval) 984 | except dynamo.exceptions.ClientError as e: 985 | logging.exception(e) 986 | sys.exit(1) 987 | 988 | # wait for table creation completion 989 | wait_for_active_table(dynamo, destination_table, "created") 990 | elif not args.skipThroughputUpdate: 991 | # update provisioned capacity 992 | if int(write_capacity) > original_write_capacity: 993 | update_provisioned_throughput( 994 | dynamo, destination_table, original_read_capacity, write_capacity, False 995 | ) 996 | 997 | if not args.schemaOnly: 998 | # read data files 999 | logging.info("Restoring data for " + destination_table + " table..") 1000 | data_file_list = os.listdir( 1001 | dump_data_path + os.sep + source_table + os.sep + DATA_DIR + os.sep 1002 | ) 1003 | data_file_list.sort() 1004 | 1005 | for data_file in data_file_list: 1006 | logging.info("Processing " + data_file + " of " + destination_table) 1007 | items = [] 1008 | item_data = json.load( 1009 | open( 1010 | dump_data_path 1011 | + os.sep 1012 | + source_table 1013 | + os.sep 1014 | + DATA_DIR 1015 | + os.sep 1016 | + data_file 1017 | ), 1018 | ) 1019 | process_item_types(item_data) 1020 | items.extend(item_data["Items"]) 1021 | 1022 | # batch write data 1023 | put_requests = [] 1024 | while len(items) > 0: 1025 | put_requests.append({"PutRequest": {"Item": items.pop(0)}}) 1026 | 1027 | # flush every MAX_BATCH_WRITE 1028 | if len(put_requests) == MAX_BATCH_WRITE: 1029 | logging.debug( 1030 | "Writing next " 1031 | + str(MAX_BATCH_WRITE) 1032 | + " items to " 1033 | + destination_table 1034 | + ".." 1035 | ) 1036 | batch_write( 1037 | dynamo, 1038 | BATCH_WRITE_SLEEP_INTERVAL, 1039 | destination_table, 1040 | put_requests, 1041 | ) 1042 | del put_requests[:] 1043 | 1044 | # flush remainder 1045 | if len(put_requests) > 0: 1046 | batch_write( 1047 | dynamo, BATCH_WRITE_SLEEP_INTERVAL, destination_table, put_requests 1048 | ) 1049 | 1050 | if not args.skipThroughputUpdate: 1051 | # revert to original table write capacity if it has been modified 1052 | if ( 1053 | int(write_capacity) != original_write_capacity 1054 | or int(read_capacity) != original_read_capacity 1055 | ): 1056 | update_provisioned_throughput( 1057 | dynamo, 1058 | destination_table, 1059 | original_read_capacity, 1060 | original_write_capacity, 1061 | False, 1062 | ) 1063 | 1064 | # loop through each GSI to check if it has changed and update if necessary 1065 | if table_global_secondary_indexes is not None: 1066 | gsi_data = [] 1067 | for gsi in table_global_secondary_indexes: 1068 | wcu = gsi["ProvisionedThroughput"]["WriteCapacityUnits"] 1069 | rcu = gsi["ProvisionedThroughput"]["ReadCapacityUnits"] 1070 | original_gsi_write_capacity = original_gsi_write_capacities.pop(0) 1071 | original_gsi_read_capacity = original_gsi_read_capacities.pop(0) 1072 | if ( 1073 | original_gsi_write_capacity != wcu 1074 | or original_gsi_read_capacity != rcu 1075 | ): 1076 | gsi_data.append( 1077 | { 1078 | "Update": { 1079 | "IndexName": gsi["IndexName"], 1080 | "ProvisionedThroughput": { 1081 | "ReadCapacityUnits": int( 1082 | original_gsi_read_capacity 1083 | ), 1084 | "WriteCapacityUnits": int( 1085 | original_gsi_write_capacity 1086 | ), 1087 | }, 1088 | } 1089 | } 1090 | ) 1091 | 1092 | if gsi_data: 1093 | logging.info( 1094 | "Updating " 1095 | + destination_table 1096 | + " global secondary indexes write and read capacities as necessary.." 
1097 |                 )
1098 |                 while True:
1099 |                     try:
1100 |                         dynamo.update_table(
1101 |                             TableName=destination_table,
1102 |                             GlobalSecondaryIndexUpdates=gsi_data,
1103 |                         )
1104 |                         break
1105 |                     except dynamo.exceptions.LimitExceededException:
1106 |                         logging.info(
1107 |                             "Limit exceeded, retrying updating throughput of "
1108 |                             "GlobalSecondaryIndexes in " + destination_table + ".."
1109 |                         )
1110 |                         time.sleep(sleep_interval)
1111 |                     except dynamo.exceptions.ProvisionedThroughputExceededException:
1112 |                         logging.info(
1113 |                             "Control plane limit exceeded, retrying updating throughput of "
1114 |                             "GlobalSecondaryIndexes in " + destination_table + ".."
1115 |                         )
1116 |                         time.sleep(sleep_interval)
1117 |
1118 |         # wait for table to become active
1119 |         wait_for_active_table(dynamo, destination_table, "active")
1120 |
1121 |         logging.info(
1122 |             "Restore for "
1123 |             + source_table
1124 |             + " to "
1125 |             + destination_table
1126 |             + " table completed. Time taken: "
1127 |             + str(datetime.datetime.now().replace(microsecond=0) - start_time)
1128 |         )
1129 |     else:
1130 |         logging.info(
1131 |             "Empty schema of "
1132 |             + source_table
1133 |             + " table created. Time taken: "
1134 |             + str(datetime.datetime.now().replace(microsecond=0) - start_time)
1135 |         )
1136 |
1137 |
1138 | def main():
1139 |     """
1140 |     Entrypoint to the script
1141 |     """
1142 |
1143 |     global args, sleep_interval, start_time
1144 |
1145 |     # parse args
1146 |     parser = argparse.ArgumentParser(
1147 |         description="Simple DynamoDB backup/restore/empty."
1148 |     )
1149 |     parser.add_argument(
1150 |         "-a",
1151 |         "--archive",
1152 |         help="Type of compressed archive to create. If unset, don't create archive",
1153 |         choices=["zip", "tar"],
1154 |     )
1155 |     parser.add_argument(
1156 |         "-b",
1157 |         "--bucket",
1158 |         help="S3 bucket in which to store or retrieve backups. [must already exist]",
1159 |     )
1160 |     parser.add_argument(
1161 |         "-m",
1162 |         "--mode",
1163 |         help="Operation to perform",
1164 |         choices=["backup", "restore", "empty"],
1165 |     )
1166 |     parser.add_argument(
1167 |         "-r",
1168 |         "--region",
1169 |         help="AWS region to use, e.g. 'us-west-1'. "
1170 |         "Can use any region for local testing",
1171 |     )
1172 |     parser.add_argument(
1173 |         "--host",
1174 |         help="Host of local DynamoDB. This parameter initialises dynamodump for local DynamoDB testing [required only for local]",
1175 |     )
1176 |     parser.add_argument(
1177 |         "--port", help="Port of local DynamoDB [required only for local]"
1178 |     )
1179 |     parser.add_argument(
1180 |         "--accessKey", help="Access key of local DynamoDB [required only for local]"
1181 |     )
1182 |     parser.add_argument(
1183 |         "--secretKey", help="Secret key of local DynamoDB [required only for local]"
1184 |     )
1185 |     parser.add_argument(
1186 |         "-p",
1187 |         "--profile",
1188 |         help="AWS credentials file profile to use. Allows you to use a "
1189 |         "profile instead of accessKey, secretKey authentication",
1190 |     )
1191 |     parser.add_argument(
1192 |         "-s",
1193 |         "--srcTable",
1194 |         help="Source DynamoDB table name to backup or restore from, "
1195 |         "use 'tablename*' for wildcard prefix selection or '*' for "
1196 |         "all tables.
Mutually exclusive with --tag", 1197 | ) 1198 | parser.add_argument( 1199 | "-d", 1200 | "--destTable", 1201 | help="Destination DynamoDB table name to backup or restore to, " 1202 | "use 'tablename*' for wildcard prefix selection " 1203 | "(defaults to use '-' separator) [optional, defaults to source]", 1204 | ) 1205 | parser.add_argument( 1206 | "--prefixSeparator", 1207 | help="Specify a different prefix separator, e.g. '.' [optional]", 1208 | ) 1209 | parser.add_argument( 1210 | "--noSeparator", 1211 | action="store_true", 1212 | help="Overrides the use of a prefix separator for backup wildcard " 1213 | "searches [optional]", 1214 | ) 1215 | parser.add_argument( 1216 | "--readCapacity", 1217 | help="Change the temp read capacity of the DynamoDB table to backup " 1218 | "from [optional]", 1219 | ) 1220 | parser.add_argument( 1221 | "-t", 1222 | "--tag", 1223 | help="Tag to use for identifying tables to back up. " 1224 | "Mutually exclusive with srcTable. Provided as KEY=VALUE", 1225 | ) 1226 | parser.add_argument( 1227 | "--writeCapacity", 1228 | help="Change the temp write capacity of the DynamoDB table to restore " 1229 | "to [defaults to " + str(RESTORE_WRITE_CAPACITY) + ", optional]", 1230 | ) 1231 | parser.add_argument( 1232 | "--schemaOnly", 1233 | action="store_true", 1234 | default=False, 1235 | help="Backup or restore the schema only. Do not backup/restore data. " 1236 | "Can be used with both backup and restore modes. Cannot be used with " 1237 | "the --dataOnly [optional]", 1238 | ) 1239 | parser.add_argument( 1240 | "--dataOnly", 1241 | action="store_true", 1242 | default=False, 1243 | help="Restore data only. Do not delete/recreate schema [optional for " 1244 | "restore]", 1245 | ) 1246 | parser.add_argument( 1247 | "--noConfirm", 1248 | action="store_true", 1249 | default=False, 1250 | help="Don't ask for confirmation before deleting existing schemas.", 1251 | ) 1252 | parser.add_argument( 1253 | "--skipThroughputUpdate", 1254 | action="store_true", 1255 | default=False, 1256 | help="Skip updating throughput values across tables [optional]", 1257 | ) 1258 | parser.add_argument( 1259 | "--dumpPath", 1260 | help="Directory to place and search for DynamoDB table " 1261 | "backups (defaults to use '" + str(DATA_DUMP) + "') [optional]", 1262 | default=str(DATA_DUMP), 1263 | ) 1264 | parser.add_argument( 1265 | "--billingMode", 1266 | help="Set billing mode between " 1267 | + str(PROVISIONED_BILLING_MODE) 1268 | + "|" 1269 | + str(PAY_PER_REQUEST_BILLING_MODE) 1270 | + " (defaults to use '" 1271 | + str(PROVISIONED_BILLING_MODE) 1272 | + "') [optional]", 1273 | choices=[PROVISIONED_BILLING_MODE, PAY_PER_REQUEST_BILLING_MODE], 1274 | default=str(PROVISIONED_BILLING_MODE), 1275 | ) 1276 | parser.add_argument( 1277 | "--log", help="Logging level - DEBUG|INFO|WARNING|ERROR|CRITICAL [optional]" 1278 | ) 1279 | parser.add_argument( 1280 | "--limit", 1281 | help="Limit option for backup, will stop the back up process after number of backed up items reaches the limit [optional]", 1282 | type=int, 1283 | ) 1284 | parser.add_argument( 1285 | "-f", 1286 | "--filterOption", 1287 | help="Filter option for backup, JSON file of which keys are ['FilterExpression', 'ExpressionAttributeNames', 'ExpressionAttributeValues']", 1288 | ) 1289 | args = parser.parse_args() 1290 | 1291 | # set log level 1292 | log_level = LOG_LEVEL 1293 | if args.log is not None: 1294 | log_level = args.log.upper() 1295 | logging.basicConfig(level=getattr(logging, log_level)) 1296 | 1297 | # Check to make sure that 
--dataOnly and --schemaOnly weren't simultaneously specified 1298 | if args.schemaOnly and args.dataOnly: 1299 | logging.info("Options --schemaOnly and --dataOnly are mutually exclusive.") 1300 | sys.exit(1) 1301 | 1302 | # instantiate connection 1303 | if args.host: 1304 | conn = _get_aws_client( 1305 | service="dynamodb", 1306 | access_key=args.accessKey, 1307 | secret_key=args.secretKey, 1308 | region=args.region, 1309 | endpoint_url="http://" + args.host + ":" + args.port, 1310 | ) 1311 | sleep_interval = LOCAL_SLEEP_INTERVAL 1312 | else: 1313 | if not args.profile: 1314 | conn = _get_aws_client( 1315 | service="dynamodb", 1316 | access_key=args.accessKey, 1317 | secret_key=args.secretKey, 1318 | region=args.region, 1319 | ) 1320 | sleep_interval = AWS_SLEEP_INTERVAL 1321 | else: 1322 | conn = _get_aws_client( 1323 | service="dynamodb", 1324 | profile=args.profile, 1325 | region=args.region, 1326 | ) 1327 | sleep_interval = AWS_SLEEP_INTERVAL 1328 | 1329 | # don't proceed if connection is not established 1330 | if not conn: 1331 | logging.info("Unable to establish connection with dynamodb") 1332 | sys.exit(1) 1333 | 1334 | # set prefix separator 1335 | prefix_separator = DEFAULT_PREFIX_SEPARATOR 1336 | if args.prefixSeparator is not None: 1337 | prefix_separator = args.prefixSeparator 1338 | if args.noSeparator is True: 1339 | prefix_separator = None 1340 | 1341 | # set filter options 1342 | filter_option = None 1343 | if args.filterOption is not None: 1344 | with open(args.filterOption, "r") as f: 1345 | filter_option = json.load(f) 1346 | if filter_option.keys() != set( 1347 | ( 1348 | "FilterExpression", 1349 | "ExpressionAttributeNames", 1350 | "ExpressionAttributeValues", 1351 | ) 1352 | ): 1353 | raise Exception("Invalid filter option format") 1354 | 1355 | # do backup/restore 1356 | start_time = datetime.datetime.now().replace(microsecond=0) 1357 | if args.mode == "backup": 1358 | matching_backup_tables = [] 1359 | if args.tag: 1360 | # Use Boto3 to find tags. Boto3 provides a paginator that makes searching ta 1361 | matching_backup_tables = get_table_name_by_tag( 1362 | args.profile, args.region, args.tag 1363 | ) 1364 | elif args.srcTable.find("*") != -1: 1365 | matching_backup_tables = get_table_name_matches(conn, args.srcTable) 1366 | elif args.srcTable: 1367 | matching_backup_tables.append(args.srcTable) 1368 | 1369 | if len(matching_backup_tables) == 0: 1370 | logging.info("No matching tables found. 
Nothing to do.") 1371 | sys.exit(0) 1372 | else: 1373 | logging.info( 1374 | "Found " 1375 | + str(len(matching_backup_tables)) 1376 | + " table(s) in DynamoDB host to backup: " 1377 | + ", ".join(matching_backup_tables) 1378 | ) 1379 | 1380 | try: 1381 | if args.srcTable.find("*") == -1: 1382 | do_backup( 1383 | conn, 1384 | args.read_capacity, 1385 | table_queue=None, 1386 | filter_option=filter_option, 1387 | limit=args.limit, 1388 | ) 1389 | else: 1390 | do_backup( 1391 | conn, 1392 | args.read_capacity, 1393 | matching_backup_tables, 1394 | filter_option=filter_option, 1395 | limit=args.limit, 1396 | ) 1397 | except AttributeError: 1398 | # Didn't specify srcTable if we get here 1399 | 1400 | q = Queue() 1401 | threads = [] 1402 | 1403 | for _ in range(MAX_NUMBER_BACKUP_WORKERS): 1404 | t = threading.Thread( 1405 | target=do_backup, 1406 | args=(conn, args.readCapacity), 1407 | kwargs={ 1408 | "table_queue": q, 1409 | "filter_option": filter_option, 1410 | "limit": args.limit, 1411 | }, 1412 | ) 1413 | t.start() 1414 | threads.append(t) 1415 | time.sleep(THREAD_START_DELAY) 1416 | 1417 | for table in matching_backup_tables: 1418 | q.put(table) 1419 | 1420 | q.join() 1421 | 1422 | for _ in range(MAX_NUMBER_BACKUP_WORKERS): 1423 | q.put(None) 1424 | for t in threads: 1425 | t.join() 1426 | 1427 | try: 1428 | logging.info("Backup of table(s) " + args.srcTable + " completed!") 1429 | except (NameError, TypeError): 1430 | logging.info( 1431 | "Backup of table(s) " 1432 | + ", ".join(matching_backup_tables) 1433 | + " completed!" 1434 | ) 1435 | 1436 | if args.archive: 1437 | if args.tag: 1438 | for table in matching_backup_tables: 1439 | dump_path = args.dumpPath + os.sep + table 1440 | did_archive, archive_file = do_archive(args.archive, dump_path) 1441 | if args.bucket and did_archive: 1442 | do_put_bucket_object( 1443 | args.profile, args.region, args.bucket, archive_file 1444 | ) 1445 | else: 1446 | did_archive, archive_file = do_archive(args.archive, args.dumpPath) 1447 | 1448 | if args.bucket and did_archive: 1449 | do_put_bucket_object( 1450 | args.profile, args.region, args.bucket, archive_file 1451 | ) 1452 | 1453 | elif args.mode == "restore": 1454 | if args.destTable is not None: 1455 | dest_table = args.destTable 1456 | else: 1457 | dest_table = args.srcTable 1458 | 1459 | # If backups are in S3 download and extract the backup to use during restoration 1460 | if args.bucket: 1461 | do_get_s3_archive( 1462 | args.profile, args.region, args.bucket, args.srcTable, args.archive 1463 | ) 1464 | 1465 | if dest_table.find("*") != -1: 1466 | matching_destination_tables = get_table_name_matches(conn, dest_table) 1467 | delete_str = ": " if args.dataOnly else " to be deleted: " 1468 | logging.info( 1469 | "Found " 1470 | + str(len(matching_destination_tables)) 1471 | + " table(s) in DynamoDB host" 1472 | + delete_str 1473 | + ", ".join(matching_destination_tables) 1474 | ) 1475 | 1476 | threads = [] 1477 | for table in matching_destination_tables: 1478 | t = threading.Thread( 1479 | target=delete_table, args=(conn, sleep_interval, table) 1480 | ) 1481 | threads.append(t) 1482 | t.start() 1483 | time.sleep(THREAD_START_DELAY) 1484 | 1485 | for thread in threads: 1486 | thread.join() 1487 | 1488 | matching_restore_tables = get_restore_table_matches( 1489 | args.srcTable, prefix_separator 1490 | ) 1491 | logging.info( 1492 | "Found " 1493 | + str(len(matching_restore_tables)) 1494 | + " table(s) in " 1495 | + args.dumpPath 1496 | + " to restore: " 1497 | + ", ".join(matching_restore_tables) 
1498 | ) 1499 | 1500 | threads = [] 1501 | for source_table in matching_restore_tables: 1502 | if args.srcTable == "*": 1503 | t = threading.Thread( 1504 | target=do_restore, 1505 | args=( 1506 | conn, 1507 | sleep_interval, 1508 | source_table, 1509 | source_table, 1510 | args.writeCapacity, 1511 | args.billingMode, 1512 | ), 1513 | ) 1514 | else: 1515 | t = threading.Thread( 1516 | target=do_restore, 1517 | args=( 1518 | conn, 1519 | sleep_interval, 1520 | source_table, 1521 | change_prefix( 1522 | source_table, 1523 | args.srcTable, 1524 | dest_table, 1525 | prefix_separator, 1526 | ), 1527 | args.writeCapacity, 1528 | args.billingMode, 1529 | ), 1530 | ) 1531 | threads.append(t) 1532 | t.start() 1533 | time.sleep(THREAD_START_DELAY) 1534 | 1535 | for thread in threads: 1536 | thread.join() 1537 | 1538 | logging.info( 1539 | "Restore of table(s) " 1540 | + args.srcTable 1541 | + " to " 1542 | + dest_table 1543 | + " completed!" 1544 | ) 1545 | else: 1546 | delete_table( 1547 | conn=conn, sleep_interval=sleep_interval, table_name=dest_table 1548 | ) 1549 | do_restore( 1550 | dynamo=conn, 1551 | sleep_interval=sleep_interval, 1552 | source_table=args.srcTable, 1553 | destination_table=dest_table, 1554 | write_capacity=args.writeCapacity, 1555 | billing_mode=args.billingMode, 1556 | ) 1557 | elif args.mode == "empty": 1558 | if args.srcTable.find("*") != -1: 1559 | matching_backup_tables = get_table_name_matches(conn, args.srcTable) 1560 | logging.info( 1561 | "Found " 1562 | + str(len(matching_backup_tables)) 1563 | + " table(s) in DynamoDB host to empty: " 1564 | + ", ".join(matching_backup_tables) 1565 | ) 1566 | 1567 | threads = [] 1568 | for table in matching_backup_tables: 1569 | t = threading.Thread( 1570 | target=do_empty, args=(conn, table, args.billingMode) 1571 | ) 1572 | threads.append(t) 1573 | t.start() 1574 | time.sleep(THREAD_START_DELAY) 1575 | 1576 | for thread in threads: 1577 | thread.join() 1578 | 1579 | logging.info("Empty of table(s) " + args.srcTable + " completed!") 1580 | else: 1581 | do_empty(conn, args.srcTable, args.billingMode) 1582 | 1583 | 1584 | if __name__ == "__main__": 1585 | main() 1586 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "additionalReviewers": ["bchew"], 3 | "branchPrefix": "renovate-", 4 | "extends": ["config:recommended"], 5 | "labels": ["dependencies"], 6 | "packageRules": [ 7 | { 8 | "matchPackageNames": ["boto3"], 9 | "schedule": ["every 3 months on the first day of the month"] 10 | } 11 | ], 12 | "pip_requirements": { 13 | "fileMatch": ["^requirements.*\\.txt"] 14 | }, 15 | "pre-commit": { 16 | "enabled": true 17 | }, 18 | "schedule": [ 19 | "before 3am on the first day of the month" 20 | ], 21 | "prConcurrentLimit": 0, 22 | "prHourlyLimit": 0 23 | } 24 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | black==25.1.0 3 | flake8==7.1.2 4 | pre-commit==4.1.0 5 | 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.37.25 2 | six==1.17.0 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r", encoding="utf-8") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="dynamodump", 8 | version="1.10.1", 9 | author="Benny Chew", 10 | author_email="noreply@bennychew.com", 11 | description="Simple backup and restore for Amazon DynamoDB using AWS SDK for Python (boto3)", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/bchew/dynamodump", 15 | project_urls={ 16 | "Releases": "https://github.com/bchew/dynamodump/releases", 17 | }, 18 | classifiers=[ 19 | "Programming Language :: Python :: 3", 20 | "License :: OSI Approved :: MIT License", 21 | ], 22 | packages=["dynamodump"], 23 | python_requires=">=3.9", 24 | install_requires=["boto3==1.37.25", "six==1.17.0"], 25 | entry_points={ 26 | "console_scripts": ["dynamodump=dynamodump.dynamodump:main"], 27 | }, 28 | ) 29 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Test script which assumes DynamoDB Local is ready and available via `docker compose up` 3 | 4 | # Test basic restore and backup 5 | mkdir -p dump && cp -a tests/testTable dump 6 | python dynamodump/dynamodump.py -m restore --noConfirm -r local -s testTable -d testRestoredTable \ 7 | --host localhost --port 8000 --accessKey a --secretKey a 8 | python dynamodump/dynamodump.py -m backup -r local -s testRestoredTable --host localhost --port 8000 \ 9 | --accessKey a --secretKey a 10 | python tests/test.py 11 | 12 | # Test wildcard restore and backup 13 | python dynamodump/dynamodump.py -m restore --noConfirm -r local -s "*" --host localhost --port 8000 \ 14 | --accessKey a --secretKey a 15 | rm -rf dump/test* 16 | python dynamodump/dynamodump.py -m backup -r local -s "*" --host localhost --port 8000 --accessKey a \ 17 | --secretKey a 18 | python tests/test.py 19 | 20 | # Test prefixed wildcard restore and backup 21 | python dynamodump/dynamodump.py -m restore --noConfirm -r local -s "test*" --host localhost --port 8000 \ 22 | --accessKey a --secretKey a --prefixSeparator "" 23 | rm -rf dump/test* 24 | python dynamodump/dynamodump.py -m backup -r local -s "test*" --host localhost --port 8000 --accessKey a \ 25 | --secretKey a --prefixSeparator "" 26 | python tests/test.py 27 | 28 | # Clean up 29 | rm -rf dump/test* 30 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bchew/dynamodump/4546edbe57c5e4062215f9ad5cdf7e676e4f6376/tests/__init__.py -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import json 3 | import os 4 | import unittest 5 | 6 | TEST_DATA_PATH = "tests/testTable" 7 | DUMP_DATA_DIR = os.getenv("DUMP_DATA_DIR", "dump") 8 | DUMP_DATA_PATH = 
f"{DUMP_DATA_DIR}/testRestoredTable" 9 | SCHEMA_FILE = "schema.json" 10 | DATA_FILE = "0001.json" 11 | 12 | 13 | class TestDynamoDump(unittest.TestCase): 14 | def setUp(self): 15 | with open(TEST_DATA_PATH + "/" + SCHEMA_FILE, "r") as f: 16 | data = f.read() 17 | self.test_table_schema = json.loads(data) 18 | 19 | with open(DUMP_DATA_PATH + "/" + SCHEMA_FILE, "r") as f: 20 | data = f.read() 21 | self.restored_test_table_schema = json.loads(data) 22 | 23 | with open(TEST_DATA_PATH + "/data/" + DATA_FILE, "r") as f: 24 | data = f.read() 25 | self.test_table_data = json.loads(data) 26 | 27 | with open(DUMP_DATA_PATH + "/data/" + DATA_FILE, "r") as f: 28 | data = f.read() 29 | self.restored_test_table_data = json.loads(data) 30 | 31 | def test_schema(self): 32 | self.assertEqual( 33 | self.test_table_schema["Table"]["AttributeDefinitions"], 34 | self.restored_test_table_schema["Table"]["AttributeDefinitions"], 35 | ) 36 | self.assertEqual( 37 | self.test_table_schema["Table"]["ProvisionedThroughput"][ 38 | "WriteCapacityUnits" 39 | ], 40 | self.restored_test_table_schema["Table"]["ProvisionedThroughput"][ 41 | "WriteCapacityUnits" 42 | ], 43 | ) 44 | self.assertEqual( 45 | self.test_table_schema["Table"]["ProvisionedThroughput"][ 46 | "ReadCapacityUnits" 47 | ], 48 | self.restored_test_table_schema["Table"]["ProvisionedThroughput"][ 49 | "ReadCapacityUnits" 50 | ], 51 | ) 52 | self.assertEqual( 53 | self.test_table_schema["Table"]["KeySchema"], 54 | self.restored_test_table_schema["Table"]["KeySchema"], 55 | ) 56 | self.assertEqual( 57 | self.test_table_schema["Table"]["TableSizeBytes"], 58 | self.restored_test_table_schema["Table"]["TableSizeBytes"], 59 | ) 60 | self.assertEqual( 61 | "testRestoredTable", self.restored_test_table_schema["Table"]["TableName"] 62 | ) 63 | self.assertEqual( 64 | self.test_table_schema["Table"]["TableStatus"], 65 | self.restored_test_table_schema["Table"]["TableStatus"], 66 | ) 67 | self.assertEqual( 68 | self.test_table_schema["Table"]["ItemCount"], 69 | self.restored_test_table_schema["Table"]["ItemCount"], 70 | ) 71 | 72 | def test_data(self): 73 | self.assertEqual(self.test_table_data, self.restored_test_table_data) 74 | 75 | 76 | if __name__ == "__main__": 77 | unittest.main() 78 | -------------------------------------------------------------------------------- /tests/testTable/data/0001.json: -------------------------------------------------------------------------------- 1 | { 2 | "Count": 1, 3 | "Items": [ 4 | { 5 | "lastName": { 6 | "S": "Doe" 7 | }, 8 | "id": { 9 | "N": "1" 10 | }, 11 | "firstName": { 12 | "S": "John" 13 | } 14 | } 15 | ], 16 | "ScannedCount": 1 17 | } -------------------------------------------------------------------------------- /tests/testTable/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "Table": { 3 | "TableArn": "arn:aws:dynamodb:ddblocal:000000000000:table/testTable", 4 | "AttributeDefinitions": [ 5 | { 6 | "AttributeName": "id", 7 | "AttributeType": "N" 8 | } 9 | ], 10 | "ProvisionedThroughput": { 11 | "NumberOfDecreasesToday": 0, 12 | "WriteCapacityUnits": 25, 13 | "LastIncreaseDateTime": 0.0, 14 | "NumberOfDecreasesToday": 0, 15 | "ReadCapacityUnits": 1, 16 | "LastDecreaseDateTime": 0.0 17 | }, 18 | "TableSizeBytes": 28, 19 | "TableName": "testTable", 20 | "TableStatus": "ACTIVE", 21 | "KeySchema": [ 22 | { 23 | "KeyType": "HASH", 24 | "AttributeName": "id" 25 | } 26 | ], 27 | "ItemCount": 1, 28 | "CreationDateTime": 1517103019.926 29 | } 30 | } 
--------------------------------------------------------------------------------