├── .circleci
│   └── config.yml
├── .editorconfig
├── .gitattributes
├── .gitignore
├── .husky
│   ├── _
│   │   └── husky.sh
│   ├── post-commit
│   └── pre-commit
├── .idea
│   ├── codeStyles
│   │   ├── Project.xml
│   │   └── codeStyleConfig.xml
│   ├── inspectionProfiles
│   │   └── Project_Default.xml
│   ├── jsLibraryMappings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── opensource-npm-template.iml
│   └── vcs.xml
├── .npmrc
├── .nvmrc
├── CHANGELOG.md
├── README.md
├── benchmark.ts
├── eslint.config.mjs
├── jest-dynamodb-config.js
├── license
├── package.json
├── populate.ts
├── renovate.json
├── src
│   ├── helpers
│   │   ├── ddb.ts
│   │   ├── delete-all.ts
│   │   └── insert-many.ts
│   ├── index.test.ts
│   └── index.ts
└── tsconfig.json
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2.1
2 |
3 | orbs:
4 | node: circleci/node@7.0.0
5 |
6 | parameters:
7 | node_version:
8 | type: string
9 | default: '22.14.0-browsers'
10 |
11 | commands:
12 | install_deps:
13 | steps:
14 | - run: echo "//registry.npmjs.org/:_authToken=$NPM_TOKEN" >> .npmrc
15 | - node/install-packages:
16 | pkg-manager: yarn
17 | cache-version: v2-all
18 | cache-only-lockfile: true
19 | app-dir: ~/repo
20 | override-ci-command: yarn install --pure-lockfile --no-progress
21 |
22 | jobs:
23 | build:
24 | executor:
25 | name: node/default
26 | tag: << pipeline.parameters.node_version >>
27 | working_directory: ~/repo
28 | steps:
29 | - checkout
30 | - install_deps
31 | - run: yarn test
32 | - run: yarn type-check
33 | - run: yarn lint:ci
34 |
--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
1 | root = true
2 |
3 | [*]
4 | end_of_line = lf
5 | charset = utf-8
6 | trim_trailing_whitespace = true
7 | insert_final_newline = true
8 | indent_style = space
9 | indent_size = 2
10 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 | *.js text eol=lf
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/
2 | .serverless/
3 | coverage/
4 | lib/
5 | node_modules/
6 |
7 | *.log
8 | junit.xml
9 | draft.js
10 | *.draft.js
11 | draft.ts
12 | *.draft.ts
13 | yarn.lock
14 |
15 | .env
16 | .DS_Store
17 |
18 | # User-specific stuff
19 | .idea/**/workspace.xml
20 | .idea/**/tasks.xml
21 | .idea/**/usage.statistics.xml
22 | .idea/**/shelf
23 |
24 | # File-based project format
25 | *.iws
26 |
27 | .idea/$CACHE_FILE$
28 | .idea/$PRODUCT_WORKSPACE_FILE$
29 | .idea/.gitignore
30 | !.husky/_/husky.sh
31 |
--------------------------------------------------------------------------------
/.husky/_/husky.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | if [ -z "$husky_skip_init" ]; then # bootstrap runs only once; the re-executed hook below sees this variable set
3 | debug () { # prints a diagnostic line, but only when HUSKY_DEBUG=1
4 | if [ "$HUSKY_DEBUG" = "1" ]; then
5 | echo "husky (debug) - $1"
6 | fi
7 | }
8 |
9 | readonly hook_name="$(basename "$0")" # file name of the hook script that sourced this file
10 | debug "starting $hook_name..."
11 |
12 | if [ "$HUSKY" = "0" ]; then # HUSKY=0 skips the hook and reports success
13 | debug "HUSKY env variable is set to 0, skipping hook"
14 | exit 0
15 | fi
16 |
17 | if [ -f ~/.huskyrc ]; then # optional per-user setup, sourced before the hook runs
18 | debug "sourcing ~/.huskyrc"
19 | . ~/.huskyrc
20 | fi
21 |
22 | export readonly husky_skip_init=1
23 | sh -e "$0" "$@" # re-run the hook script itself with -e so its first failing command aborts it
24 | exitCode="$?"
25 |
26 | if [ $exitCode != 0 ]; then
27 | echo "husky - $hook_name hook exited with code $exitCode (error)"
28 | fi
29 |
30 | exit $exitCode # hand the hook's exit status back to the caller
31 | fi
32 |
--------------------------------------------------------------------------------
/.husky/post-commit:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | . "$(dirname "$0")/_/husky.sh" # load the shared husky bootstrap
3 | git update-index --again # re-run update-index on the paths whose index entries differ from HEAD
4 |
--------------------------------------------------------------------------------
/.husky/pre-commit:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | . "$(dirname "$0")/_/husky.sh" # load the shared husky bootstrap
3 | yarn lint-staged # run the lint-staged tasks configured in package.json
4 |
--------------------------------------------------------------------------------
/.idea/codeStyles/Project.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/.idea/codeStyles/codeStyleConfig.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/jsLibraryMappings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/opensource-npm-template.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
1 | package-lock=false
2 |
--------------------------------------------------------------------------------
/.nvmrc:
--------------------------------------------------------------------------------
1 | v22.13.0
2 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Breaking Changes
2 |
3 | ## 3.0.0
4 |
5 | - Switched `node` version `18`->`22`
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # dynamodb-query-optimized [CircleCI build status](https://circleci.com/gh/shelfio/dynamodb-query-optimized/tree/master)
2 |
3 | > 2x faster DynamoDB queries when you need to query 2+ MB of data
4 |
5 | Read the blog post explaining how it works: https://vladholubiev.medium.com/how-to-speed-up-long-dynamodb-queries-by-2x-c66a2987d53a
6 |
7 | ## Install
8 |
9 | ```
10 | $ yarn add @shelf/dynamodb-query-optimized
11 | ```
12 |
13 | ## Benchmark
14 |
15 | ```
16 | Regular query: <1 MB of items: 650ms
17 | Optimized query: <1 MB of items: 704ms
18 |
19 | Regular query: ~21 MB of items: 9.023s
20 | Optimized query: ~21 MB of items: 4.988s # almost 2x faster
21 | ```
22 |
23 | ## Usage
24 |
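25 | It supports AWS SDK v3: pass a `DynamoDBClient` instance as `client` together with the `QueryCommand` class from `@aws-sdk/client-dynamodb`. Values in `queryParams` must be in DynamoDB AttributeValue format (e.g. built with `marshall` from `@aws-sdk/util-dynamodb`).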
26 |
27 | ### Optimized query for 2+ MB of data
28 |
29 | Queries DDB from both ends of the query in parallel. Stops and returns results when the middle is reached.
30 |
31 | ```js
32 | import {queryOptimized} from '@shelf/dynamodb-query-optimized';
33 | import {DynamoDBClient, QueryCommand} from '@aws-sdk/client-dynamodb';
34 | import {marshall} from '@aws-sdk/util-dynamodb';
35 |
36 | const client = new DynamoDBClient({region: 'us-east-1'});
37 | const results = await queryOptimized({
38 |   client,
39 |   QueryCommand,
40 |   queryParams: {
41 |     TableName: 'example_table',
42 |     ProjectionExpression: 'hash_key, range_key',
43 |     KeyConditionExpression: '#hash_key = :hash_key AND begins_with(#range_key, :range_key)',
44 |     ExpressionAttributeNames: {
45 |       '#hash_key': 'hash_key',
46 |       '#range_key': 'range_key',
47 |     },
48 |     ExpressionAttributeValues: marshall({
49 |       ':hash_key': hash_key,
50 |       ':range_key': range_key,
51 |     }),
52 |   },
53 | });
54 | console.log(results);
55 | /*
56 | [{hash_key: 'foo', range_key: 'bar'}, {hash_key: 'foo', range_key: 'baz'}]
57 | */
58 | ```
59 |
60 | ### Regular query for <2 MB of data
61 |
62 | Queries DDB and continues to paginate through all results until query is exhausted.
63 |
64 | ```js
65 | import {queryRegular} from '@shelf/dynamodb-query-optimized';
66 | import {DynamoDBClient, QueryCommand} from '@aws-sdk/client-dynamodb';
67 | import {marshall} from '@aws-sdk/util-dynamodb';
68 |
69 | const client = new DynamoDBClient({region: 'us-east-1'});
70 | const results = await queryRegular({
71 |   client,
72 |   QueryCommand,
73 |   queryParams: {
74 |     TableName: 'example_table',
75 |     ProjectionExpression: 'hash_key, range_key',
76 |     KeyConditionExpression: '#hash_key = :hash_key AND begins_with(#range_key, :range_key)',
77 |     ExpressionAttributeNames: {
78 |       '#hash_key': 'hash_key',
79 |       '#range_key': 'range_key',
80 |     },
81 |     ExpressionAttributeValues: marshall({
82 |       ':hash_key': hash_key,
83 |       ':range_key': range_key,
84 |     }),
85 |   },
86 | });
87 | console.log(results);
88 | /*
89 | [{hash_key: 'foo', range_key: 'bar'}, {hash_key: 'foo', range_key: 'baz'}]
90 | */
91 | ```
92 |
93 | ## Publish
94 |
95 | ```sh
96 | $ git checkout master
97 | $ yarn version
98 | $ yarn publish
99 | $ git push origin master --tags
100 | ```
101 |
102 | ## License
103 |
104 | MIT © [Shelf](https://shelf.io)
105 |
--------------------------------------------------------------------------------
/benchmark.ts:
--------------------------------------------------------------------------------
1 | import {DynamoDBClient, QueryCommand} from '@aws-sdk/client-dynamodb';
2 | import {marshall} from '@aws-sdk/util-dynamodb';
3 | import {queryOptimized, queryRegular} from './src';
4 |
5 | // The library expects an AWS SDK v3 client plus the QueryCommand class
6 | // (see QueryOptimizedParams in src/index.ts), not a v2 `queryFunction`.
7 | const client = new DynamoDBClient({region: 'us-east-1'});
8 |
9 | // Times both query strategies against the small ('hk5') and the large ('hk6') partition.
10 | (async () => {
11 |   // warm up TCP connection
12 |   await testQueryRegular('hk5');
13 |
14 |   console.time('Regular query: <1 MB of items');
15 |   await testQueryRegular('hk5');
16 |   console.timeEnd('Regular query: <1 MB of items');
17 |
18 |   console.time('Optimized query: <1 MB of items');
19 |   await testQueryOptimized('hk5');
20 |   console.timeEnd('Optimized query: <1 MB of items');
21 |
22 |   console.time('Regular query: ~21 MB of items');
23 |   await testQueryRegular('hk6');
24 |   console.timeEnd('Regular query: ~21 MB of items');
25 |
26 |   console.time('Optimized query: ~21 MB of items');
27 |   await testQueryOptimized('hk6');
28 |   console.timeEnd('Optimized query: ~21 MB of items');
29 | })();
30 |
31 | // Query input shared by both strategies: all items of one partition whose `number` is above 0.5.
32 | // Expression values are marshalled because the commands are sent through the low-level client.
33 | function buildQueryParams(hash_key: string) {
34 |   return {
35 |     TableName: 'ddb-query-optimized',
36 |     ProjectionExpression: 'hash_key, range_key',
37 |     KeyConditionExpression: '#hash_key = :hash_key',
38 |     FilterExpression: '#number > :number',
39 |     ExpressionAttributeNames: {
40 |       '#hash_key': 'hash_key',
41 |       '#number': 'number',
42 |     },
43 |     ExpressionAttributeValues: marshall({
44 |       ':hash_key': hash_key,
45 |       ':number': 0.5,
46 |     }),
47 |   };
48 | }
49 |
50 | // Runs the sequential, single-direction query for the given partition.
51 | function testQueryRegular(hash_key: string) {
52 |   return queryRegular({client, QueryCommand, queryParams: buildQueryParams(hash_key)});
53 | }
54 |
55 | // Runs the two-directional parallel query for the given partition.
56 | function testQueryOptimized(hash_key: string) {
57 |   return queryOptimized({client, QueryCommand, queryParams: buildQueryParams(hash_key)});
58 | }
59 |
--------------------------------------------------------------------------------
/eslint.config.mjs:
--------------------------------------------------------------------------------
1 | import rules from '@shelf/eslint-config/typescript.js';
2 |
3 | export default [ // ESLint configuration array exported for the whole repository
4 | ...rules, // shared rule set imported from @shelf/eslint-config/typescript.js
5 | {files: ['**/*.js', '**/*.jsx', '**/*.ts', '**/*.tsx', '**/*.json']}, // file types to lint
6 | {
7 | ignores: [ // paths excluded from linting: IDE files, build output, dependencies
8 | '.idea/',
9 | 'coverage/',
10 | 'draft.js',
11 | 'lib/',
12 | 'dist/',
13 | 'node_modules/',
14 | 'packages/**/tsconfig.types.json',
15 | 'packages/**/node_modules/**',
16 | 'packages/**/lib/**',
17 | 'renovate.json',
18 | ],
19 | },
20 | ];
21 |
--------------------------------------------------------------------------------
/jest-dynamodb-config.js:
--------------------------------------------------------------------------------
1 | function getTableConfig(tableName) { // builds the table definition for one test table named `tableName`
2 | return {
3 | TableName: tableName,
4 | KeySchema: [ // composite primary key: hash_key (HASH) plus range_key (RANGE)
5 | {AttributeName: 'hash_key', KeyType: 'HASH'},
6 | {AttributeName: 'range_key', KeyType: 'RANGE'},
7 | ],
8 | AttributeDefinitions: [ // both key attributes are strings ('S')
9 | {AttributeName: 'hash_key', AttributeType: 'S'},
10 | {AttributeName: 'range_key', AttributeType: 'S'},
11 | ],
12 | ProvisionedThroughput: {ReadCapacityUnits: 1, WriteCapacityUnits: 1},
13 | StreamSpecification: {
14 | StreamEnabled: true,
15 | StreamViewType: 'NEW_AND_OLD_IMAGES',
16 | },
17 | };
18 | }
19 |
20 | module.exports = { // configuration consumed by the @shelf/jest-dynamodb preset (see package.json "jest")
21 | tables: [getTableConfig('example_table')], // the single table used by src/index.test.ts
22 | };
23 |
--------------------------------------------------------------------------------
/license:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Gemshelf Inc. (shelf.io)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6 |
7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8 |
9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "@shelf/dynamodb-query-optimized",
3 | "version": "3.0.0",
4 | "description": "2x faster DynamoDB queries when you need to query 2+ MB of data",
5 | "license": "MIT",
6 | "author": {
7 | "name": "Vlad Holubiev",
8 | "email": "vlad@shelf.io",
9 | "url": "https://shelf.io"
10 | },
11 | "main": "lib",
12 | "types": "lib/index.d.ts",
13 | "files": [
14 | "lib"
15 | ],
16 | "scripts": {
17 | "build": "rm -rf lib/ && yarn build:types && yarn build:code",
18 | "build:code": "babel src --out-dir lib --ignore '**/*.test.ts' --extensions '.ts' && find ./lib -name '*.test.d.ts' -delete",
19 | "build:types": "tsc --emitDeclarationOnly --declaration --isolatedModules false --declarationDir lib",
20 | "coverage": "yarn test --coverage",
21 | "lint": "yarn lint:ci --fix",
22 | "lint:ci": "eslint . --quiet",
23 | "prepack": "yarn build",
24 | "test": "export ENVIRONMENT=local && jest src",
25 | "type-check": "tsc --noEmit",
26 | "type-check:watch": "npm run type-check -- --watch"
27 | },
28 | "lint-staged": {
29 | "*.{html,md,yml}": [
30 | "prettier --write"
31 | ],
32 | "*.{js,ts,json}": [
33 | "eslint --fix"
34 | ]
35 | },
36 | "babel": {
37 | "extends": "@shelf/babel-config/backend"
38 | },
39 | "prettier": "@shelf/prettier-config",
40 | "jest": {
41 | "preset": "@shelf/jest-dynamodb"
42 | },
43 | "dependencies": {
44 | "@shelf/aws-ddb-with-xray": "2.1.0",
45 | "lodash": "4.17.21",
46 | "p-map": "4.0.0"
47 | },
48 | "devDependencies": {
49 | "@babel/cli": "7.26.4",
50 | "@babel/core": "7.26.9",
51 | "@shelf/babel-config": "1.0.2",
52 | "@shelf/eslint-config": "4.2.1",
53 | "@shelf/jest-dynamodb": "3.4.1",
54 | "@shelf/prettier-config": "1.0.0",
55 | "@shelf/tsconfig": "0.0.11",
56 | "@types/jest": "28.1.8",
57 | "@types/lodash": "4.17.15",
58 | "@types/node": "22",
59 | "eslint": "9.21.0",
60 | "husky": "8.0.3",
61 | "jest": "28.1.3",
62 | "lint-staged": "13.3.0",
63 | "prettier": "3.5.2",
64 | "typescript": "4.9.5"
65 | },
66 | "peerDependencies": {
67 | "@aws-sdk/client-dynamodb": "3.x.x",
68 | "@aws-sdk/lib-dynamodb": "3.x.x"
69 | },
70 | "engines": {
71 | "node": ">=22"
72 | },
73 | "publishConfig": {
74 | "access": "public"
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/populate.ts:
--------------------------------------------------------------------------------
1 | import {insertMany} from './src/helpers/insert-many';
2 |
3 | (async () => { // seeds the benchmark table with one large and one small partition
4 | const listToInsert = [];
5 |
6 | // Each item is approx. 21.5 KB
7 | // We insert approx. 21.5 MB of data with hash_key: 'hk6' (items 0-999)
8 | // and ~0.8 MB of data with hash_key: 'hk5' (items 1000-1039)
9 | for (let i = 0; i < 1040; i++) {
10 | listToInsert.push({
11 | hash_key: i >= 1000 ? 'hk5' : 'hk6',
12 | range_key: `rk-${i}`,
13 | foo: 'hello world '.repeat(100),
14 | bar: 'hello world '.repeat(100),
15 | baz: 'hello world '.repeat(100),
16 | items: new Array(1000).fill(Math.random()), // one random number repeated 1000 times, used as padding
17 | number: Math.random(), // benchmark.ts filters on this attribute (`#number > :number`)
18 | });
19 | }
20 |
21 | await insertMany({ // NOTE(review): insertMany uses the client from src/helpers/ddb.ts, which targets http://localhost:8000 - confirm this reaches the table benchmark.ts queries
22 | TableName: 'ddb-query-optimized',
23 | Items: listToInsert,
24 | });
25 | })();
26 |
--------------------------------------------------------------------------------
/renovate.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": ["github>shelfio/renovate-config-public"],
3 | "labels": ["backend"],
4 | "ignoreDeps": [
5 | "cimg/node"
6 | ]
7 | }
8 |
--------------------------------------------------------------------------------
/src/helpers/ddb.ts:
--------------------------------------------------------------------------------
1 | import {getDocumentClient} from '@shelf/aws-ddb-with-xray';
2 |
3 | export const ddb = getDocumentClient({ // shared client used by the helpers and the test suite
4 | documentClientConfig: {
5 | marshallOptions: {convertEmptyValues: true},
6 | ...(process.env.DDB_DEBUG_LOGS && {logger: console}), // log to the console only when DDB_DEBUG_LOGS is set
7 | },
8 | credentials: { // placeholder values; presumably enough for the local endpoint below - verify
9 | accessKeyId: 'fakeMyKeyId',
10 | secretAccessKey: 'fakeSecretAccessKey',
11 | sessionToken: 'fakeSessionToken',
12 | },
13 | clientConfig: {
14 | endpoint: 'http://localhost:8000', // a DynamoDB endpoint on this machine, not an AWS region endpoint
15 | tls: false,
16 | region: 'local-env',
17 | ...(process.env.DDB_DEBUG_LOGS && {logger: console}),
18 | },
19 | });
20 |
--------------------------------------------------------------------------------
/src/helpers/delete-all.ts:
--------------------------------------------------------------------------------
1 | import {ScanCommand} from '@aws-sdk/lib-dynamodb';
2 | import {chunk} from 'lodash';
3 | import type {
4 | BatchWriteCommandInput,
5 | BatchWriteCommandOutput,
6 | ScanCommandInput,
7 | } from '@aws-sdk/lib-dynamodb';
8 | import type {AttributeValue, WriteRequest} from '@aws-sdk/client-dynamodb';
9 | import {ddb} from './ddb';
10 | import {batchWrite} from './insert-many';
11 |
12 | export async function deleteAll(params: ScanCommandInput): Promise<BatchWriteCommandOutput[]> {
13 |   // Scans everything matched by `params` and deletes every returned item. Scanned items are used
14 |   // as delete keys as-is, so `params` should project only the table's key attributes.
15 |   let keys: Record<string, AttributeValue>[] = [];
16 |   let LastEvaluatedKey: ScanCommandInput['ExclusiveStartKey'];
17 |
18 |   do {
19 |     const scanCommand: ScanCommand = new ScanCommand({
20 |       ...params,
21 |       ...(LastEvaluatedKey ? {ExclusiveStartKey: LastEvaluatedKey} : {}),
22 |     });
23 |     const resp = await ddb.send(scanCommand);
24 |     // Keep the keys of every page: Scan results are paginated, so holding on to only the
25 |     // latest response would delete just the final page and leave the rest of the table intact.
26 |     keys = keys.concat((resp.Items ?? []) as Record<string, AttributeValue>[]);
27 |     // eslint-disable-next-line prefer-destructuring
28 |     LastEvaluatedKey = resp.LastEvaluatedKey;
29 |   } while (LastEvaluatedKey);
30 |
31 |   return deleteMany({TableName: params.TableName!, Keys: keys});
32 | }
33 |
34 | type DeleteRequestItem = Omit; // shape produced by makeDeleteRequestItem: an object holding a DeleteRequest entry
35 |
36 | type DeleteManyParams = {
37 | TableName: string; // table to delete from
38 | Keys: Record[]; // one entry per item to delete, used as the DeleteRequest Key
39 | };
40 |
41 | export function deleteMany( // deletes the given keys through batch writes; resolves with one output per batch
42 | params: DeleteManyParams,
43 | retryCount = 0 // forwarded to batchWrite as its retry budget; with 0, unprocessed keys are not re-sent
44 | ): Promise {
45 | const {TableName, Keys} = params;
46 | const keysChunks = chunk(Keys, 25); // groups of at most 25 keys, one batch-write request each
47 |
48 | const paramsChunks = keysChunks.map(
49 | (keysChunk): BatchWriteCommandInput => makeDeleteRequestItems({TableName, Keys: keysChunk})
50 | );
51 |
52 | return Promise.all( // every batch is started at once; rejects as soon as any batch rejects
53 | paramsChunks.map((params): Promise => batchWrite(params, retryCount))
54 | );
55 | }
56 |
57 | function makeDeleteRequestItems(params: DeleteManyParams): BatchWriteCommandInput { // builds one batch-write input that deletes all given keys from one table
58 | const {TableName, Keys} = params;
59 |
60 | return {
61 | RequestItems: {
62 | [TableName]: Keys.map((key): DeleteRequestItem => makeDeleteRequestItem(key)), // one DeleteRequest per key, grouped under the table name
63 | },
64 | };
65 | }
66 |
67 | function makeDeleteRequestItem(key: Record): DeleteRequestItem { // wraps a single key in the DeleteRequest envelope
68 | return {
69 | DeleteRequest: {
70 | Key: key,
71 | },
72 | };
73 | }
74 |
--------------------------------------------------------------------------------
/src/helpers/insert-many.ts:
--------------------------------------------------------------------------------
1 | import {BatchWriteCommand} from '@aws-sdk/lib-dynamodb';
2 | import {chunk} from 'lodash';
3 | import pMap from 'p-map';
4 | import type {AttributeValue, WriteRequest} from '@aws-sdk/client-dynamodb';
5 | import type {BatchWriteCommandInput, BatchWriteCommandOutput} from '@aws-sdk/lib-dynamodb';
6 | import {ddb} from './ddb';
7 |
8 | type InsertManyParams = {
9 | TableName: string; // table to write to
10 | Items: any[]; // items to insert; each one becomes a PutRequest
11 | };
12 |
13 | export function insertMany( // inserts the given items through batch writes; resolves with one output per batch
14 | params: InsertManyParams,
15 | retryCount = 3 // forwarded to batchWrite as its retry budget for unprocessed items
16 | ): Promise {
17 | const {TableName, Items} = params;
18 | const itemsChunks = chunk(Items, 25); // groups of at most 25 items, one batch-write request each
19 |
20 | const paramsChunks = itemsChunks.map(
21 | (itemsChunk): BatchWriteCommandInput => makePutRequestItems({TableName, Items: itemsChunk})
22 | );
23 |
24 | return pMap(
25 | paramsChunks,
26 | (params): Promise => batchWrite(params, retryCount),
27 | {concurrency: 100, stopOnError: false} // at most 100 batches in flight; presumably p-map then waits for all batches before reporting failures - verify against p-map docs
28 | );
29 | }
30 |
31 | export async function batchWrite( // sends one BatchWriteCommand and re-sends unprocessed items while retries remain
32 | params: BatchWriteCommandInput,
33 | retryCounter = 0 // number of retries still allowed
34 | ): Promise {
35 | if (retryCounter > 10) { // cap the retry budget at 10
36 | retryCounter = 10;
37 | }
38 |
39 | const results = await ddb.send(new BatchWriteCommand(params));
40 |
41 | const isAnyOpFailed = Boolean(Object.keys(results?.UnprocessedItems || {}).length); // true when the response lists unprocessed items
42 |
43 | if (retryCounter > 0 && isAnyOpFailed) {
44 | return batchWrite( // retry right away (no delay) with only the unprocessed requests
45 | {
46 | RequestItems: results.UnprocessedItems as BatchWriteCommandInput['RequestItems'],
47 | },
48 | retryCounter - 1
49 | );
50 | }
51 |
52 | return results; // may still list UnprocessedItems when the retry budget ran out
53 | }
54 |
55 | type PutRequestItem = Omit; // shape produced by makePutRequestItem: an object holding a PutRequest entry
56 |
57 | function makePutRequestItems(params: InsertManyParams): BatchWriteCommandInput { // builds one batch-write input that puts all given items into one table
58 | const {TableName, Items} = params;
59 |
60 | return {
61 | RequestItems: {
62 | [TableName]: Items.map((item): PutRequestItem => makePutRequestItem(item)), // one PutRequest per item, grouped under the table name
63 | },
64 | };
65 | }
66 |
67 | function makePutRequestItem(item: Record): PutRequestItem { // wraps a single item in the PutRequest envelope
68 | return {
69 | PutRequest: {
70 | Item: item,
71 | },
72 | };
73 | }
74 |
--------------------------------------------------------------------------------
/src/index.test.ts:
--------------------------------------------------------------------------------
1 | import {marshall} from '@aws-sdk/util-dynamodb';
2 | import {QueryCommand} from '@aws-sdk/client-dynamodb';
3 | import {insertMany} from './helpers/insert-many';
4 | import {deleteAll} from './helpers/delete-all';
5 | import {ddb} from './helpers/ddb';
6 | import {queryOptimized, queryRegular} from './';
7 |
8 | const hash_key = 'some-hash-key'; // partition holding the large data set
9 | const range_key = 'some-range-key'; // common prefix of every range key, used in begins_with()
10 | const getRangeKey = (range: number) => `${range_key}-${range}`; // e.g. getRangeKey(42) -> 'some-range-key-42'
11 | jest.setTimeout(120000); // 120 s timeout, since the suite seeds and queries several MB of data
12 |
13 | beforeAll(async () => { // seeds example_table once before all tests
14 | const listToInsert = [];
15 |
16 | // Each item is approx. 3.6 KB
17 | // We insert approx. 10.5 MB of data with hash_key: 'some-hash-key' (items 0-2999)
18 | // and ~0.9 MB of data with hash_key: 'some-hash-key-1mb' (items 3000-3249)
19 | for (let i = 0; i < 3250; i++) {
20 | listToInsert.push({
21 | hash_key: i >= 3000 ? 'some-hash-key-1mb' : hash_key,
22 | range_key: getRangeKey(i),
23 | name: 'hello',
24 | description: 'hello world',
25 | foo: 'hello world '.repeat(100),
26 | bar: 'hello world '.repeat(100),
27 | baz: 'hello world '.repeat(100),
28 | });
29 | }
30 |
31 | await insertMany({
32 | TableName: 'example_table',
33 | Items: listToInsert,
34 | });
35 | });
36 |
37 | it(`should return all elements using optimized find query for 10 MB table`, async () => {
38 | const result = await testQueryOptimized('some-hash-key');
39 |
40 | expect(result).toHaveLength(3000); // all 3000 items of the large partition, without duplicates
41 | });
42 |
43 | it(`should return all elements using regular find query for 10 MB table`, async () => {
44 | const result = await testQueryRegular('some-hash-key');
45 |
46 | expect(result).toHaveLength(3000);
47 | });
48 |
49 | it(`should return all elements using optimized find query for 1 MB table`, async () => {
50 | const result = await testQueryOptimized('some-hash-key-1mb');
51 |
52 | expect(result).toHaveLength(250); // items 3000-3249 belong to the small partition
53 | });
54 |
55 | it(`should return unmarshalled element for 1 MB table`, async () => {
56 | const result = await testQueryOptimized('some-hash-key-1mb');
57 |
58 | expect(result[0]).toEqual({hash_key: 'some-hash-key-1mb', range_key: 'some-range-key-3000'}); // plain values, not AttributeValue wrappers
59 | });
60 |
61 | it(`should return all elements using regular find query for 1 MB table`, async () => {
62 | const result = await testQueryRegular('some-hash-key-1mb');
63 |
64 | expect(result).toHaveLength(250); // only the count is checked here, not the item format
65 | });
66 |
67 | afterAll(async () => { // cleans up example_table after the suite
68 | await deleteAll({
69 | TableName: 'example_table',
70 | ProjectionExpression: 'hash_key, range_key', // scan only the key attributes, which deleteAll uses as delete keys
71 | });
72 | });
73 |
74 | function testQueryRegular(hash_key: string) { // runs queryRegular for one partition, matching every seeded range key
75 | return queryRegular({
76 | QueryCommand: QueryCommand,
77 | client: ddb,
78 | queryParams: {
79 | TableName: 'example_table',
80 | ProjectionExpression: 'hash_key, range_key',
81 | KeyConditionExpression: '#hash_key = :hash_key AND begins_with(#range_key, :range_key)',
82 | ExpressionAttributeNames: {
83 | '#hash_key': 'hash_key',
84 | '#range_key': 'range_key',
85 | },
86 | ExpressionAttributeValues: marshall({ // values are passed in AttributeValue form
87 | ':hash_key': hash_key,
88 | ':range_key': range_key,
89 | }),
90 | },
91 | });
92 | }
93 |
94 | function testQueryOptimized(hash_key: string) { // runs queryOptimized for one partition, matching every seeded range key
95 | return queryOptimized({
96 | QueryCommand: QueryCommand,
97 | client: ddb,
98 | queryParams: {
99 | TableName: 'example_table',
100 | ProjectionExpression: 'hash_key, range_key',
101 | KeyConditionExpression: '#hash_key = :hash_key AND begins_with(#range_key, :range_key)',
102 | ExpressionAttributeNames: {
103 | '#hash_key': 'hash_key',
104 | '#range_key': 'range_key',
105 | },
106 | ExpressionAttributeValues: marshall({ // values are passed in AttributeValue form
107 | ':hash_key': hash_key,
108 | ':range_key': range_key,
109 | }),
110 | },
111 | });
112 | }
113 |
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | import {isEqual, uniqBy} from 'lodash';
2 | import {unmarshall} from '@aws-sdk/util-dynamodb';
3 | import type {AttributeValue, QueryCommandInput, QueryCommandOutput} from '@aws-sdk/client-dynamodb';
4 | import type {DynamoDBClient} from '@aws-sdk/client-dynamodb';
5 | import type {QueryCommand} from '@aws-sdk/client-dynamodb';
6 |
7 | type QueryOptimizedParams = { // input shared by queryOptimized and queryRegular
8 | client: DynamoDBClient; // client whose send() executes the queries
9 | QueryCommand: typeof QueryCommand; // command class, instantiated once per page request
10 | queryParams: Omit; // query input; ExclusiveStartKey and ScanIndexForward are set by this module
11 | };
12 |
13 | //
14 | // This method is optimized to query indices where a query might scan 2+ MB of data.
15 | // It works by launching 2 parallel queries that iterate from both ends of the index
16 | // until they meet in the middle
17 | //
18 | export async function queryOptimized<T extends Record<string, any>>({
19 |   queryParams,
20 |   QueryCommand,
21 |   client,
22 | }: QueryOptimizedParams): Promise<T[]> {
23 |   // Resolves with every item matched by `queryParams`, de-duplicated and unmarshalled into plain
24 |   // objects. `client` sends the commands; `QueryCommand` is the command class built for each page.
25 |   // Rejects with whatever error `client.send` rejects with.
26 |   // Items are kept in DynamoDB AttributeValue form until the final unmarshall step.
27 |   let allItems: Record<string, AttributeValue>[] = [];
28 |   let allItemsFromLeftQuery: Record<string, AttributeValue>[] = [];
29 |   let allItemsFromRightQuery: Record<string, AttributeValue>[] = [];
30 |
31 |   let isMiddleReached = false;
32 |   let isAnyQueryExhausted = false;
33 |   let queryLeftLastEvaluatedKey: QueryCommandOutput['LastEvaluatedKey'];
34 |   let queryRightLastEvaluatedKey: QueryCommandOutput['LastEvaluatedKey'];
35 |
36 |   do {
37 |     // Fetch one page from each end of the key range in parallel
38 |     const [respLeft, respRight] = await Promise.all([
39 |       executeLeftQuery({client, queryParams, QueryCommand}, queryLeftLastEvaluatedKey),
40 |       executeRightQuery({client, queryParams, QueryCommand}, queryRightLastEvaluatedKey),
41 |     ]);
42 |
43 |     // Always adopt the newest cursor: holding on to a stale one after a side finished would
44 |     // re-fetch that side's last page forever when a FilterExpression matches no items.
45 |     queryLeftLastEvaluatedKey = respLeft.LastEvaluatedKey;
46 |     queryRightLastEvaluatedKey = respRight.LastEvaluatedKey;
47 |
48 |     // A side without a cursor has walked the whole key range alone, so nothing is left to fetch
49 |     isAnyQueryExhausted = !queryLeftLastEvaluatedKey || !queryRightLastEvaluatedKey;
50 |
51 |     allItemsFromLeftQuery = allItemsFromLeftQuery.concat(respLeft.Items ?? []);
52 |     allItemsFromRightQuery = allItemsFromRightQuery.concat(respRight.Items ?? []);
53 |     allItems = allItems.concat(respLeft.Items ?? [], respRight.Items ?? []);
54 |
55 |     // An item returned by both directions means the two scans have met or crossed
56 |     if (!isAnyQueryExhausted) {
57 |       isMiddleReached = checkIfMiddleReached(allItemsFromLeftQuery, allItemsFromRightQuery);
58 |     }
59 |   } while (!isMiddleReached && !isAnyQueryExhausted);
60 |
61 |   // Pages around the meeting point overlap, so drop duplicates before unmarshalling
62 |   return uniqBy(allItems, item => JSON.stringify(item)).map(item => unmarshall(item) as T);
63 | }
64 |
65 | export async function queryRegular>({ // pages through the query in one direction until no cursor is returned
66 | client,
67 | queryParams,
68 | QueryCommand,
69 | }: QueryOptimizedParams): Promise {
70 | let allItems: T[] = [];
71 | let lastEvaluatedKey; // cursor of the last fetched page; undefined for the first request
72 |
73 | do {
74 | const resp: QueryCommandOutput = await executeLeftQuery(
75 | {
76 | client,
77 | queryParams,
78 | QueryCommand,
79 | },
80 | lastEvaluatedKey
81 | );
82 |
83 | if (resp.Items && resp.Items.length) {
84 | allItems = allItems.concat(resp.Items! as T[]); // NOTE(review): items are appended as returned, without unmarshall(), unlike queryOptimized - confirm this is intended
85 | }
86 |
87 | lastEvaluatedKey = resp.LastEvaluatedKey;
88 | } while (lastEvaluatedKey); // a response without LastEvaluatedKey ends the loop
89 |
90 | return allItems;
91 | }
92 |
93 | function executeLeftQuery( // fetches one page in ascending order (ScanIndexForward: true)
94 | {client, queryParams, QueryCommand}: QueryOptimizedParams,
95 | key?: any // cursor from the previous page of this direction, if any
96 | ): Promise {
97 | return client.send(
98 | new QueryCommand({
99 | ...queryParams,
100 | ...(key ? {ExclusiveStartKey: key} : {}), // resume after `key` when one is given
101 | ScanIndexForward: true,
102 | })
103 | );
104 | }
105 |
106 | function executeRightQuery( // fetches one page in descending order (ScanIndexForward: false)
107 | {client, queryParams, QueryCommand}: QueryOptimizedParams,
108 | key?: any // cursor from the previous page of this direction, if any
109 | ): Promise {
110 | return client.send(
111 | new QueryCommand({
112 | ...queryParams,
113 | ...(key ? {ExclusiveStartKey: key} : {}), // resume after `key` when one is given
114 | ScanIndexForward: false,
115 | })
116 | );
117 | }
118 |
119 | function checkIfMiddleReached(allItemsFromLeftQuery: T[], allItemsFromRightQuery: T[]): boolean { // true when at least one item appears in both lists
120 | return allItemsFromLeftQuery.some(leftItem =>
121 | allItemsFromRightQuery.some(rightItem => isEqual(rightItem, leftItem)) // deep equality; compares every left/right pair until a match is found
122 | );
123 | }
124 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": "@shelf/tsconfig/backend",
3 | "compilerOptions": {
4 | "strict": true
5 | },
6 | "exclude": ["node_modules"],
7 | "include": ["src"]
8 | }
9 |
--------------------------------------------------------------------------------