├── .circleci └── config.yml ├── .editorconfig ├── .gitattributes ├── .gitignore ├── .husky ├── _ │ └── husky.sh ├── post-commit └── pre-commit ├── .idea ├── codeStyles │ ├── Project.xml │ └── codeStyleConfig.xml ├── inspectionProfiles │ └── Project_Default.xml ├── jsLibraryMappings.xml ├── misc.xml ├── modules.xml ├── opensource-npm-template.iml └── vcs.xml ├── .npmrc ├── .nvmrc ├── CHANGELOG.md ├── README.md ├── benchmark.ts ├── eslint.config.mjs ├── jest-dynamodb-config.js ├── license ├── package.json ├── populate.ts ├── renovate.json ├── src ├── helpers │ ├── ddb.ts │ ├── delete-all.ts │ └── insert-many.ts ├── index.test.ts └── index.ts └── tsconfig.json /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | orbs: 4 | node: circleci/node@7.0.0 5 | 6 | parameters: 7 | node_version: 8 | type: string 9 | default: '22.14.0-browsers' 10 | 11 | commands: 12 | install_deps: 13 | steps: 14 | - run: echo "//registry.npmjs.org/:_authToken=$NPM_TOKEN" >> .npmrc 15 | - node/install-packages: 16 | pkg-manager: yarn 17 | cache-version: v2-all 18 | cache-only-lockfile: true 19 | app-dir: ~/repo 20 | override-ci-command: yarn install --pure-lockfile --no-progress 21 | 22 | jobs: 23 | build: 24 | executor: 25 | name: node/default 26 | tag: << pipeline.parameters.node_version >> 27 | working_directory: ~/repo 28 | steps: 29 | - checkout 30 | - install_deps 31 | - run: yarn test 32 | - run: yarn type-check 33 | - run: yarn lint:ci 34 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | charset = utf-8 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 2 10 | -------------------------------------------------------------------------------- /.gitattributes: 
-------------------------------------------------------------------------------- 1 | * text=auto 2 | *.js text eol=lf 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | .serverless/ 3 | coverage/ 4 | lib/ 5 | node_modules/ 6 | 7 | *.log 8 | junit.xml 9 | draft.js 10 | *.draft.js 11 | draft.ts 12 | *.draft.ts 13 | yarn.lock 14 | 15 | .env 16 | .DS_Store 17 | 18 | # User-specific stuff 19 | .idea/**/workspace.xml 20 | .idea/**/tasks.xml 21 | .idea/**/usage.statistics.xml 22 | .idea/**/shelf 23 | 24 | # File-based project format 25 | *.iws 26 | 27 | .idea/$CACHE_FILE$ 28 | .idea/$PRODUCT_WORKSPACE_FILE$ 29 | .idea/.gitignore 30 | !.husky/_/husky.sh 31 | -------------------------------------------------------------------------------- /.husky/_/husky.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ -z "$husky_skip_init" ]; then 3 | debug () { 4 | if [ "$HUSKY_DEBUG" = "1" ]; then 5 | echo "husky (debug) - $1" 6 | fi 7 | } 8 | 9 | readonly hook_name="$(basename "$0")" 10 | debug "starting $hook_name..." 11 | 12 | if [ "$HUSKY" = "0" ]; then 13 | debug "HUSKY env variable is set to 0, skipping hook" 14 | exit 0 15 | fi 16 | 17 | if [ -f ~/.huskyrc ]; then 18 | debug "sourcing ~/.huskyrc" 19 | . ~/.huskyrc 20 | fi 21 | 22 | export readonly husky_skip_init=1 23 | sh -e "$0" "$@" 24 | exitCode="$?" 25 | 26 | if [ $exitCode != 0 ]; then 27 | echo "husky - $hook_name hook exited with code $exitCode (error)" 28 | fi 29 | 30 | exit $exitCode 31 | fi 32 | -------------------------------------------------------------------------------- /.husky/post-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . 
"$(dirname "$0")/_/husky.sh" 3 | git update-index --again 4 | -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | . "$(dirname "$0")/_/husky.sh" 3 | yarn lint-staged 4 | -------------------------------------------------------------------------------- /.idea/codeStyles/Project.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 11 | 12 | 19 | 20 | 22 | 23 | 35 | 36 | 37 | 39 | 40 | 41 | 48 | 49 | -------------------------------------------------------------------------------- /.idea/codeStyles/codeStyleConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/jsLibraryMappings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/opensource-npm-template.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- 
/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | package-lock=false 2 | -------------------------------------------------------------------------------- /.nvmrc: -------------------------------------------------------------------------------- 1 | v22.13.0 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Breaking Changes 2 | 3 | ## 3.0.0 4 | 5 | - Switched `node` version `18`->`22` 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dynamodb-query-optimized [![CircleCI](https://circleci.com/gh/shelfio/dynamodb-query-optimized/tree/master.svg?style=svg)](https://circleci.com/gh/shelfio/dynamodb-query-optimized/tree/master)![](https://img.shields.io/badge/code_style-prettier-ff69b4.svg) 2 | 3 | > 2x faster DynamoDB queries when you need to query 2+ MB of data 4 | 5 | Read the blog post explaining how it works: https://vladholubiev.medium.com/how-to-speed-up-long-dynamodb-queries-by-2x-c66a2987d53a 6 | 7 | ## Install 8 | 9 | ``` 10 | $ yarn add @shelf/dynamodb-query-optimized 11 | ``` 12 | 13 | ## Benchmark 14 | 15 | ``` 16 | Regular query: <1 MB of items: 650ms 17 | Optimized query: <1 MB of items: 704ms 18 | 19 | Regular query: ~21 MB of items: 9.023s 20 | Optimized query: ~21 MB of items: 4.988s # almost 2x faster 21 | ``` 22 | 23 | ## Usage 24 | 25 | It targets AWS SDK v3: `@aws-sdk/client-dynamodb` and `@aws-sdk/lib-dynamodb` are peer dependencies, and both functions take a `client` plus the `QueryCommand` class (with marshalled `ExpressionAttributeValues`) rather than the `queryFunction` shown in the older `aws-sdk` v2 snippets below. 
26 | 27 | ### Optimized query for 2+ MB of data 28 | 29 | Queries DDB from both ends of the query in parallel. Stops and returns results when the middle is reached. 30 | 31 | ```js 32 | import {queryOptimized} from '@shelf/dynamodb-query-optimized'; 33 | import DynamoDB from 'aws-sdk/clients/dynamodb'; 34 | 35 | const ddb = new DynamoDB.DocumentClient({region: 'us-east-1'}); 36 | 37 | const results = await queryOptimized({ 38 | queryFunction: ddb.query.bind(ddb), 39 | queryParams: { 40 | TableName: 'example_table', 41 | ProjectionExpression: 'hash_key, range_key', 42 | KeyConditionExpression: '#hash_key = :hash_key AND begins_with(#range_key, :range_key)', 43 | ExpressionAttributeNames: { 44 | '#hash_key': 'hash_key', 45 | '#range_key': 'range_key', 46 | }, 47 | ExpressionAttributeValues: { 48 | ':hash_key': hash_key, 49 | ':range_key': range_key, 50 | }, 51 | }, 52 | }); 53 | 54 | console.log(results); 55 | /* 56 | [{hash_key: 'foo', range_key: 'bar'}, {hash_key: 'foo', range_key: 'baz'}] 57 | */ 58 | ``` 59 | 60 | ### Regular query for <2 MB of data 61 | 62 | Queries DDB and continues to paginate through all results until query is exhausted. 
63 | 64 | ```js 65 | import {queryRegular} from '@shelf/dynamodb-query-optimized'; 66 | import DynamoDB from 'aws-sdk/clients/dynamodb'; 67 | 68 | const ddb = new DynamoDB.DocumentClient({region: 'us-east-1'}); 69 | 70 | const results = await queryRegular({ 71 | queryFunction: ddb.query.bind(ddb), 72 | queryParams: { 73 | TableName: 'example_table', 74 | ProjectionExpression: 'hash_key, range_key', 75 | KeyConditionExpression: '#hash_key = :hash_key AND begins_with(#range_key, :range_key)', 76 | ExpressionAttributeNames: { 77 | '#hash_key': 'hash_key', 78 | '#range_key': 'range_key', 79 | }, 80 | ExpressionAttributeValues: { 81 | ':hash_key': hash_key, 82 | ':range_key': range_key, 83 | }, 84 | }, 85 | }); 86 | 87 | console.log(results); 88 | /* 89 | [{hash_key: 'foo', range_key: 'bar'}, {hash_key: 'foo', range_key: 'baz'}] 90 | */ 91 | ``` 92 | 93 | ## Publish 94 | 95 | ```sh 96 | $ git checkout master 97 | $ yarn version 98 | $ yarn publish 99 | $ git push origin master --tags 100 | ``` 101 | 102 | ## License 103 | 104 | MIT © [Shelf](https://shelf.io) 105 | -------------------------------------------------------------------------------- /benchmark.ts: -------------------------------------------------------------------------------- 1 | import DynamoDB from 'aws-sdk/clients/dynamodb'; 2 | import {queryOptimized, queryRegular} from './src'; 3 | 4 | const ddb = new DynamoDB.DocumentClient({region: 'us-east-1'}); 5 | 6 | (async () => { 7 | // warm up TCP connection 8 | await testQueryRegular('hk5'); 9 | 10 | console.time('Regular query: <1 MB of items'); 11 | await testQueryRegular('hk5'); 12 | console.timeEnd('Regular query: <1 MB of items'); 13 | 14 | console.time('Optimized query: <1 MB of items'); 15 | await testQueryOptimized('hk5'); 16 | console.timeEnd('Optimized query: <1 MB of items'); 17 | 18 | console.time('Regular query: ~21 MB of items'); 19 | await testQueryRegular('hk6'); 20 | console.timeEnd('Regular query: ~21 MB of items'); 21 | 22 | 
console.time('Optimized query: ~21 MB of items'); 23 | await testQueryOptimized('hk6'); 24 | console.timeEnd('Optimized query: ~21 MB of items'); 25 | })(); 26 | 27 | function testQueryRegular(hash_key: string) { 28 | return queryRegular({ 29 | queryFunction: ddb.query.bind(ddb), 30 | queryParams: { 31 | TableName: 'ddb-query-optimized', 32 | ProjectionExpression: 'hash_key, range_key', 33 | KeyConditionExpression: '#hash_key = :hash_key', 34 | FilterExpression: '#number > :number', 35 | ExpressionAttributeNames: { 36 | '#hash_key': 'hash_key', 37 | '#number': 'number', 38 | }, 39 | ExpressionAttributeValues: { 40 | ':hash_key': hash_key, 41 | ':number': 0.5, 42 | }, 43 | }, 44 | }); 45 | } 46 | 47 | function testQueryOptimized(hash_key: string) { 48 | return queryOptimized({ 49 | queryFunction: ddb.query.bind(ddb), 50 | queryParams: { 51 | TableName: 'ddb-query-optimized', 52 | ProjectionExpression: 'hash_key, range_key', 53 | KeyConditionExpression: '#hash_key = :hash_key', 54 | FilterExpression: '#number > :number', 55 | ExpressionAttributeNames: { 56 | '#hash_key': 'hash_key', 57 | '#number': 'number', 58 | }, 59 | ExpressionAttributeValues: { 60 | ':hash_key': hash_key, 61 | ':number': 0.5, 62 | }, 63 | }, 64 | }); 65 | } 66 | -------------------------------------------------------------------------------- /eslint.config.mjs: -------------------------------------------------------------------------------- 1 | import rules from '@shelf/eslint-config/typescript.js'; 2 | 3 | export default [ 4 | ...rules, 5 | {files: ['**/*.js', '**/*.jsx', '**/*.ts', '**/*.tsx', '**/*.json']}, 6 | { 7 | ignores: [ 8 | '.idea/', 9 | 'coverage/', 10 | 'draft.js', 11 | 'lib/', 12 | 'dist/', 13 | 'node_modules/', 14 | 'packages/**/tsconfig.types.json', 15 | 'packages/**/node_modules/**', 16 | 'packages/**/lib/**', 17 | 'renovate.json', 18 | ], 19 | }, 20 | ]; 21 | -------------------------------------------------------------------------------- /jest-dynamodb-config.js: 
-------------------------------------------------------------------------------- 1 | function getTableConfig(tableName) { 2 | return { 3 | TableName: tableName, 4 | KeySchema: [ 5 | {AttributeName: 'hash_key', KeyType: 'HASH'}, 6 | {AttributeName: 'range_key', KeyType: 'RANGE'}, 7 | ], 8 | AttributeDefinitions: [ 9 | {AttributeName: 'hash_key', AttributeType: 'S'}, 10 | {AttributeName: 'range_key', AttributeType: 'S'}, 11 | ], 12 | ProvisionedThroughput: {ReadCapacityUnits: 1, WriteCapacityUnits: 1}, 13 | StreamSpecification: { 14 | StreamEnabled: true, 15 | StreamViewType: 'NEW_AND_OLD_IMAGES', 16 | }, 17 | }; 18 | } 19 | 20 | module.exports = { 21 | tables: [getTableConfig('example_table')], 22 | }; 23 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Gemshelf Inc. (shelf.io) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
10 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@shelf/dynamodb-query-optimized", 3 | "version": "3.0.0", 4 | "description": "2x faster DynamoDB queries when you need to query 2+ MB of data", 5 | "license": "MIT", 6 | "author": { 7 | "name": "Vlad Holubiev", 8 | "email": "vlad@shelf.io", 9 | "url": "https://shelf.io" 10 | }, 11 | "main": "lib", 12 | "types": "lib/index.d.ts", 13 | "files": [ 14 | "lib" 15 | ], 16 | "scripts": { 17 | "build": "rm -rf lib/ && yarn build:types && yarn build:code", 18 | "build:code": "babel src --out-dir lib --ignore '**/*.test.ts' --extensions '.ts' && find ./lib -name '*.test.d.ts' -delete", 19 | "build:types": "tsc --emitDeclarationOnly --declaration --isolatedModules false --declarationDir lib", 20 | "coverage": "yarn test --coverage", 21 | "lint": "yarn lint:ci --fix", 22 | "lint:ci": "eslint . --quiet", 23 | "prepack": "yarn build", 24 | "test": "export ENVIRONMENT=local && jest src", 25 | "type-check": "tsc --noEmit", 26 | "type-check:watch": "npm run type-check -- --watch" 27 | }, 28 | "lint-staged": { 29 | "*.{html,md,yml}": [ 30 | "prettier --write" 31 | ], 32 | "*.{js,ts,json}": [ 33 | "eslint --fix" 34 | ] 35 | }, 36 | "babel": { 37 | "extends": "@shelf/babel-config/backend" 38 | }, 39 | "prettier": "@shelf/prettier-config", 40 | "jest": { 41 | "preset": "@shelf/jest-dynamodb" 42 | }, 43 | "dependencies": { 44 | "@shelf/aws-ddb-with-xray": "2.1.0", 45 | "lodash": "4.17.21", 46 | "p-map": "4.0.0" 47 | }, 48 | "devDependencies": { 49 | "@babel/cli": "7.26.4", 50 | "@babel/core": "7.26.9", 51 | "@shelf/babel-config": "1.0.2", 52 | "@shelf/eslint-config": "4.2.1", 53 | "@shelf/jest-dynamodb": "3.4.1", 54 | "@shelf/prettier-config": "1.0.0", 55 | "@shelf/tsconfig": "0.0.11", 56 | "@types/jest": "28.1.8", 57 | "@types/lodash": "4.17.15", 58 | "@types/node": "22", 59 | "eslint": 
"9.21.0", 60 | "husky": "8.0.3", 61 | "jest": "28.1.3", 62 | "lint-staged": "13.3.0", 63 | "prettier": "3.5.2", 64 | "typescript": "4.9.5" 65 | }, 66 | "peerDependencies": { 67 | "@aws-sdk/client-dynamodb": "3.x.x", 68 | "@aws-sdk/lib-dynamodb": "3.x.x" 69 | }, 70 | "engines": { 71 | "node": ">=22" 72 | }, 73 | "publishConfig": { 74 | "access": "public" 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /populate.ts: -------------------------------------------------------------------------------- 1 | import {insertMany} from './src/helpers/insert-many'; 2 | 3 | (async () => { 4 | const listToInsert = []; 5 | 6 | // Each item is approx. 21.5 KB 7 | // We insert approx. 21.5 MB of data with hash_key: 'hk6' 8 | // and ~0.8 MB of data with hash_key: 'hk5' 9 | for (let i = 0; i < 1040; i++) { 10 | listToInsert.push({ 11 | hash_key: i >= 1000 ? 'hk5' : 'hk6', 12 | range_key: `rk-${i}`, 13 | foo: 'hello world '.repeat(100), 14 | bar: 'hello world '.repeat(100), 15 | baz: 'hello world '.repeat(100), 16 | items: new Array(1000).fill(Math.random()), 17 | number: Math.random(), 18 | }); 19 | } 20 | 21 | await insertMany({ 22 | TableName: 'ddb-query-optimized', 23 | Items: listToInsert, 24 | }); 25 | })(); 26 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["github>shelfio/renovate-config-public"], 3 | "labels": ["backend"], 4 | "ignoreDeps": [ 5 | "cimg/node" 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /src/helpers/ddb.ts: -------------------------------------------------------------------------------- 1 | import {getDocumentClient} from '@shelf/aws-ddb-with-xray'; 2 | 3 | export const ddb = getDocumentClient({ 4 | documentClientConfig: { 5 | marshallOptions: {convertEmptyValues: true}, 6 | ...(process.env.DDB_DEBUG_LOGS && 
{logger: console}), 7 | }, 8 | credentials: { 9 | accessKeyId: 'fakeMyKeyId', 10 | secretAccessKey: 'fakeSecretAccessKey', 11 | sessionToken: 'fakeSessionToken', 12 | }, 13 | clientConfig: { 14 | endpoint: 'http://localhost:8000', 15 | tls: false, 16 | region: 'local-env', 17 | ...(process.env.DDB_DEBUG_LOGS && {logger: console}), 18 | }, 19 | }); 20 | -------------------------------------------------------------------------------- /src/helpers/delete-all.ts: -------------------------------------------------------------------------------- 1 | import {ScanCommand} from '@aws-sdk/lib-dynamodb'; 2 | import {chunk} from 'lodash'; 3 | import type { 4 | BatchWriteCommandInput, 5 | BatchWriteCommandOutput, 6 | ScanCommandInput, 7 | } from '@aws-sdk/lib-dynamodb'; 8 | import type {AttributeValue, WriteRequest} from '@aws-sdk/client-dynamodb'; 9 | import {ddb} from './ddb'; 10 | import {batchWrite} from './insert-many'; 11 | 12 | export async function deleteAll(params: ScanCommandInput): Promise { 13 | let LastEvaluatedKey; 14 | let resp; 15 | 16 | do { 17 | const scanCommand: ScanCommand = new ScanCommand({ 18 | ...params, 19 | ...(LastEvaluatedKey ? 
{ExclusiveStartKey: LastEvaluatedKey} : {}), 20 | }); 21 | 22 | resp = await ddb.send(scanCommand); 23 | 24 | // eslint-disable-next-line prefer-destructuring 25 | LastEvaluatedKey = resp.LastEvaluatedKey; 26 | } while (LastEvaluatedKey); 27 | 28 | return deleteMany({ 29 | TableName: params.TableName!, 30 | Keys: resp.Items as Record[], 31 | }); 32 | } 33 | 34 | type DeleteRequestItem = Omit; 35 | 36 | type DeleteManyParams = { 37 | TableName: string; 38 | Keys: Record[]; 39 | }; 40 | 41 | export function deleteMany( 42 | params: DeleteManyParams, 43 | retryCount = 0 44 | ): Promise { 45 | const {TableName, Keys} = params; 46 | const keysChunks = chunk(Keys, 25); 47 | 48 | const paramsChunks = keysChunks.map( 49 | (keysChunk): BatchWriteCommandInput => makeDeleteRequestItems({TableName, Keys: keysChunk}) 50 | ); 51 | 52 | return Promise.all( 53 | paramsChunks.map((params): Promise => batchWrite(params, retryCount)) 54 | ); 55 | } 56 | 57 | function makeDeleteRequestItems(params: DeleteManyParams): BatchWriteCommandInput { 58 | const {TableName, Keys} = params; 59 | 60 | return { 61 | RequestItems: { 62 | [TableName]: Keys.map((key): DeleteRequestItem => makeDeleteRequestItem(key)), 63 | }, 64 | }; 65 | } 66 | 67 | function makeDeleteRequestItem(key: Record): DeleteRequestItem { 68 | return { 69 | DeleteRequest: { 70 | Key: key, 71 | }, 72 | }; 73 | } 74 | -------------------------------------------------------------------------------- /src/helpers/insert-many.ts: -------------------------------------------------------------------------------- 1 | import {BatchWriteCommand} from '@aws-sdk/lib-dynamodb'; 2 | import {chunk} from 'lodash'; 3 | import pMap from 'p-map'; 4 | import type {AttributeValue, WriteRequest} from '@aws-sdk/client-dynamodb'; 5 | import type {BatchWriteCommandInput, BatchWriteCommandOutput} from '@aws-sdk/lib-dynamodb'; 6 | import {ddb} from './ddb'; 7 | 8 | type InsertManyParams = { 9 | TableName: string; 10 | Items: any[]; 11 | }; 12 | 13 | 
export function insertMany( 14 | params: InsertManyParams, 15 | retryCount = 3 16 | ): Promise { 17 | const {TableName, Items} = params; 18 | const itemsChunks = chunk(Items, 25); 19 | 20 | const paramsChunks = itemsChunks.map( 21 | (itemsChunk): BatchWriteCommandInput => makePutRequestItems({TableName, Items: itemsChunk}) 22 | ); 23 | 24 | return pMap( 25 | paramsChunks, 26 | (params): Promise => batchWrite(params, retryCount), 27 | {concurrency: 100, stopOnError: false} 28 | ); 29 | } 30 | 31 | export async function batchWrite( 32 | params: BatchWriteCommandInput, 33 | retryCounter = 0 34 | ): Promise { 35 | if (retryCounter > 10) { 36 | retryCounter = 10; 37 | } 38 | 39 | const results = await ddb.send(new BatchWriteCommand(params)); 40 | 41 | const isAnyOpFailed = Boolean(Object.keys(results?.UnprocessedItems || {}).length); 42 | 43 | if (retryCounter > 0 && isAnyOpFailed) { 44 | return batchWrite( 45 | { 46 | RequestItems: results.UnprocessedItems as BatchWriteCommandInput['RequestItems'], 47 | }, 48 | retryCounter - 1 49 | ); 50 | } 51 | 52 | return results; 53 | } 54 | 55 | type PutRequestItem = Omit; 56 | 57 | function makePutRequestItems(params: InsertManyParams): BatchWriteCommandInput { 58 | const {TableName, Items} = params; 59 | 60 | return { 61 | RequestItems: { 62 | [TableName]: Items.map((item): PutRequestItem => makePutRequestItem(item)), 63 | }, 64 | }; 65 | } 66 | 67 | function makePutRequestItem(item: Record): PutRequestItem { 68 | return { 69 | PutRequest: { 70 | Item: item, 71 | }, 72 | }; 73 | } 74 | -------------------------------------------------------------------------------- /src/index.test.ts: -------------------------------------------------------------------------------- 1 | import {marshall} from '@aws-sdk/util-dynamodb'; 2 | import {QueryCommand} from '@aws-sdk/client-dynamodb'; 3 | import {insertMany} from './helpers/insert-many'; 4 | import {deleteAll} from './helpers/delete-all'; 5 | import {ddb} from './helpers/ddb'; 6 | import 
{queryOptimized, queryRegular} from './'; 7 | 8 | const hash_key = 'some-hash-key'; 9 | const range_key = 'some-range-key'; 10 | const getRangeKey = (range: number) => `${range_key}-${range}`; 11 | jest.setTimeout(120000); 12 | 13 | beforeAll(async () => { 14 | const listToInsert = []; 15 | 16 | // Each item is approx. 3.6 KB 17 | // We insert approx. 10.5 MB of data with hash_key: 'some-hash-key' 18 | // and ~0.9 Mb of data with hash_key: 'some-hash-key-1mb' 19 | for (let i = 0; i < 3250; i++) { 20 | listToInsert.push({ 21 | hash_key: i >= 3000 ? 'some-hash-key-1mb' : hash_key, 22 | range_key: getRangeKey(i), 23 | name: 'hello', 24 | description: 'hello world', 25 | foo: 'hello world '.repeat(100), 26 | bar: 'hello world '.repeat(100), 27 | baz: 'hello world '.repeat(100), 28 | }); 29 | } 30 | 31 | await insertMany({ 32 | TableName: 'example_table', 33 | Items: listToInsert, 34 | }); 35 | }); 36 | 37 | it(`should return all elements using optimized find query for 10 MB table`, async () => { 38 | const result = await testQueryOptimized('some-hash-key'); 39 | 40 | expect(result).toHaveLength(3000); 41 | }); 42 | 43 | it(`should return all elements using regular find query for 10 MB table`, async () => { 44 | const result = await testQueryRegular('some-hash-key'); 45 | 46 | expect(result).toHaveLength(3000); 47 | }); 48 | 49 | it(`should return all elements using optimized find query for 1 MB table`, async () => { 50 | const result = await testQueryOptimized('some-hash-key-1mb'); 51 | 52 | expect(result).toHaveLength(250); 53 | }); 54 | 55 | it(`should return unmarshalled element for 1 MB table`, async () => { 56 | const result = await testQueryOptimized('some-hash-key-1mb'); 57 | 58 | expect(result[0]).toEqual({hash_key: 'some-hash-key-1mb', range_key: 'some-range-key-3000'}); 59 | }); 60 | 61 | it(`should return all elements using regular find query for 1 MB table`, async () => { 62 | const result = await testQueryRegular('some-hash-key-1mb'); 63 | 64 | 
expect(result).toHaveLength(250); 65 | }); 66 | 67 | afterAll(async () => { 68 | await deleteAll({ 69 | TableName: 'example_table', 70 | ProjectionExpression: 'hash_key, range_key', 71 | }); 72 | }); 73 | 74 | function testQueryRegular(hash_key: string) { 75 | return queryRegular({ 76 | QueryCommand: QueryCommand, 77 | client: ddb, 78 | queryParams: { 79 | TableName: 'example_table', 80 | ProjectionExpression: 'hash_key, range_key', 81 | KeyConditionExpression: '#hash_key = :hash_key AND begins_with(#range_key, :range_key)', 82 | ExpressionAttributeNames: { 83 | '#hash_key': 'hash_key', 84 | '#range_key': 'range_key', 85 | }, 86 | ExpressionAttributeValues: marshall({ 87 | ':hash_key': hash_key, 88 | ':range_key': range_key, 89 | }), 90 | }, 91 | }); 92 | } 93 | 94 | function testQueryOptimized(hash_key: string) { 95 | return queryOptimized({ 96 | QueryCommand: QueryCommand, 97 | client: ddb, 98 | queryParams: { 99 | TableName: 'example_table', 100 | ProjectionExpression: 'hash_key, range_key', 101 | KeyConditionExpression: '#hash_key = :hash_key AND begins_with(#range_key, :range_key)', 102 | ExpressionAttributeNames: { 103 | '#hash_key': 'hash_key', 104 | '#range_key': 'range_key', 105 | }, 106 | ExpressionAttributeValues: marshall({ 107 | ':hash_key': hash_key, 108 | ':range_key': range_key, 109 | }), 110 | }, 111 | }); 112 | } 113 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import {isEqual, uniqBy} from 'lodash'; 2 | import {unmarshall} from '@aws-sdk/util-dynamodb'; 3 | import type {AttributeValue, QueryCommandInput, QueryCommandOutput} from '@aws-sdk/client-dynamodb'; 4 | import type {DynamoDBClient} from '@aws-sdk/client-dynamodb'; 5 | import type {QueryCommand} from '@aws-sdk/client-dynamodb'; 6 | 7 | type QueryOptimizedParams = { 8 | client: DynamoDBClient; 9 | QueryCommand: typeof QueryCommand; 10 | queryParams: Omit; 
11 | }; 12 | 13 | // 14 | // This method is optimized to query indices where a query might scan 2+ MB of data. 15 | // It works by launching 2 parallel queries that iterate from both ends of the index 16 | // until they meet in the middle 17 | // 18 | export async function queryOptimized>({ 19 | queryParams, 20 | QueryCommand, 21 | client, 22 | }: QueryOptimizedParams): Promise { 23 | let allItems: T[] = []; 24 | let allItemsFromLeftQuery: T[] = []; 25 | let allItemsFromRightQuery: T[] = []; 26 | 27 | let isMiddleReached = false; 28 | let queryLeftLastEvaluatedKey; 29 | let queryRightLastEvaluatedKey; 30 | let areBothQueriesExhausted = false; 31 | 32 | do { 33 | const responses = await Promise.all([ 34 | executeLeftQuery({client, queryParams, QueryCommand}, queryLeftLastEvaluatedKey), 35 | executeRightQuery({client, queryParams, QueryCommand}, queryRightLastEvaluatedKey), 36 | ]); 37 | 38 | const [respLeft, respRight] = responses as any; 39 | 40 | if (respLeft.LastEvaluatedKey) { 41 | queryLeftLastEvaluatedKey = respLeft.LastEvaluatedKey; 42 | } 43 | 44 | if (respRight.LastEvaluatedKey) { 45 | queryRightLastEvaluatedKey = respRight.LastEvaluatedKey; 46 | } 47 | 48 | // If both queries don't have a cursor to fetch the next item - stop iterating 49 | areBothQueriesExhausted = !queryLeftLastEvaluatedKey && !queryRightLastEvaluatedKey; 50 | 51 | if (!isMiddleReached) { 52 | isMiddleReached = checkIfMiddleReached(allItemsFromLeftQuery, allItemsFromRightQuery); 53 | } 54 | 55 | allItemsFromLeftQuery = allItemsFromLeftQuery.concat(respLeft.Items!); 56 | allItemsFromRightQuery = allItemsFromRightQuery.concat(respRight.Items!); 57 | 58 | allItems = allItems.concat(respLeft.Items!); 59 | allItems = allItems.concat(respRight.Items!); 60 | } while (!isMiddleReached && !areBothQueriesExhausted); 61 | 62 | return uniqBy(allItems, item => JSON.stringify(item)).map(item => unmarshall(item) as T); 63 | } 64 | 65 | export async function queryRegular>({ 66 | client, 67 | queryParams, 
68 | QueryCommand, 69 | }: QueryOptimizedParams): Promise { 70 | let allItems: T[] = []; 71 | let lastEvaluatedKey; 72 | 73 | do { 74 | const resp: QueryCommandOutput = await executeLeftQuery( 75 | { 76 | client, 77 | queryParams, 78 | QueryCommand, 79 | }, 80 | lastEvaluatedKey 81 | ); 82 | 83 | if (resp.Items && resp.Items.length) { 84 | allItems = allItems.concat(resp.Items! as T[]); 85 | } 86 | 87 | lastEvaluatedKey = resp.LastEvaluatedKey; 88 | } while (lastEvaluatedKey); 89 | 90 | return allItems; 91 | } 92 | 93 | function executeLeftQuery( 94 | {client, queryParams, QueryCommand}: QueryOptimizedParams, 95 | key?: any 96 | ): Promise { 97 | return client.send( 98 | new QueryCommand({ 99 | ...queryParams, 100 | ...(key ? {ExclusiveStartKey: key} : {}), 101 | ScanIndexForward: true, 102 | }) 103 | ); 104 | } 105 | 106 | function executeRightQuery( 107 | {client, queryParams, QueryCommand}: QueryOptimizedParams, 108 | key?: any 109 | ): Promise { 110 | return client.send( 111 | new QueryCommand({ 112 | ...queryParams, 113 | ...(key ? {ExclusiveStartKey: key} : {}), 114 | ScanIndexForward: false, 115 | }) 116 | ); 117 | } 118 | 119 | function checkIfMiddleReached(allItemsFromLeftQuery: T[], allItemsFromRightQuery: T[]): boolean { 120 | return allItemsFromLeftQuery.some(leftItem => 121 | allItemsFromRightQuery.some(rightItem => isEqual(rightItem, leftItem)) 122 | ); 123 | } 124 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@shelf/tsconfig/backend", 3 | "compilerOptions": { 4 | "strict": true 5 | }, 6 | "exclude": ["node_modules"], 7 | "include": ["src"] 8 | } 9 | --------------------------------------------------------------------------------