├── .github ├── pull_request_template.md └── workflows │ ├── codeql.yml │ ├── golangci.yml │ └── unit_tests.yml ├── .gitignore ├── .golangci.yml ├── CHANGELOG.md ├── LICENSE ├── Makefile ├── README.md ├── codecov.yml ├── common ├── errors │ ├── errors.go │ └── errors_test.go ├── logger │ └── logger.go └── utils │ ├── bytes.go │ ├── bytes_test.go │ ├── path.go │ └── versions.go ├── config ├── config.go └── toml.go ├── go.mod ├── go.sum ├── proto ├── changelog.pb.go ├── commit_info.go ├── commit_info.pb.go └── memiavl │ ├── changelog.proto │ └── commit_info.proto ├── sc ├── memiavl │ ├── README.md │ ├── benchmark_test.go │ ├── db.go │ ├── db_test.go │ ├── export.go │ ├── filelock.go │ ├── import.go │ ├── iterator.go │ ├── iterator_test.go │ ├── layout_little_endian.go │ ├── layout_native.go │ ├── mem_node.go │ ├── mmap.go │ ├── multitree.go │ ├── node.go │ ├── opts.go │ ├── persisted_node.go │ ├── proof.go │ ├── proof_test.go │ ├── snapshot.go │ ├── snapshot_test.go │ ├── tree.go │ └── tree_test.go ├── store.go └── types │ ├── snapshot.go │ ├── store.go │ └── tree.go ├── ss ├── pebbledb │ ├── batch.go │ ├── bench_test.go │ ├── comparator.go │ ├── db.go │ ├── db_test.go │ ├── hash_test.go │ └── iterator.go ├── pebbledb_init.go ├── pruning │ └── manager.go ├── rocksdb │ ├── README.md │ ├── batch.go │ ├── bench_test.go │ ├── comparator.go │ ├── db.go │ ├── db_test.go │ ├── iterator.go │ └── opts.go ├── rocksdb_init.go ├── sqlite │ ├── batch.go │ ├── bench_test.go │ ├── db.go │ ├── db_test.go │ └── iterator.go ├── sqlite_init.go ├── store.go ├── store_test.go ├── test │ ├── storage_bench_suite.go │ ├── storage_test_suite.go │ └── utils.go ├── types │ └── store.go └── util │ ├── iterator.go │ ├── modules.go │ ├── types.go │ └── xor_hasher.go ├── stream ├── changelog │ ├── changelog.go │ ├── changelog_test.go │ ├── subscriber.go │ └── utils.go └── types │ └── types.go └── tools ├── Makefile ├── README.md ├── cmd └── seidb │ ├── benchmark │ ├── generate.go │ ├── iteration.go │ ├── random_read.go │ ├── reverse_iteration.go │ ├── types.go │ └── write.go │ ├── main.go │ └── operations │ ├── dump_db.go │ ├── dump_iavl.go │ ├── module.go │ ├── prune.go │ ├── replay_changelog.go │ └── state_size.go ├── dbbackend ├── backend.go └── benchmark.go └── utils └── utils.go /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Describe your changes and provide context 2 | 3 | ## Testing performed to validate your change 4 | 5 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL" 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - "**.go" 7 | push: 8 | branches: 9 | - main 10 | paths: 11 | - "**.go" 12 | 13 | jobs: 14 | analyze: 15 | name: Analyze 16 | runs-on: ubuntu-latest 17 | permissions: 18 | actions: read 19 | contents: read 20 | security-events: write 21 | 22 | steps: 23 | - name: Checkout repository 24 | uses: actions/checkout@v3 25 | - uses: actions/setup-go@v3 26 | with: 27 | go-version: '1.19' 28 | # Initializes the CodeQL tools for scanning. 29 | - name: Initialize CodeQL 30 | uses: github/codeql-action/init@v2 31 | with: 32 | languages: "go" 33 | queries: crypto-com/cosmos-sdk-codeql@main,security-and-quality 34 | # If you wish to specify custom queries, you can do so here or in a config file. 35 | # By default, queries listed here will override any specified in a config file. 
36 | # Prefix the list here with "+" to use these queries and those in the config file. 37 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 38 | 39 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 40 | # If this step fails, then you should remove it and run the build manually (see below) 41 | - name: Autobuild 42 | uses: github/codeql-action/autobuild@v2 43 | 44 | # ℹ️ Command-line programs to run using the OS shell. 45 | # 📚 https://git.io/JvXDl 46 | 47 | # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines 48 | # and modify them (or add more) to build your code if your project 49 | # uses a compiled language 50 | 51 | #- run: | 52 | # make bootstrap 53 | # make release 54 | 55 | - name: Perform CodeQL Analysis 56 | uses: github/codeql-action/analyze@v2 -------------------------------------------------------------------------------- /.github/workflows/golangci.yml: -------------------------------------------------------------------------------- 1 | name: golangci-lint 2 | on: 3 | push: 4 | tags: 5 | - v* 6 | branches: 7 | - main 8 | pull_request: 9 | permissions: 10 | contents: read 11 | # Optional: allow read access to pull request. Use with `only-new-issues` option. 12 | # pull-requests: read 13 | jobs: 14 | golangci: 15 | name: lint 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/setup-go@v3 19 | with: 20 | go-version: '1.19' 21 | - uses: actions/checkout@v3 22 | - uses: golangci/golangci-lint-action@v3 23 | with: 24 | version: v1.49 25 | args: --config=.golangci.yml --timeout=10m -------------------------------------------------------------------------------- /.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- 1 | name: Unit Test 2 | on: 3 | push: 4 | tags: 5 | - v* 6 | branches: 7 | - main 8 | pull_request: 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | tests: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/setup-go@v3 17 | with: 18 | go-version: '1.19' 19 | - uses: actions/checkout@v3 20 | - name: Run Go Tests 21 | run: | 22 | make test-all 23 | 24 | upload-coverage-report: 25 | needs: tests 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: actions/checkout@v3 29 | - uses: actions/setup-go@v3 30 | with: 31 | go-version: '1.19' 32 | 33 | # Download all coverage reports from the 'tests' job 34 | - name: Download coverage reports 35 | uses: actions/download-artifact@v3 36 | 37 | - name: Set GOPATH 38 | run: echo "GOPATH=$(go env GOPATH)" >> $GITHUB_ENV 39 | 40 | - name: Add GOPATH/bin to PATH 41 | run: echo "GOBIN=$(go env GOPATH)/bin" >> $GITHUB_ENV 42 | 43 | - name: Install gocovmerge 44 | run: go get github.com/wadey/gocovmerge && go install github.com/wadey/gocovmerge 45 | 46 | - name: Merge coverage reports 47 | run: gocovmerge $(find . 
-type f -name '*profile.out') > coverage.txt 48 | 49 | - name: Check coverage report lines 50 | run: wc -l coverage.txt 51 | continue-on-error: true 52 | 53 | - name: Check coverage report files 54 | run: ls **/*profile.out 55 | continue-on-error: true 56 | 57 | # Now we upload the merged report to Codecov 58 | - name: Upload coverage to Codecov 59 | uses: codecov/codecov-action@v3 60 | with: 61 | file: ./coverage.txt 62 | token: ${{ secrets.CODECOV_TOKEN }} 63 | fail_ci_if_error: true 64 | 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Dependency directories (remove the comment below to include it) 15 | # vendor/ 16 | 17 | # Other files 18 | .DS_Store 19 | *.swp 20 | *.swo 21 | *.swl 22 | *.swm 23 | *.swn 24 | *.pyc 25 | .dccache 26 | .idea -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | tests: false 3 | # # timeout for analysis, e.g. 30s, 5m, default is 1m 4 | timeout: 10m 5 | build-tags: 6 | - codeanalysis 7 | 8 | linters: 9 | disable-all: true 10 | enable: 11 | - bodyclose 12 | - dogsled 13 | - exportloopref 14 | - errcheck 15 | - goconst 16 | - gocritic 17 | - gofmt 18 | - goimports 19 | - gosec 20 | - gosimple 21 | - govet 22 | - ineffassign 23 | - misspell 24 | - nakedret 25 | - staticcheck 26 | # - structcheck ## author abandoned project 27 | - stylecheck 28 | - revive 29 | - typecheck 30 | - unconvert 31 | - unused 32 | - unparam 33 | - misspell 34 | # - nolintlint ## does not work with IDEs like VSCode which automatically insert leading spaces 35 | 36 | issues: 37 | exclude-rules: 38 | - text: "Use of weak random number generator" 39 | linters: 40 | - gosec 41 | - text: "ST1003:" 42 | linters: 43 | - stylecheck 44 | # FIXME: Disabled until golangci-lint updates stylecheck with this fix: 45 | # https://github.com/dominikh/go-tools/issues/389 46 | - text: "ST1016:" 47 | linters: 48 | - stylecheck -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sei-protocol/sei-db/a90aa3ade77e93ac53b10d0505f6f4180f233478/CHANGELOG.md -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | COMMIT := $(shell git log -1 --format='%H') 4 | BUILDDIR ?= $(CURDIR)/build 5 | INVARIANT_CHECK_INTERVAL ?= $(INVARIANT_CHECK_INTERVAL:-0) 6 | GOPATH ?= $(shell $(GO) env GOPATH) 7 | export PROJECT_HOME=$(shell git rev-parse --show-toplevel) 8 | export GO_PKG_PATH=$(HOME)/go/pkg 9 | export GO111MODULE = on 10 | 11 | # process build tags 12 | 13 | LEDGER_ENABLED ?= true 14 | build_tags = netgo 15 | ifeq ($(LEDGER_ENABLED),true) 16 | ifeq ($(OS),Windows_NT) 17 | GCCEXE = $(shell where gcc.exe 2> NUL) 18 | ifeq ($(GCCEXE),) 19 | $(error gcc.exe not installed for 
ledger support, please install or set LEDGER_ENABLED=false) 20 | else 21 | build_tags += ledger 22 | endif 23 | else 24 | UNAME_S = $(shell uname -s) 25 | ifeq ($(UNAME_S),OpenBSD) 26 | $(warning OpenBSD detected, disabling ledger support (https://github.com/cosmos/cosmos-sdk/issues/1988)) 27 | else 28 | GCC = $(shell command -v gcc 2> /dev/null) 29 | ifeq ($(GCC),) 30 | $(error gcc not installed for ledger support, please install or set LEDGER_ENABLED=false) 31 | else 32 | build_tags += ledger 33 | endif 34 | endif 35 | endif 36 | endif 37 | 38 | build_tags += $(BUILD_TAGS) 39 | build_tags := $(strip $(build_tags)) 40 | 41 | whitespace := 42 | whitespace += $(whitespace) 43 | comma := , 44 | build_tags_comma_sep := $(subst $(whitespace),$(comma),$(build_tags)) 45 | 46 | # process linker flags 47 | ldflags = "" 48 | 49 | ifeq ($(LINK_STATICALLY),true) 50 | ldflags += -linkmode=external -extldflags "-Wl,-z,muldefs -static" 51 | endif 52 | ldflags += $(LDFLAGS) 53 | ldflags := $(strip $(ldflags)) 54 | 55 | # BUILD_FLAGS := -tags "$(build_tags)" -ldflags '$(ldflags)' -race 56 | BUILD_FLAGS := -tags "$(build_tags)" -ldflags '$(ldflags)' 57 | 58 | 59 | test-all: 60 | go test -v -mod=readonly ./... -covermode=atomic 61 | 62 | lint-all: 63 | golangci-lint run --config=.golangci.yml 64 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SeiDB 2 | SeiDB is the next-gen on-chain database designed to replace the [IAVL Store](https://github.com/cosmos/iavl) of Cosmos-based chains. 3 | The goal of SeiDB is to improve overall data access performance and prevent state bloat issues. 4 | 5 | ## Key Wins of SeiDB 6 | - Reduces active chain state size by 60% 7 | - Reduces historical data growth rate by ~90% 8 | - Improves state sync times by 1200% and block sync time by 2x 9 | - Enables 287x improvement in block commit times 10 | - Provides faster state access and state commit, improving overall TPS by 2x 11 | - All while ensuring Sei archive nodes are able to achieve the same high performance as any full node. 12 | 13 | ## Architecture 14 | The original idea of SeiDB came from the [Cosmos StoreV2 ADR](https://docs.cosmos.network/main/build/architecture/adr-065-store-v2): instead of 15 | using a single giant database to store both the latest and historical data, SeiDB splits storage into two separate layers: 16 | - State Commitment (SC Store): stores the active chain state data in a memory-mapped Merkle tree, providing fast transaction state access and Merkle hashing 17 | - State Store (SS Store): specifically designed and tuned for full nodes and archive nodes to serve historical queries 18 | 19 | ### Advantages 20 | - SC and SS backends become easily swappable (see the sketch below) 21 | - The SS store only needs to store raw key/value pairs, saving disk space and reducing write amplification 22 | - The delineation of active state and historical data massively improves performance for all node operators in the Sei ecosystem.
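To make the split concrete, here is a minimal, illustrative Go sketch of the two layers. These interfaces are hypothetical and for exposition only; the actual interfaces live under `sc/types` and `ss/types`:

```go
package seidb

// Illustrative sketch only; not the actual SeiDB interfaces
// (see sc/types/store.go and ss/types/store.go for the real ones).

// StateCommit is the SC layer: it commits blocks and serves the active state.
type StateCommit interface {
	// Commit applies the block's pending change sets and returns the
	// new version and root app hash.
	Commit() (version int64, rootHash []byte, err error)
	// Get reads a key from the latest (active) state of a module store.
	Get(storeKey string, key []byte) []byte
}

// StateStore is the SS layer: it stores versioned raw key/value pairs.
type StateStore interface {
	// Get reads a raw key/value pair as of a historical version.
	Get(storeKey string, version int64, key []byte) ([]byte, error)
	// Prune removes versions older than the given version.
	Prune(version int64) error
}
```

Either layer can then be swapped independently; for example, the SS backend can be pebbledb or rocksdb (see the `ss-backend` config) without touching SC.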
23 | 24 | ### Trade-offs 25 | - Historical proofs are not supported for all historical blocks 26 | - Integrity and correctness validation is lacking for historical data 27 | 28 | ## State Commitment (SC) Layer 29 | Responsibilities of the SC layer: 30 | - Provide the root app hash for each new block 31 | - Provide the data access layer for transaction execution 32 | - Provide an API to import/export chain state for state sync requirements 33 | - Provide historical proofs for heights not yet pruned 34 | 35 | SeiDB currently forks [MemIAVL](https://github.com/crypto-org-chain/cronos/tree/main/memiavl) and uses that as its SC layer implementation. 36 | 37 | In order to stay backward compatible with existing Cosmos chains, MemIAVL uses the same data structure (Merkleized AVL tree) as the Cosmos SDK. 38 | 39 | However, the biggest difference is that MemIAVL represents the IAVL tree with memory-mapped flat files instead of persisting the whole tree as key/values in a database engine. 40 | 41 | ## State Store (SS) Layer 42 | The goal of the SS store is to provide a modular storage backend which supports multiple implementations, 43 | to facilitate storing versioned raw key/value pairs in a fast embedded database. 44 | 45 | The responsibilities and functions of SS include the following: 46 | - Provide fast and efficient queries for versioned raw key/value pairs 47 | - Provide versioned CRUD operations 48 | - Provide versioned batching functionality 49 | - Provide versioned iteration functionality 50 | - Provide pruning functionality 51 | 52 | ### DB Backend 53 | Extensive benchmarking was conducted with Sei chain key-value data, measuring random write, read, and forward/backward iteration performance for LevelDB, RocksDB, PebbleDB, and SQLite. 54 | Our benchmarks show that PebbleDB performs best among these database backends, which is why the SeiDB SS store uses PebbleDB as the recommended default backend. 55 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | precision: 2 3 | round: down 4 | status: 5 | project: 6 | default: 7 | target: 60% 8 | threshold: 1% # allow this much decrease on project 9 | 10 | comment: 11 | layout: "reach,diff,flags,tree,betaprofiling" 12 | behavior: default # update if exists else create new 13 | require_changes: true 14 | 15 | ignore: 16 | - "*.md" 17 | - "*.rst" 18 | - "**/*pb*.go" 19 | - "tests/**/*" 20 | - "benchmark/" -------------------------------------------------------------------------------- /common/errors/errors.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "errors" 5 | "strings" 6 | ) 7 | 8 | var ( 9 | ErrKeyEmpty = errors.New("key empty") 10 | ErrRecordNotFound = errors.New("record not found") 11 | ErrStartAfterEnd = errors.New("start key after end key") 12 | ErrorExportDone = errors.New("export is complete") 13 | ) 14 | 15 | // Join returns an error that wraps the given errors. 16 | // Any nil error values are discarded. 17 | // Join returns nil if errs contains no non-nil values. 18 | // The error formats as the concatenation of the strings obtained 19 | // by calling the Error method of each element of errs, with a newline 20 | // between each string.
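// For example, Join(errors.New("err1"), nil, errors.New("err2")).Error() returns "err1\nerr2" (this case is exercised in errors_test.go).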
21 | func Join(errs ...error) error { 22 | var errStrs []string 23 | numErrs := 0 24 | for _, err := range errs { 25 | if err != nil { 26 | numErrs++ 27 | if err.Error() != "" { 28 | errStrs = append(errStrs, err.Error()) 29 | } 30 | } 31 | } 32 | 33 | if numErrs <= 0 { 34 | return nil 35 | } 36 | 37 | return errors.New(strings.Join(errStrs, "\n")) 38 | 39 | } 40 | -------------------------------------------------------------------------------- /common/errors/errors_test.go: -------------------------------------------------------------------------------- 1 | package errors 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | ) 7 | 8 | func TestJoinReturnsNil(t *testing.T) { 9 | if err := Join(); err != nil { 10 | t.Errorf("errors.Join() = %v, want nil", err) 11 | } 12 | if err := Join(nil); err != nil { 13 | t.Errorf("errors.Join(nil) = %v, want nil", err) 14 | } 15 | if err := Join(nil, nil); err != nil { 16 | t.Errorf("errors.Join(nil, nil) = %v, want nil", err) 17 | } 18 | } 19 | 20 | func TestJoinError(t *testing.T) { 21 | err1 := errors.New("err1") 22 | err2 := errors.New("err2") 23 | for _, test := range []struct { 24 | errs []error 25 | want string 26 | }{{ 27 | errs: []error{err1}, 28 | want: "err1", 29 | }, { 30 | errs: []error{err1, err2}, 31 | want: "err1\nerr2", 32 | }, { 33 | errs: []error{err1, nil, err2}, 34 | want: "err1\nerr2", 35 | }} { 36 | got := Join(test.errs...).Error() 37 | if got != test.want { 38 | t.Errorf("Join(%v).Error() = %q; want %q", test.errs, got, test.want) 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /common/logger/logger.go: -------------------------------------------------------------------------------- 1 | package logger 2 | 3 | // Logger is what any CometBFT library should take. 4 | type Logger interface { 5 | Debug(msg string, keyvals ...interface{}) 6 | Info(msg string, keyvals ...interface{}) 7 | Error(msg string, keyvals ...interface{}) 8 | } 9 | 10 | type nopLogger struct{} 11 | 12 | // Interface assertions 13 | var _ Logger = (*nopLogger)(nil) 14 | 15 | // NewNopLogger returns a logger that doesn't do anything. 16 | func NewNopLogger() Logger { return &nopLogger{} } 17 | 18 | func (nopLogger) Info(string, ...interface{}) {} 19 | func (nopLogger) Debug(string, ...interface{}) {} 20 | func (nopLogger) Error(string, ...interface{}) {} 21 | -------------------------------------------------------------------------------- /common/utils/bytes.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | // Clone returns a copy of b[:len(b)]. 4 | // The result may have additional unused capacity. 5 | // Clone(nil) returns nil. 6 | func Clone(b []byte) []byte { 7 | if b == nil { 8 | return nil 9 | } 10 | return append([]byte{}, b...) 11 | } 12 | 13 | // Equal reports whether a and b 14 | // are the same length and contain the same bytes. 15 | // A nil argument is equivalent to an empty slice. 16 | func Equal(a, b []byte) bool { 17 | // Neither cmd/compile nor gccgo allocates for these string conversions. 
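// The string conversion compares lengths first and then contents in a single pass, without copying either slice.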
18 | return string(a) == string(b) 19 | } 20 | -------------------------------------------------------------------------------- /common/utils/bytes_test.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func TestClone(t *testing.T) { 9 | var cloneTests = [][]byte{ 10 | []byte(nil), 11 | Clone([]byte{}), 12 | []byte(strings.Repeat("a", 42))[:0], 13 | []byte(strings.Repeat("a", 42))[:0:0], 14 | []byte("short"), 15 | []byte(strings.Repeat("a", 42)), 16 | } 17 | for _, input := range cloneTests { 18 | clone := Clone(input) 19 | if !Equal(clone, input) { 20 | t.Errorf("Clone(%q) = %q; want %q", input, clone, input) 21 | } 22 | 23 | if input == nil && clone != nil { 24 | t.Errorf("Clone(%#v) return value should be equal to nil slice.", input) 25 | } 26 | 27 | if input != nil && clone == nil { 28 | t.Errorf("Clone(%#v) return value should not be equal to nil slice.", input) 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /common/utils/path.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import "path/filepath" 4 | 5 | func GetCommitStorePath(homePath string) string { 6 | return filepath.Join(homePath, "data", "committer.db") 7 | } 8 | 9 | func GetStateStorePath(homePath string, backend string) string { 10 | return filepath.Join(homePath, "data", backend) 11 | } 12 | 13 | func GetChangelogPath(dbPath string) string { 14 | return filepath.Join(dbPath, "changelog") 15 | } 16 | -------------------------------------------------------------------------------- /common/utils/versions.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | // NextVersion get the next version 4 | func NextVersion(v int64, initialVersion uint32) int64 { 5 | if v == 0 && initialVersion > 1 { 6 | return int64(initialVersion) 7 | } 8 | return v + 1 9 | } 10 | 11 | // VersionToIndex converts version to rlog index based on initial version 12 | func VersionToIndex(version int64, initialVersion uint32) uint64 { 13 | if initialVersion > 1 { 14 | return uint64(version) - uint64(initialVersion) + 1 15 | } 16 | return uint64(version) 17 | } 18 | 19 | // IndexToVersion converts rlog index to version, reverse of versionToIndex 20 | func IndexToVersion(index uint64, initialVersion uint32) int64 { 21 | if initialVersion > 1 { 22 | return int64(index) + int64(initialVersion) - 1 23 | } 24 | return int64(index) 25 | } 26 | -------------------------------------------------------------------------------- /config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | const ( 4 | DefaultSnapshotInterval = 10000 5 | DefaultSnapshotKeepRecent = 1 6 | DefaultSnapshotWriterLimit = 1 7 | DefaultAsyncCommitBuffer = 100 8 | DefaultCacheSize = 100000 9 | DefaultSSKeepRecent = 100000 10 | DefaultSSPruneInterval = 600 11 | DefaultSSImportWorkers = 1 12 | DefaultSSAsyncBuffer = 100 13 | DefaultSSHashRange = 1000000 14 | ) 15 | 16 | type StateCommitConfig struct { 17 | // Enable defines if the state-commit should be enabled. 18 | // If true, it will replace the existing IAVL db backend with memIAVL. 19 | // defaults to false. 
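// Rendered as the `sc-enable` entry by DefaultConfigTemplate in config/toml.go.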
20 | Enable bool `mapstructure:"enable"` 21 | 22 | // Directory defines the state-commit store directory 23 | // If not explicitly set, defaults to the application home directory 24 | Directory string `mapstructure:"directory"` 25 | 26 | // ZeroCopy defines if the memiavl should return slices pointing to mmap-ed buffers directly (zero-copy), 27 | // the zero-copied slices must not be retained beyond the current block's execution. 28 | // the sdk address cache will be disabled if zero-copy is enabled. 29 | // defaults to false. 30 | ZeroCopy bool `mapstructure:"zero-copy"` 31 | 32 | // AsyncCommitBuffer defines the size of the asynchronous commit queue; 33 | // this greatly improves block catching-up performance, <= 0 means synchronous commit. 34 | // defaults to 100 35 | AsyncCommitBuffer int `mapstructure:"async-commit-buffer"` 36 | 37 | // SnapshotKeepRecent defines how many old snapshots (excluding the latest one) to keep 38 | // defaults to 1 to make sure ibc relayers work. 39 | SnapshotKeepRecent uint32 `mapstructure:"snapshot-keep-recent"` 40 | 41 | // SnapshotInterval defines the block interval at which memiavl snapshots are taken, defaults to 10000. 42 | SnapshotInterval uint32 `mapstructure:"snapshot-interval"` 43 | 44 | // SnapshotWriterLimit defines the concurrency for taking commit store snapshots 45 | SnapshotWriterLimit int `mapstructure:"snapshot-writer-limit"` 46 | 47 | // CacheSize defines the size of the cache for each memiavl store. 48 | // Deprecated: this is removed; we just rely on the mmap page cache 49 | CacheSize int `mapstructure:"cache-size"` 50 | } 51 | 52 | type StateStoreConfig struct { 53 | 54 | // Enable defines if the state-store should be enabled for historical queries. 55 | Enable bool `mapstructure:"enable"` 56 | 57 | // DBDirectory defines the directory to store the state store db files 58 | // If not explicitly set, defaults to the application home directory 59 | // default to empty 60 | DBDirectory string `mapstructure:"db-directory"` 61 | 62 | // DedicatedChangelog defines if we should use a separate changelog for the SS store rather than sharing one with SC 63 | DedicatedChangelog bool `mapstructure:"dedicated-changelog"` 64 | 65 | // Backend defines the backend database used for state-store 66 | // Supported backends: pebbledb, rocksdb 67 | // defaults to pebbledb 68 | Backend string `mapstructure:"backend"` 69 | 70 | // AsyncWriteBuffer defines the async queue length for commits to be applied to the State Store 71 | // Set <= 0 for synchronous writes, which means commits also need to wait for data to be persisted in the State Store. 72 | // defaults to 100 73 | AsyncWriteBuffer int `mapstructure:"async-write-buffer"` 74 | 75 | // KeepRecent defines the number of versions to keep in the state store 76 | // Setting it to 0 means keep everything.
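// In other words, versions older than (latest version - KeepRecent) become eligible for pruning.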
77 | // Defaults to keeping the last 100,000 blocks 78 | KeepRecent int `mapstructure:"keep-recent"` 79 | 80 | // PruneIntervalSeconds defines the interval in seconds to trigger pruning 81 | // defaults to every 600 seconds 82 | PruneIntervalSeconds int `mapstructure:"prune-interval-seconds"` 83 | 84 | // ImportNumWorkers defines the number of goroutines used during import 85 | // defaults to 1 86 | ImportNumWorkers int `mapstructure:"import-num-workers"` 87 | 88 | // Whether to keep the last version of a key during pruning or delete it 89 | // defaults to true 90 | KeepLastVersion bool `mapstructure:"keep-last-version"` 91 | 92 | // Range of blocks after which an XOR hash is computed and stored 93 | // defaults to 1,000,000 blocks 94 | HashRange int64 `json:"hash_range"` 95 | } 96 | 97 | func DefaultStateCommitConfig() StateCommitConfig { 98 | return StateCommitConfig{ 99 | AsyncCommitBuffer: DefaultAsyncCommitBuffer, 100 | CacheSize: DefaultCacheSize, 101 | SnapshotInterval: DefaultSnapshotInterval, 102 | SnapshotKeepRecent: DefaultSnapshotKeepRecent, 103 | } 104 | } 105 | 106 | func DefaultStateStoreConfig() StateStoreConfig { 107 | return StateStoreConfig{ 108 | Backend: "pebbledb", 109 | AsyncWriteBuffer: DefaultSSAsyncBuffer, 110 | KeepRecent: DefaultSSKeepRecent, 111 | PruneIntervalSeconds: DefaultSSPruneInterval, 112 | ImportNumWorkers: DefaultSSImportWorkers, 113 | KeepLastVersion: true, 114 | HashRange: DefaultSSHashRange, 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /config/toml.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | // DefaultConfigTemplate defines the configuration template for the SeiDB configuration 4 | const DefaultConfigTemplate = ` 5 | ############################################################################# 6 | ### SeiDB Configuration ### 7 | ############################################################################# 8 | 9 | [state-commit] 10 | # Enable defines if SeiDB should be enabled to override the existing IAVL db backend. 11 | sc-enable = {{ .StateCommit.Enable }} 12 | 13 | # Defines the SC store directory; if not explicitly set, defaults to the application home directory 14 | sc-directory = "{{ .StateCommit.Directory }}" 15 | 16 | # ZeroCopy defines if memiavl should return slices pointing to mmap-ed buffers directly (zero-copy), 17 | # the zero-copied slices must not be retained beyond the current block's execution. 18 | # the sdk address cache will be disabled if zero-copy is enabled. 19 | sc-zero-copy = {{ .StateCommit.ZeroCopy }} 20 | 21 | # AsyncCommitBuffer defines the size of the asynchronous commit queue; this greatly improves block catching-up 22 | # performance, setting it to 0 means synchronous commit. 23 | sc-async-commit-buffer = {{ .StateCommit.AsyncCommitBuffer }} 24 | 25 | # KeepRecent defines how many state-commit snapshots (besides the latest one) to keep 26 | # defaults to 1 to make sure ibc relayers work. 27 | sc-keep-recent = {{ .StateCommit.SnapshotKeepRecent }} 28 | 29 | # SnapshotInterval defines the block interval at which snapshots are taken, defaults to 10000 blocks. 30 | sc-snapshot-interval = {{ .StateCommit.SnapshotInterval }} 31 | 32 | # SnapshotWriterLimit defines the max concurrency for taking commit store snapshots 33 | sc-snapshot-writer-limit = {{ .StateCommit.SnapshotWriterLimit }} 34 | 35 | [state-store] 36 | # Enable defines whether the state-store should be enabled for storing historical data.
37 | # Supporting historical queries or exporting state snapshots requires setting this to true 38 | # This config only takes effect when SeiDB is enabled (sc-enable = true) 39 | ss-enable = {{ .StateStore.Enable }} 40 | 41 | # Defines the directory to store the state store db files 42 | # If not explicitly set, defaults to the application home directory 43 | ss-db-directory = "{{ .StateStore.DBDirectory }}" 44 | 45 | # DBBackend defines the backend database used for state-store. 46 | # Supported backends: pebbledb, rocksdb 47 | # defaults to pebbledb (recommended) 48 | ss-backend = "{{ .StateStore.Backend }}" 49 | 50 | # AsyncWriteBuffer defines the async queue length for commits to be applied to the State Store 51 | # Set <= 0 for synchronous writes, which means commits also need to wait for data to be persisted in the State Store. 52 | # defaults to 100 for asynchronous writes 53 | ss-async-write-buffer = {{ .StateStore.AsyncWriteBuffer }} 54 | 55 | # KeepRecent defines the number of versions to keep in the state store 56 | # Setting it to 0 means keep everything 57 | # Defaults to keeping the last 100,000 blocks 58 | ss-keep-recent = {{ .StateStore.KeepRecent }} 59 | 60 | # PruneInterval defines the minimum interval in seconds + some random delay to trigger SS pruning. 61 | # It is recommended to trigger pruning less frequently with a large interval. 62 | # defaults to 600 seconds 63 | ss-prune-interval = {{ .StateStore.PruneIntervalSeconds }} 64 | 65 | # ImportNumWorkers defines the concurrency for state sync import 66 | # defaults to 1 67 | ss-import-num-workers = {{ .StateStore.ImportNumWorkers }} 68 | 69 | # HashRange defines the range of blocks after which an XOR hash is computed and stored 70 | # defaults to 1,000,000 blocks 71 | ss-hash-range = {{ .StateStore.HashRange }} 72 | 73 | ` 74 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/sei-protocol/sei-db 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/alitto/pond v1.8.3 7 | github.com/armon/go-metrics v0.4.1 8 | github.com/cockroachdb/pebble v0.0.0-20230819001538-1798fbf5956c 9 | github.com/confio/ics23/go v0.9.0 10 | github.com/cosmos/iavl v0.21.0-alpha.1.0.20230904092046-df3db2d96583 11 | github.com/gogo/protobuf v1.3.3 12 | github.com/ledgerwatch/erigon-lib v0.0.0-20230210071639-db0e7ed11263 13 | github.com/linxGnu/grocksdb v1.8.4 14 | github.com/spf13/cobra v1.6.1 15 | github.com/stretchr/testify v1.8.4 16 | github.com/tendermint/tm-db v0.6.8-0.20220519162814-e24b96538a12 17 | github.com/tidwall/btree v1.6.0 18 | github.com/tidwall/gjson v1.10.2 19 | github.com/tidwall/wal v1.1.7 20 | github.com/zbiljic/go-filelock v0.0.0-20170914061330-1dbf7103ab7d 21 | golang.org/x/exp v0.0.0-20230811145659-89c5cff77bcb 22 | modernc.org/sqlite v1.26.0 23 | ) 24 | 25 | require ( 26 | github.com/DataDog/zstd v1.4.5 // indirect 27 | github.com/beorn7/perks v1.0.1 // indirect 28 | github.com/cespare/xxhash v1.1.0 // indirect 29 | github.com/cespare/xxhash/v2 v2.2.0 // indirect 30 | github.com/cockroachdb/errors v1.8.1 // indirect 31 | github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f // indirect 32 | github.com/cockroachdb/redact v1.0.8 // indirect 33 | github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2 // indirect 34 | github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect 35 | github.com/cosmos/gorocksdb v1.2.0 // indirect 36 | github.com/davecgh/go-spew v1.1.1 // indirect 37
| github.com/dgraph-io/badger/v3 v3.2103.2 // indirect 38 | github.com/dgraph-io/ristretto v0.1.1 // indirect 39 | github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13 // indirect 40 | github.com/dustin/go-humanize v1.0.1 // indirect 41 | github.com/fsnotify/fsnotify v1.6.0 // indirect 42 | github.com/golang/glog v1.1.0 // indirect 43 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 44 | github.com/golang/protobuf v1.5.3 // indirect 45 | github.com/golang/snappy v0.0.4 // indirect 46 | github.com/google/btree v1.1.2 // indirect 47 | github.com/google/flatbuffers v1.12.1 // indirect 48 | github.com/google/uuid v1.3.0 // indirect 49 | github.com/hashicorp/go-immutable-radix v1.3.1 // indirect 50 | github.com/hashicorp/golang-lru v0.5.5-0.20210104140557-80c98217689d // indirect 51 | github.com/inconshreveable/mousetrap v1.0.1 // indirect 52 | github.com/jmhodges/levigo v1.0.0 // indirect 53 | github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 // indirect 54 | github.com/klauspost/compress v1.16.3 // indirect 55 | github.com/kr/pretty v0.3.1 // indirect 56 | github.com/kr/text v0.2.0 // indirect 57 | github.com/mattn/go-isatty v0.0.19 // indirect 58 | github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect 59 | github.com/onsi/gomega v1.20.0 // indirect 60 | github.com/petermattis/goid v0.0.0-20230317030725-371a4b8eda08 // indirect 61 | github.com/pkg/errors v0.9.1 // indirect 62 | github.com/pmezard/go-difflib v1.0.0 // indirect 63 | github.com/prometheus/client_golang v1.14.0 // indirect 64 | github.com/prometheus/client_model v0.3.0 // indirect 65 | github.com/prometheus/common v0.42.0 // indirect 66 | github.com/prometheus/procfs v0.9.0 // indirect 67 | github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect 68 | github.com/rogpeppe/go-internal v1.11.0 // indirect 69 | github.com/spf13/pflag v1.0.5 // indirect 70 | github.com/syndtr/goleveldb v1.0.1-0.20220721030215-126854af5e6d // indirect 71 | github.com/tendermint/tendermint v0.34.20 // indirect 72 | github.com/tidwall/match v1.1.1 // indirect 73 | github.com/tidwall/pretty v1.2.0 // indirect 74 | github.com/tidwall/tinylru v1.1.0 // indirect 75 | go.etcd.io/bbolt v1.3.7 // indirect 76 | go.opencensus.io v0.23.0 // indirect 77 | golang.org/x/crypto v0.14.0 // indirect 78 | golang.org/x/mod v0.11.0 // indirect 79 | golang.org/x/net v0.17.0 // indirect 80 | golang.org/x/sync v0.3.0 // indirect 81 | golang.org/x/sys v0.13.0 // indirect 82 | golang.org/x/tools v0.6.0 // indirect 83 | google.golang.org/protobuf v1.31.0 // indirect 84 | gopkg.in/yaml.v3 v3.0.1 // indirect 85 | lukechampine.com/uint128 v1.2.0 // indirect 86 | modernc.org/cc/v3 v3.40.0 // indirect 87 | modernc.org/ccgo/v3 v3.16.13 // indirect 88 | modernc.org/libc v1.24.1 // indirect 89 | modernc.org/mathutil v1.5.0 // indirect 90 | modernc.org/memory v1.6.0 // indirect 91 | modernc.org/opt v0.1.3 // indirect 92 | modernc.org/strutil v1.1.3 // indirect 93 | modernc.org/token v1.0.1 // indirect 94 | ) 95 | 96 | replace ( 97 | github.com/cosmos/iavl => github.com/sei-protocol/sei-iavl v0.1.8-0.20230726213826-031d03d26f2d 98 | github.com/gogo/protobuf => github.com/regen-network/protobuf v1.3.3-alpha.regen.1 99 | github.com/tendermint/tm-db => github.com/sei-protocol/tm-db v0.0.4 100 | ) 101 | -------------------------------------------------------------------------------- /proto/commit_info.go: -------------------------------------------------------------------------------- 1 | package 
proto 2 | 3 | import "fmt" 4 | 5 | func (cid CommitID) String() string { 6 | return fmt.Sprintf("CommitID{%v:%X}", cid.Hash, cid.Version) 7 | } 8 | -------------------------------------------------------------------------------- /proto/memiavl/changelog.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package memiavl; 3 | 4 | import "gogoproto/gogo.proto"; 5 | import "iavl/changeset.proto"; 6 | import "memiavl/commit_info.proto"; 7 | 8 | // NamedChangeSet combines a tree name with the changeset 9 | message NamedChangeSet { 10 | iavl.ChangeSet changeset = 1 [(gogoproto.nullable) = false]; 11 | string name = 2; 12 | } 13 | 14 | // TreeNameUpgrade defines upgrades of tree names: 15 | // - New tree: { name: "tree" } 16 | // - Delete tree: { name: "tree", delete: true } 17 | // - Rename tree: { name: "new-tree", rename_from: "old-tree" } 18 | message TreeNameUpgrade { 19 | string name = 1; 20 | string rename_from = 2; 21 | bool delete = 3; 22 | } 23 | 24 | // MultiTreeMetadata stores the metadata for MultiTree 25 | message MultiTreeMetadata { 26 | CommitInfo commit_info = 1; 27 | int64 initial_version = 2; 28 | } 29 | 30 | // ChangelogEntry is a single entry in the changelog 31 | message ChangelogEntry { 32 | int64 version = 1; 33 | repeated NamedChangeSet changesets = 2; 34 | repeated TreeNameUpgrade upgrades = 3; 35 | } 36 | 37 | 38 | -------------------------------------------------------------------------------- /proto/memiavl/commit_info.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package memiavl; 3 | 4 | import "gogoproto/gogo.proto"; 5 | 6 | // CommitInfo defines commit information used by the multi-store when committing 7 | // a version/height. 8 | message CommitInfo { 9 | int64 version = 1; 10 | repeated StoreInfo store_infos = 2 [(gogoproto.nullable) = false]; 11 | } 12 | 13 | // StoreInfo defines store-specific commit information. It contains a reference 14 | // between a store name and the commit ID. 15 | message StoreInfo { 16 | string name = 1; 17 | CommitID commit_id = 2 [(gogoproto.nullable) = false]; 18 | } 19 | 20 | // CommitID defines the commitment information when a specific store is 21 | // committed. 22 | message CommitID { 23 | option (gogoproto.goproto_stringer) = false; 24 | 25 | int64 version = 1; 26 | bytes hash = 2; 27 | } 28 | -------------------------------------------------------------------------------- /sc/memiavl/README.md: -------------------------------------------------------------------------------- 1 | # MemIAVL 2 | 3 | ## Changelog 4 | * Oct 11 2023: 5 | * Forked from Cronos MemIAVL (https://github.com/crypto-org-chain/cronos/tree/v1.1.0-rc4/memiavl) 6 | 7 | ## The Design 8 | The idea of MemIAVL is to keep the whole chain state in memory as much as possible to speed up reads and writes. 9 | - MemIAVL uses a write-ahead log (WAL) to persist the changesets from transaction commits, which speeds up writes. 10 | - Instead of updating and flushing nodes to disk, state changes at every height are only written to the WAL file 11 | - MemIAVL snapshots are taken periodically and written to disk to materialize the tree at some given height H 12 | - Each snapshot is composed of 3 files per module: one for key/value pairs, one for leaf nodes and one for branch nodes 13 | - After a snapshot is taken, the snapshot files are then loaded with mmap for faster reads and lazy loading via the page cache.
At the same time, older WAL files will be truncated up to the snapshot height 14 | - Each MemIAVL tree is composed of two types of nodes: MemNode and PersistedNode 15 | - All nodes start out as persisted nodes; each persisted node maps to some data stored in a file 16 | - During updates or insertions, persisted nodes turn into MemNodes 17 | - MemNodes are nodes stored only in memory, serving all future reads and writes 18 | - If a node crashes in the middle of a commit, it can load the last snapshot and replay the WAL file to catch up to the last committed height 19 | 20 | ### Advantages 21 | - Better write amplification: we only need to write the change sets in real time, which are much more compact than IAVL nodes, so IAVL snapshots can be created at a much lower frequency. 22 | - Better read amplification: the IAVL snapshot is a plain file and the nodes are referenced by offset, so the read amplification is simply 1. 23 | - Better space amplification: the archived change sets are much more compact than the current IAVL tree; in our test case, the ratio could be as large as 1:100. We don't need to keep very old IAVL snapshots, because the state store handles the historical key-value queries, and the IAVL tree only takes care of Merkle proof generation for blocks within the unbonding period. In the very rare case that an IAVL tree of a very old version is needed, you can always replay the change sets from genesis. 24 | - Facilitates async commit, which improves commit latency by a huge amount 25 | 26 | ### Trade-offs 27 | - Performance can degrade when the state size grows much larger than memory 28 | - MemIAVL makes historical proofs much slower 29 | - Periodic snapshot creation is a very heavy operation and could become a bottleneck 30 | 31 | ### IAVL Snapshot 32 | 33 | An IAVL snapshot is composed of the following files: 34 | 35 | - `metadata`, 16 bytes: 36 | 37 | ``` 38 | magic: 4 39 | format: 4 40 | version: 4 41 | root node index: 4 42 | ``` 43 | 44 | - `nodes`, an array of fixed-size (16+32 bytes) nodes; the node format is as follows: 45 | 46 | ``` 47 | # branch 48 | height : 1 49 | _padding : 3 50 | version : 4 51 | size : 4 52 | key node : 4 53 | hash : [32]byte 54 | 55 | # leaf 56 | height : 1 57 | _padding : 3 58 | version : 4 59 | key offset : 8 60 | hash : [32]byte 61 | ``` 62 | Each node has a fixed length and can be indexed directly. Nodes reference each other by node index, and they are written in post-order depth-first traversal, so the root node is always placed at the end. 63 | 64 | For a branch node, the `key node` field references the smallest leaf node in the right branch, and the key slice is fetched from there indirectly; the leaf nodes store an `offset` into the `kvs` file, from which the key and value slices can be built. 65 | 66 | A branch node's left/right child node indexes are inferred from existing information and the properties of post-order traversal: 67 | 68 | ``` 69 | right child index = self index - 1 70 | left child index = key node - 1 71 | ``` 72 | 73 | The version/size/node indexes are encoded with 4 bytes, which should be enough for the foreseeable future, but could be widened to more bytes later. 74 | 75 | The implementation reads the mmap-ed content in a zero-copy way, uses no extra node cache, and relies only on the OS page cache. 76 | 77 | - `kvs`, a sequence of leaf node key-value pairs; the keys are ordered with no duplicates. Each entry is a varint length-prefixed key followed by a varint length-prefixed value:
78 | 79 | ``` 80 | keyLen: varint-uint64 81 | key 82 | valueLen: varint-uint64 83 | value 84 | *repeat* 85 | ``` 86 | -------------------------------------------------------------------------------- /sc/memiavl/benchmark_test.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/binary" 7 | "math/rand" 8 | "sort" 9 | "testing" 10 | 11 | iavlcache "github.com/cosmos/iavl/cache" 12 | "github.com/stretchr/testify/require" 13 | "github.com/tidwall/btree" 14 | ) 15 | 16 | func BenchmarkByteCompare(b *testing.B) { 17 | var x, y [32]byte 18 | for i := 0; i < b.N; i++ { 19 | _ = bytes.Compare(x[:], y[:]) 20 | } 21 | } 22 | 23 | func BenchmarkRandomGet(b *testing.B) { 24 | amount := 1000000 25 | items := genRandItems(amount) 26 | targetKey := items[500].key 27 | targetValue := items[500].value 28 | targetItem := itemT{key: targetKey} 29 | 30 | tree := New(0) 31 | for _, item := range items { 32 | tree.Set(item.key, item.value) 33 | } 34 | 35 | snapshotDir := b.TempDir() 36 | err := tree.WriteSnapshot(context.Background(), snapshotDir) 37 | require.NoError(b, err) 38 | snapshot, err := OpenSnapshot(snapshotDir) 39 | require.NoError(b, err) 40 | defer snapshot.Close() 41 | 42 | b.Run("memiavl", func(b *testing.B) { 43 | require.Equal(b, targetValue, tree.Get(targetKey)) 44 | 45 | b.ResetTimer() 46 | for i := 0; i < b.N; i++ { 47 | _ = tree.Get(targetKey) 48 | } 49 | }) 50 | b.Run("memiavl-disk", func(b *testing.B) { 51 | diskTree := NewFromSnapshot(snapshot, true, 0) 52 | require.Equal(b, targetValue, diskTree.Get(targetKey)) 53 | 54 | b.ResetTimer() 55 | for i := 0; i < b.N; i++ { 56 | _ = diskTree.Get(targetKey) 57 | } 58 | }) 59 | b.Run("memiavl-disk-cache-hit", func(b *testing.B) { 60 | diskTree := NewFromSnapshot(snapshot, true, 1) 61 | require.Equal(b, targetValue, diskTree.Get(targetKey)) 62 | 63 | b.ResetTimer() 64 | for i := 0; i < b.N; i++ { 65 | _ = diskTree.Get(targetKey) 66 | } 67 | }) 68 | b.Run("memiavl-disk-cache-miss", func(b *testing.B) { 69 | diskTree := NewFromSnapshot(snapshot, true, 0) 70 | // enforce an empty cache to emulate cache miss 71 | require.Equal(b, targetValue, diskTree.Get(targetKey)) 72 | 73 | b.ResetTimer() 74 | for i := 0; i < b.N; i++ { 75 | _ = diskTree.Get(targetKey) 76 | } 77 | }) 78 | b.Run("btree-degree-2", func(b *testing.B) { 79 | bt2 := btree.NewBTreeGOptions(lessG, btree.Options{ 80 | NoLocks: true, 81 | Degree: 2, 82 | }) 83 | for _, item := range items { 84 | bt2.Set(item) 85 | } 86 | v, _ := bt2.Get(targetItem) 87 | require.Equal(b, targetValue, v.value) 88 | 89 | b.ResetTimer() 90 | for i := 0; i < b.N; i++ { 91 | _, _ = bt2.Get(targetItem) 92 | } 93 | }) 94 | b.Run("btree-degree-32", func(b *testing.B) { 95 | bt32 := btree.NewBTreeGOptions(lessG, btree.Options{ 96 | NoLocks: true, 97 | Degree: 32, 98 | }) 99 | for _, item := range items { 100 | bt32.Set(item) 101 | } 102 | v, _ := bt32.Get(targetItem) 103 | require.Equal(b, targetValue, v.value) 104 | 105 | b.ResetTimer() 106 | for i := 0; i < b.N; i++ { 107 | _, _ = bt32.Get(targetItem) 108 | } 109 | }) 110 | b.Run("iavl-lru", func(b *testing.B) { 111 | cache := iavlcache.New(amount) 112 | for _, item := range items { 113 | cache.Add(NewIavlCacheNode(item.key, item.value)) 114 | } 115 | v := cache.Get(targetItem.key).(iavlCacheNode).value 116 | require.Equal(b, targetValue, v) 117 | 118 | b.ResetTimer() 119 | for i := 0; i < b.N; i++ { 120 | _ = cache.Get(targetKey).(iavlCacheNode).value 121 
| } 122 | }) 123 | b.Run("go-map", func(b *testing.B) { 124 | m := make(map[string][]byte, amount) 125 | for _, item := range items { 126 | m[string(item.key)] = item.value 127 | } 128 | v := m[string(targetItem.key)] 129 | require.Equal(b, targetValue, v) 130 | 131 | b.ResetTimer() 132 | for i := 0; i < b.N; i++ { 133 | _ = m[string(targetKey)] 134 | } 135 | }) 136 | 137 | b.Run("binary-search", func(b *testing.B) { 138 | // the last benchmark sort the items in place 139 | sort.Slice(items, func(i, j int) bool { 140 | return bytes.Compare(items[i].key, items[j].key) < 0 141 | }) 142 | cmp := func(i int) bool { return bytes.Compare(items[i].key, targetKey) != -1 } 143 | i := sort.Search(len(items), cmp) 144 | require.Equal(b, targetValue, items[i].value) 145 | 146 | b.ResetTimer() 147 | for i := 0; i < b.N; i++ { 148 | n := sort.Search(len(items), cmp) 149 | _ = items[n].value 150 | } 151 | }) 152 | } 153 | 154 | func BenchmarkRandomSet(b *testing.B) { 155 | items := genRandItems(1000000) 156 | b.ResetTimer() 157 | b.Run("memiavl", func(b *testing.B) { 158 | for i := 0; i < b.N; i++ { 159 | tree := New(0) 160 | for _, item := range items { 161 | tree.Set(item.key, item.value) 162 | } 163 | } 164 | }) 165 | b.Run("tree2", func(b *testing.B) { 166 | for i := 0; i < b.N; i++ { 167 | bt := btree.NewBTreeGOptions(lessG, btree.Options{ 168 | NoLocks: true, 169 | Degree: 2, 170 | }) 171 | for _, item := range items { 172 | bt.Set(item) 173 | } 174 | } 175 | }) 176 | b.Run("tree32", func(b *testing.B) { 177 | for i := 0; i < b.N; i++ { 178 | bt := btree.NewBTreeGOptions(lessG, btree.Options{ 179 | NoLocks: true, 180 | Degree: 32, 181 | }) 182 | for _, item := range items { 183 | bt.Set(item) 184 | } 185 | } 186 | }) 187 | } 188 | 189 | type itemT struct { 190 | key, value []byte 191 | } 192 | 193 | func lessG(a, b itemT) bool { 194 | return bytes.Compare(a.key, b.key) == -1 195 | } 196 | 197 | func int64ToItemT(n uint64) itemT { 198 | var key, value [8]byte 199 | binary.BigEndian.PutUint64(key[:], n) 200 | binary.LittleEndian.PutUint64(value[:], n) 201 | return itemT{ 202 | key: key[:], 203 | value: value[:], 204 | } 205 | } 206 | 207 | func genRandItems(n int) []itemT { 208 | r := rand.New(rand.NewSource(0)) 209 | items := make([]itemT, n) 210 | itemsM := make(map[uint64]bool) 211 | for i := 0; i < n; i++ { 212 | for { 213 | key := uint64(r.Int63n(10000000000000000)) 214 | if !itemsM[key] { 215 | itemsM[key] = true 216 | items[i] = int64ToItemT(key) 217 | break 218 | } 219 | } 220 | } 221 | return items 222 | } 223 | 224 | type iavlCacheNode struct { 225 | key []byte 226 | value []byte 227 | } 228 | 229 | func NewIavlCacheNode(key, value []byte) iavlCacheNode { 230 | return iavlCacheNode{key, value} 231 | } 232 | 233 | func (n iavlCacheNode) GetKey() []byte { 234 | return n.key 235 | } 236 | 237 | func (n iavlCacheNode) GetCacheKey() []byte { 238 | return n.key 239 | } 240 | -------------------------------------------------------------------------------- /sc/memiavl/export.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "path/filepath" 8 | 9 | errorutils "github.com/sei-protocol/sei-db/common/errors" 10 | "github.com/sei-protocol/sei-db/common/logger" 11 | "github.com/sei-protocol/sei-db/config" 12 | "github.com/sei-protocol/sei-db/sc/types" 13 | ) 14 | 15 | // exportBufferSize is the number of nodes to buffer in the exporter. 
It improves throughput by 16 | // processing multiple nodes per context switch, but take care to avoid excessive memory usage, 17 | // especially since callers may export several IAVL stores in parallel (e.g. the Cosmos SDK). 18 | const exportBufferSize = 32 19 | 20 | type MultiTreeExporter struct { 21 | // only one of them is non-nil 22 | db *DB 23 | mtree *MultiTree 24 | 25 | iTree int 26 | exporter *Exporter 27 | } 28 | 29 | func NewMultiTreeExporter(dir string, version uint32, supportExportNonSnapshotVersion bool) (exporter *MultiTreeExporter, err error) { 30 | var ( 31 | db *DB 32 | mtree *MultiTree 33 | ) 34 | if supportExportNonSnapshotVersion { 35 | db, err = OpenDB(logger.NewNopLogger(), int64(version), Options{ 36 | Dir: dir, 37 | ZeroCopy: true, 38 | ReadOnly: true, 39 | SnapshotWriterLimit: config.DefaultSnapshotWriterLimit, 40 | }) 41 | if err != nil { 42 | return nil, fmt.Errorf("invalid height: %d, %w", version, err) 43 | } 44 | } else { 45 | curVersion, err := currentVersion(dir) 46 | if err != nil { 47 | return nil, fmt.Errorf("failed to load current version: %w", err) 48 | } 49 | if int64(version) > curVersion { 50 | return nil, fmt.Errorf("snapshot is not created yet: height: %d", version) 51 | } 52 | mtree, err = LoadMultiTree(filepath.Join(dir, snapshotName(int64(version))), true, 0) 53 | if err != nil { 54 | return nil, fmt.Errorf("snapshot doesn't exist: height: %d, %w", version, err) 55 | } 56 | } 57 | 58 | return &MultiTreeExporter{ 59 | db: db, 60 | mtree: mtree, 61 | }, nil 62 | } 63 | 64 | func (mte *MultiTreeExporter) trees() []NamedTree { 65 | if mte.db != nil { 66 | return mte.db.trees 67 | } 68 | return mte.mtree.trees 69 | } 70 | 71 | func (mte *MultiTreeExporter) Next() (interface{}, error) { 72 | if mte.exporter != nil { 73 | node, err := mte.exporter.Next() 74 | if err != nil { 75 | if errors.Is(err, errorutils.ErrorExportDone) { 76 | mte.exporter.Close() 77 | mte.exporter = nil 78 | return mte.Next() 79 | } 80 | return nil, err 81 | } 82 | return node, nil 83 | } 84 | 85 | trees := mte.trees() 86 | if mte.iTree >= len(trees) { 87 | return nil, errorutils.ErrorExportDone 88 | } 89 | tree := trees[mte.iTree] 90 | mte.exporter = tree.Export() 91 | mte.iTree++ 92 | return tree.Name, nil 93 | } 94 | 95 | func (mte *MultiTreeExporter) Close() error { 96 | if mte.exporter != nil { 97 | mte.exporter.Close() 98 | mte.exporter = nil 99 | } 100 | 101 | if mte.db != nil { 102 | return mte.db.Close() 103 | } 104 | if mte.mtree != nil { 105 | return mte.mtree.Close() 106 | } 107 | 108 | return nil 109 | } 110 | 111 | type exportWorker func(callback func(*types.SnapshotNode) bool) 112 | 113 | type Exporter struct { 114 | ch <-chan *types.SnapshotNode 115 | cancel context.CancelFunc 116 | } 117 | 118 | func newExporter(worker exportWorker) *Exporter { 119 | ctx, cancel := context.WithCancel(context.Background()) 120 | ch := make(chan *types.SnapshotNode, exportBufferSize) 121 | go func() { 122 | defer close(ch) 123 | worker(func(enode *types.SnapshotNode) bool { 124 | select { 125 | case ch <- enode: 126 | case <-ctx.Done(): 127 | return true 128 | } 129 | return false 130 | }) 131 | }() 132 | return &Exporter{ch, cancel} 133 | } 134 | 135 | func (e *Exporter) Next() (*types.SnapshotNode, error) { 136 | if exportNode, ok := <-e.ch; ok { 137 | return exportNode, nil 138 | } 139 | return nil, errorutils.ErrorExportDone 140 | } 141 | 142 | // Close closes the exporter. It is safe to call multiple times.
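// Internally it cancels the worker context and drains the channel so the worker goroutine can finish and close the channel.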
143 | func (e *Exporter) Close() { 144 | e.cancel() 145 | for range e.ch { 146 | // drain channel 147 | } 148 | 149 | } 150 | -------------------------------------------------------------------------------- /sc/memiavl/filelock.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "path/filepath" 5 | 6 | "github.com/zbiljic/go-filelock" 7 | ) 8 | 9 | type FileLock interface { 10 | Unlock() error 11 | Destroy() error 12 | } 13 | 14 | func LockFile(fname string) (FileLock, error) { 15 | path, err := filepath.Abs(fname) 16 | if err != nil { 17 | return nil, err 18 | } 19 | fl, err := filelock.New(path) 20 | if err != nil { 21 | return nil, err 22 | } 23 | if _, err := fl.TryLock(); err != nil { 24 | return nil, err 25 | } 26 | 27 | return fl, nil 28 | } 29 | -------------------------------------------------------------------------------- /sc/memiavl/import.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "math" 8 | "os" 9 | "path/filepath" 10 | 11 | "github.com/sei-protocol/sei-db/proto" 12 | "github.com/sei-protocol/sei-db/sc/types" 13 | ) 14 | 15 | var ( 16 | nodeChanSize = 10000 17 | bufIOSize = 64 * 1024 * 1024 18 | ) 19 | 20 | type MultiTreeImporter struct { 21 | dir string 22 | snapshotDir string 23 | height int64 24 | importer *TreeImporter 25 | fileLock FileLock 26 | } 27 | 28 | func NewMultiTreeImporter(dir string, height uint64) (*MultiTreeImporter, error) { 29 | if height > math.MaxUint32 { 30 | return nil, fmt.Errorf("version overflows uint32: %d", height) 31 | } 32 | 33 | var fileLock FileLock 34 | fileLock, err := LockFile(filepath.Join(dir, LockFileName)) 35 | if err != nil { 36 | return nil, fmt.Errorf("fail to lock db: %w", err) 37 | } 38 | 39 | return &MultiTreeImporter{ 40 | dir: dir, 41 | height: int64(height), 42 | snapshotDir: snapshotName(int64(height)), 43 | fileLock: fileLock, 44 | }, nil 45 | } 46 | 47 | func (mti *MultiTreeImporter) tmpDir() string { 48 | return filepath.Join(mti.dir, mti.snapshotDir+"-tmp") 49 | } 50 | 51 | func (mti *MultiTreeImporter) Add(item interface{}) error { 52 | switch item := item.(type) { 53 | case *types.SnapshotNode: 54 | mti.AddNode(item) 55 | return nil 56 | case string: 57 | return mti.AddTree(item) 58 | default: 59 | return fmt.Errorf("unknown item type: %T", item) 60 | } 61 | } 62 | 63 | func (mti *MultiTreeImporter) AddTree(name string) error { 64 | if mti.importer != nil { 65 | if err := mti.importer.Close(); err != nil { 66 | return err 67 | } 68 | } 69 | mti.importer = NewTreeImporter(filepath.Join(mti.tmpDir(), name), mti.height) 70 | return nil 71 | } 72 | 73 | func (mti *MultiTreeImporter) AddNode(node *types.SnapshotNode) { 74 | mti.importer.Add(node) 75 | } 76 | 77 | func (mti *MultiTreeImporter) Close() error { 78 | if mti.importer != nil { 79 | if err := mti.importer.Close(); err != nil { 80 | return err 81 | } 82 | mti.importer = nil 83 | } 84 | 85 | tmpDir := mti.tmpDir() 86 | if err := updateMetadataFile(tmpDir, mti.height); err != nil { 87 | return err 88 | } 89 | 90 | if err := os.Rename(tmpDir, filepath.Join(mti.dir, mti.snapshotDir)); err != nil { 91 | return err 92 | } 93 | 94 | if err := updateCurrentSymlink(mti.dir, mti.snapshotDir); err != nil { 95 | return err 96 | } 97 | return mti.fileLock.Unlock() 98 | } 99 | 100 | // TreeImporter import a single memiavl tree from state-sync snapshot 101 | type TreeImporter struct { 102 | 
nodesChan chan *types.SnapshotNode 103 | quitChan chan error 104 | } 105 | 106 | func NewTreeImporter(dir string, version int64) *TreeImporter { 107 | nodesChan := make(chan *types.SnapshotNode, nodeChanSize) 108 | quitChan := make(chan error) 109 | go func() { 110 | defer close(quitChan) 111 | quitChan <- doImport(dir, version, nodesChan) 112 | }() 113 | return &TreeImporter{nodesChan, quitChan} 114 | } 115 | 116 | func (ai *TreeImporter) Add(node *types.SnapshotNode) { 117 | ai.nodesChan <- node 118 | } 119 | 120 | func (ai *TreeImporter) Close() error { 121 | var err error 122 | // tolerate double close 123 | if ai.nodesChan != nil { 124 | close(ai.nodesChan) 125 | err = <-ai.quitChan 126 | } 127 | ai.nodesChan = nil 128 | ai.quitChan = nil 129 | return err 130 | } 131 | 132 | // doImport imports a stream of `types.SnapshotNode`s into a new snapshot. 133 | func doImport(dir string, version int64, nodes <-chan *types.SnapshotNode) (returnErr error) { 134 | if version > int64(math.MaxUint32) { 135 | return errors.New("version overflows uint32") 136 | } 137 | 138 | return writeSnapshot(context.Background(), dir, uint32(version), func(w *snapshotWriter) (uint32, error) { 139 | i := &importer{ 140 | snapshotWriter: *w, 141 | } 142 | 143 | for node := range nodes { 144 | if err := i.Add(node); err != nil { 145 | return 0, err 146 | } 147 | } 148 | 149 | switch len(i.leavesStack) { 150 | case 0: 151 | return 0, nil 152 | case 1: 153 | return i.leafCounter, nil 154 | default: 155 | return 0, fmt.Errorf("invalid node structure, found stack size %v after import", len(i.leavesStack)) 156 | } 157 | }) 158 | } 159 | 160 | type importer struct { 161 | snapshotWriter 162 | 163 | // keep track of how many leaves have been written before the pending nodes 164 | leavesStack []uint32 165 | // keep track of the pending nodes 166 | nodeStack []*MemNode 167 | } 168 | 169 | func (i *importer) Add(n *types.SnapshotNode) error { 170 | if n.Version > int64(math.MaxUint32) { 171 | return errors.New("version overflows uint32") 172 | } 173 | 174 | if n.Height == 0 { 175 | node := &MemNode{ 176 | height: 0, 177 | size: 1, 178 | version: uint32(n.Version), 179 | key: n.Key, 180 | value: n.Value, 181 | } 182 | nodeHash := node.Hash() 183 | if err := i.writeLeaf(node.version, node.key, node.value, nodeHash); err != nil { 184 | return err 185 | } 186 | i.leavesStack = append(i.leavesStack, i.leafCounter) 187 | i.nodeStack = append(i.nodeStack, node) 188 | return nil 189 | } 190 | 191 | // branch node 192 | keyLeaf := i.leavesStack[len(i.leavesStack)-2] 193 | leftNode := i.nodeStack[len(i.nodeStack)-2] 194 | rightNode := i.nodeStack[len(i.nodeStack)-1] 195 | 196 | node := &MemNode{ 197 | height: uint8(n.Height), 198 | size: leftNode.size + rightNode.size, 199 | version: uint32(n.Version), 200 | key: n.Key, 201 | left: leftNode, 202 | right: rightNode, 203 | } 204 | nodeHash := node.Hash() 205 | 206 | // remove unnecessary references to avoid a memory leak 207 | node.left = nil 208 | node.right = nil 209 | 210 | preTrees := uint8(len(i.nodeStack) - 2) 211 | if err := i.writeBranch(node.version, uint32(node.size), node.height, preTrees, keyLeaf, nodeHash); err != nil { 212 | return err 213 | } 214 | 215 | i.leavesStack = i.leavesStack[:len(i.leavesStack)-2] 216 | i.leavesStack = append(i.leavesStack, i.leafCounter) 217 | 218 | i.nodeStack = i.nodeStack[:len(i.nodeStack)-2] 219 | i.nodeStack = append(i.nodeStack, node) 220 | return nil 221 | } 222 | 223 | func updateMetadataFile(dir string, height int64) (returnErr error) { 224 | 
entries, err := os.ReadDir(dir) 225 | if err != nil { 226 | return err 227 | } 228 | storeInfos := make([]proto.StoreInfo, 0, len(entries)) 229 | for _, e := range entries { 230 | if !e.IsDir() { 231 | continue 232 | } 233 | name := e.Name() 234 | snapshot, err := OpenSnapshot(filepath.Join(dir, name)) 235 | if err != nil { 236 | return err 237 | } 238 | defer func() { 239 | if err := snapshot.Close(); returnErr == nil { 240 | returnErr = err 241 | } 242 | }() 243 | storeInfos = append(storeInfos, proto.StoreInfo{ 244 | Name: name, 245 | CommitId: proto.CommitID{ 246 | Version: height, 247 | Hash: snapshot.RootHash(), 248 | }, 249 | }) 250 | } 251 | metadata := proto.MultiTreeMetadata{ 252 | CommitInfo: &proto.CommitInfo{ 253 | Version: height, 254 | StoreInfos: storeInfos, 255 | }, 256 | // initial version should correspond to the first rlog entry 257 | InitialVersion: height + 1, 258 | } 259 | bz, err := metadata.Marshal() 260 | if err != nil { 261 | return err 262 | } 263 | return WriteFileSync(filepath.Join(dir, MetadataFileName), bz) 264 | } 265 | -------------------------------------------------------------------------------- /sc/memiavl/iterator.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "bytes" 5 | 6 | "github.com/sei-protocol/sei-db/common/utils" 7 | dbm "github.com/tendermint/tm-db" 8 | ) 9 | 10 | var _ dbm.Iterator = (*Iterator)(nil) 11 | 12 | type Iterator struct { 13 | // domain of iteration, end is exclusive 14 | start, end []byte 15 | ascending bool 16 | zeroCopy bool 17 | 18 | // cache the next key-value pair 19 | key, value []byte 20 | 21 | valid bool 22 | 23 | stack []Node 24 | } 25 | 26 | func NewIterator(start, end []byte, ascending bool, root Node, zeroCopy bool) *Iterator { 27 | iter := &Iterator{ 28 | start: start, 29 | end: end, 30 | ascending: ascending, 31 | valid: true, 32 | zeroCopy: zeroCopy, 33 | } 34 | 35 | if root != nil { 36 | iter.stack = []Node{root} 37 | } 38 | 39 | // cache the first key-value 40 | iter.Next() 41 | return iter 42 | } 43 | 44 | func (iter *Iterator) Domain() ([]byte, []byte) { 45 | return iter.start, iter.end 46 | } 47 | 48 | // Valid implements dbm.Iterator. 
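//
// A minimal consumption sketch for this iterator (assumes `root` is the root
// Node of a loaded tree; the `fmt` import and error handling are elided):
//
//	iter := NewIterator(nil, nil, true, root, false)
//	defer iter.Close()
//	for ; iter.Valid(); iter.Next() {
//	    fmt.Printf("%s => %s\n", iter.Key(), iter.Value())
//	}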
49 | func (iter *Iterator) Valid() bool { 50 | return iter.valid 51 | } 52 | 53 | // Error implements dbm.Iterator 54 | func (iter *Iterator) Error() error { 55 | return nil 56 | } 57 | 58 | // Key implements dbm.Iterator 59 | func (iter *Iterator) Key() []byte { 60 | if !iter.zeroCopy { 61 | return utils.Clone(iter.key) 62 | } 63 | return iter.key 64 | } 65 | 66 | // Value implements dbm.Iterator 67 | func (iter *Iterator) Value() []byte { 68 | if !iter.zeroCopy { 69 | return utils.Clone(iter.value) 70 | } 71 | return iter.value 72 | } 73 | 74 | // Next implements dbm.Iterator 75 | func (iter *Iterator) Next() { 76 | for len(iter.stack) > 0 { 77 | // pop node 78 | node := iter.stack[len(iter.stack)-1] 79 | iter.stack = iter.stack[:len(iter.stack)-1] 80 | 81 | key := node.Key() 82 | startCmp := bytes.Compare(iter.start, key) 83 | afterStart := iter.start == nil || startCmp < 0 84 | beforeEnd := iter.end == nil || bytes.Compare(key, iter.end) < 0 85 | 86 | if node.IsLeaf() { 87 | startOrAfter := afterStart || startCmp == 0 88 | if startOrAfter && beforeEnd { 89 | iter.key = key 90 | iter.value = node.Value() 91 | return 92 | } 93 | } else { 94 | // push children to stack 95 | if iter.ascending { 96 | if beforeEnd { 97 | iter.stack = append(iter.stack, node.Right()) 98 | } 99 | if afterStart { 100 | iter.stack = append(iter.stack, node.Left()) 101 | } 102 | } else { 103 | if afterStart { 104 | iter.stack = append(iter.stack, node.Left()) 105 | } 106 | if beforeEnd { 107 | iter.stack = append(iter.stack, node.Right()) 108 | } 109 | } 110 | } 111 | } 112 | 113 | iter.valid = false 114 | } 115 | 116 | // Close implements dbm.Iterator 117 | func (iter *Iterator) Close() error { 118 | iter.valid = false 119 | iter.stack = nil 120 | return nil 121 | } 122 | -------------------------------------------------------------------------------- /sc/memiavl/iterator_test.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | dbm "github.com/tendermint/tm-db" 8 | ) 9 | 10 | func TestIterator(t *testing.T) { 11 | tree := New(0) 12 | require.Equal(t, ExpectItems[0], collectIter(tree.Iterator(nil, nil, true))) 13 | 14 | for _, changes := range ChangeSets { 15 | tree.ApplyChangeSet(changes) 16 | _, v, err := tree.SaveVersion(true) 17 | require.NoError(t, err) 18 | require.Equal(t, ExpectItems[v], collectIter(tree.Iterator(nil, nil, true))) 19 | require.Equal(t, reverse(ExpectItems[v]), collectIter(tree.Iterator(nil, nil, false))) 20 | } 21 | } 22 | 23 | func TestIteratorRange(t *testing.T) { 24 | tree := New(0) 25 | for _, changes := range ChangeSets[:6] { 26 | tree.ApplyChangeSet(changes) 27 | _, _, err := tree.SaveVersion(true) 28 | require.NoError(t, err) 29 | } 30 | 31 | expItems := []pair{ 32 | {[]byte("aello05"), []byte("world1")}, 33 | {[]byte("aello06"), []byte("world1")}, 34 | {[]byte("aello07"), []byte("world1")}, 35 | {[]byte("aello08"), []byte("world1")}, 36 | {[]byte("aello09"), []byte("world1")}, 37 | } 38 | require.Equal(t, expItems, collectIter(tree.Iterator([]byte("aello05"), []byte("aello10"), true))) 39 | require.Equal(t, reverse(expItems), collectIter(tree.Iterator([]byte("aello05"), []byte("aello10"), false))) 40 | } 41 | 42 | type pair struct { 43 | key, value []byte 44 | } 45 | 46 | func collectIter(iter dbm.Iterator) []pair { 47 | result := []pair{} 48 | for ; iter.Valid(); iter.Next() { 49 | result = append(result, pair{key: iter.Key(), value: iter.Value()})
50 | } 51 | return result 52 | } 53 | 54 | func reverse[S ~[]E, E any](s S) S { 55 | r := make(S, len(s)) 56 | for i, j := 0, len(s)-1; i <= j; i, j = i+1, j-1 { 57 | r[i], r[j] = s[j], s[i] 58 | } 59 | return r 60 | } 61 | -------------------------------------------------------------------------------- /sc/memiavl/layout_little_endian.go: -------------------------------------------------------------------------------- 1 | //go:build !nativebyteorder 2 | // +build !nativebyteorder 3 | 4 | package memiavl 5 | 6 | import ( 7 | "encoding/binary" 8 | ) 9 | 10 | // Nodes is a continuously stored array of IAVL nodes 11 | type Nodes struct { 12 | data []byte 13 | } 14 | 15 | func NewNodes(data []byte) (Nodes, error) { 16 | return Nodes{data}, nil 17 | } 18 | 19 | func (nodes Nodes) Node(i uint32) NodeLayout { 20 | offset := int(i) * SizeNode 21 | return NodeLayout{data: (*[SizeNode]byte)(nodes.data[offset : offset+SizeNode])} 22 | } 23 | 24 | // see comment of `PersistedNode` 25 | type NodeLayout struct { 26 | data *[SizeNode]byte 27 | } 28 | 29 | func (node NodeLayout) Height() uint8 { 30 | return node.data[OffsetHeight] 31 | } 32 | 33 | func (node NodeLayout) PreTrees() uint8 { 34 | return node.data[OffsetPreTrees] 35 | } 36 | 37 | func (node NodeLayout) Version() uint32 { 38 | return binary.LittleEndian.Uint32(node.data[OffsetVersion : OffsetVersion+4]) 39 | } 40 | 41 | func (node NodeLayout) Size() uint32 { 42 | return binary.LittleEndian.Uint32(node.data[OffsetSize : OffsetSize+4]) 43 | } 44 | 45 | func (node NodeLayout) KeyLeaf() uint32 { 46 | return binary.LittleEndian.Uint32(node.data[OffsetKeyLeaf : OffsetKeyLeaf+4]) 47 | } 48 | 49 | func (node NodeLayout) Hash() []byte { 50 | return node.data[OffsetHash : OffsetHash+SizeHash] 51 | } 52 | 53 | // Leaves is a continuously stored array of IAVL leaf nodes 54 | type Leaves struct { 55 | data []byte 56 | } 57 | 58 | func NewLeaves(data []byte) (Leaves, error) { 59 | return Leaves{data}, nil 60 | } 61 | 62 | func (leaves Leaves) Leaf(i uint32) LeafLayout { 63 | offset := int(i) * SizeLeaf 64 | return LeafLayout{data: (*[SizeLeaf]byte)(leaves.data[offset : offset+SizeLeaf])} 65 | } 66 | 67 | type LeafLayout struct { 68 | data *[SizeLeaf]byte 69 | } 70 | 71 | func (leaf LeafLayout) Version() uint32 { 72 | return binary.LittleEndian.Uint32(leaf.data[OffsetLeafVersion : OffsetLeafVersion+4]) 73 | } 74 | 75 | func (leaf LeafLayout) KeyLength() uint32 { 76 | return binary.LittleEndian.Uint32(leaf.data[OffsetLeafKeyLen : OffsetLeafKeyLen+4]) 77 | } 78 | 79 | func (leaf LeafLayout) KeyOffset() uint64 { 80 | return binary.LittleEndian.Uint64(leaf.data[OffsetLeafKeyOffset : OffsetLeafKeyOffset+8]) 81 | } 82 | 83 | func (leaf LeafLayout) Hash() []byte { 84 | return leaf.data[OffsetLeafHash : OffsetLeafHash+32] 85 | } 86 | -------------------------------------------------------------------------------- /sc/memiavl/layout_native.go: -------------------------------------------------------------------------------- 1 | //go:build nativebyteorder 2 | // +build nativebyteorder 3 | 4 | package memiavl 5 | 6 | import ( 7 | "errors" 8 | "unsafe" 9 | ) 10 | 11 | func init() { 12 | buf := [2]byte{} 13 | *(*uint16)(unsafe.Pointer(&buf[0])) = uint16(0xABCD) 14 | 15 | if buf != [2]byte{0xCD, 0xAB} { 16 | panic("native byte order is not little endian, please build without nativebyteorder") 17 | } 18 | } 19 | 20 | type NodeLayout = *nodeLayout 21 | 22 | // Nodes is a continuously stored array of IAVL nodes 23 | type Nodes struct { 24 | nodes []nodeLayout 25 | } 26 | 27 | func NewNodes(buf []byte) 
(Nodes, error) { 28 | // check alignment and size of the buffer 29 | p := unsafe.Pointer(unsafe.SliceData(buf)) 30 | if uintptr(p)%unsafe.Alignof(nodeLayout{}) != 0 { 31 | return Nodes{}, errors.New("input buffer is not aligned") 32 | } 33 | size := int(unsafe.Sizeof(nodeLayout{})) 34 | if len(buf)%size != 0 { 35 | return Nodes{}, errors.New("input buffer length is not correct") 36 | } 37 | nodes := unsafe.Slice((*nodeLayout)(p), len(buf)/size) 38 | return Nodes{nodes}, nil 39 | } 40 | 41 | func (nodes Nodes) Node(i uint32) NodeLayout { 42 | return &nodes.nodes[i] 43 | } 44 | 45 | // see comment of `PersistedNode` 46 | type nodeLayout struct { 47 | data [4]uint32 48 | hash [32]byte 49 | } 50 | 51 | func (node *nodeLayout) Height() uint8 { 52 | return uint8(node.data[0]) 53 | } 54 | 55 | func (node *nodeLayout) PreTrees() uint8 { 56 | return uint8(node.data[0] >> 8) 57 | } 58 | 59 | func (node *nodeLayout) Version() uint32 { 60 | return node.data[1] 61 | } 62 | 63 | func (node *nodeLayout) Size() uint32 { 64 | return node.data[2] 65 | } 66 | 67 | func (node *nodeLayout) KeyLeaf() uint32 { 68 | return node.data[3] 69 | } 70 | 71 | func (node *nodeLayout) Hash() []byte { 72 | return node.hash[:] 73 | } 74 | 75 | type LeafLayout = *leafLayout 76 | 77 | // Leaves is a continuously stored array of IAVL leaf nodes 78 | type Leaves struct { 79 | leaves []leafLayout 80 | } 81 | 82 | func NewLeaves(buf []byte) (Leaves, error) { 83 | // check alignment and size of the buffer 84 | p := unsafe.Pointer(unsafe.SliceData(buf)) 85 | if uintptr(p)%unsafe.Alignof(leafLayout{}) != 0 { 86 | return Leaves{}, errors.New("input buffer is not aligned") 87 | } 88 | size := int(unsafe.Sizeof(leafLayout{})) 89 | if len(buf)%size != 0 { 90 | return Leaves{}, errors.New("input buffer length is not correct") 91 | } 92 | leaves := unsafe.Slice((*leafLayout)(p), len(buf)/size) 93 | return Leaves{leaves}, nil 94 | } 95 | 96 | func (leaves Leaves) Leaf(i uint32) LeafLayout { 97 | return &leaves.leaves[i] 98 | } 99 | 100 | type leafLayout struct { 101 | version uint32 102 | keyLen uint32 103 | keyOffset uint64 104 | hash [32]byte 105 | } 106 | 107 | func (leaf *leafLayout) Version() uint32 { 108 | return leaf.version 109 | } 110 | 111 | func (leaf *leafLayout) KeyLength() uint32 { 112 | return leaf.keyLen 113 | } 114 | 115 | func (leaf *leafLayout) KeyOffset() uint64 { 116 | return leaf.keyOffset 117 | } 118 | 119 | func (leaf *leafLayout) Hash() []byte { 120 | return leaf.hash[:] 121 | } 122 | -------------------------------------------------------------------------------- /sc/memiavl/mem_node.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "io" 7 | ) 8 | 9 | type MemNode struct { 10 | height uint8 11 | size int64 12 | version uint32 13 | key []byte 14 | value []byte 15 | left Node 16 | right Node 17 | hash []byte 18 | } 19 | 20 | var _ Node = (*MemNode)(nil) 21 | 22 | func newLeafNode(key, value []byte, version uint32) *MemNode { 23 | return &MemNode{ 24 | key: key, value: value, version: version, size: 1, 25 | } 26 | } 27 | 28 | func (node *MemNode) Height() uint8 { 29 | return node.height 30 | } 31 | 32 | func (node *MemNode) IsLeaf() bool { 33 | return node.height == 0 34 | } 35 | 36 | func (node *MemNode) Size() int64 { 37 | return node.size 38 | } 39 | 40 | func (node *MemNode) Version() uint32 { 41 | return node.version 42 | } 43 | 44 | func (node *MemNode) Key() []byte { 45 | return node.key 46 | } 47 | 48 | func (node 
*MemNode) Value() []byte { 49 | return node.value 50 | } 51 | 52 | func (node *MemNode) Left() Node { 53 | return node.left 54 | } 55 | 56 | func (node *MemNode) Right() Node { 57 | return node.right 58 | } 59 | 60 | // Mutate clones the node if its version is smaller than or equal to cowVersion, otherwise modifies it in place 61 | func (node *MemNode) Mutate(version, cowVersion uint32) *MemNode { 62 | n := node 63 | if node.version <= cowVersion { 64 | cloned := *node 65 | n = &cloned 66 | } 67 | n.version = version 68 | n.hash = nil 69 | return n 70 | } 71 | 72 | func (node *MemNode) SafeHash() []byte { 73 | return node.Hash() 74 | } 75 | 76 | // Computes the hash of the node without computing its descendants. Must be 77 | // called on nodes which have descendant node hashes already computed. 78 | func (node *MemNode) Hash() []byte { 79 | if node == nil { 80 | return nil 81 | } 82 | if node.hash != nil { 83 | return node.hash 84 | } 85 | node.hash = HashNode(node) 86 | return node.hash 87 | } 88 | 89 | func (node *MemNode) updateHeightSize() { 90 | node.height = maxUInt8(node.left.Height(), node.right.Height()) + 1 91 | node.size = node.left.Size() + node.right.Size() 92 | } 93 | 94 | func (node *MemNode) calcBalance() int { 95 | return int(node.left.Height()) - int(node.right.Height()) 96 | } 97 | 98 | func calcBalance(node Node) int { 99 | return int(node.Left().Height()) - int(node.Right().Height()) 100 | } 101 | 102 | // Invariant: node is returned by `Mutate(version, cowVersion)`. 103 | // 104 | //      S               L 105 | //     / \     =>      / \ 106 | //    L                    S 107 | //   / \                  / \ 108 | //     LR               LR 109 | func (node *MemNode) rotateRight(version, cowVersion uint32) *MemNode { 110 | newSelf := node.left.Mutate(version, cowVersion) 111 | node.left = node.left.Right() 112 | newSelf.right = node 113 | node.updateHeightSize() 114 | newSelf.updateHeightSize() 115 | return newSelf 116 | } 117 | 118 | // Invariant: node is returned by `Mutate(version, cowVersion)`. 119 | // 120 | //    S                  R 121 | //   / \       =>       / \ 122 | //      R               S 123 | //     / \             / \ 124 | //    RL                 RL 125 | func (node *MemNode) rotateLeft(version, cowVersion uint32) *MemNode { 126 | newSelf := node.right.Mutate(version, cowVersion) 127 | node.right = node.right.Left() 128 | newSelf.left = node 129 | node.updateHeightSize() 130 | newSelf.updateHeightSize() 131 | return newSelf 132 | } 133 | 134 | // Invariant: node is returned by `Mutate(version, cowVersion)`. 
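//
// The four rebalancing cases handled by reBalance below, summarized (a sketch
// derived from the switch statement that follows):
//
//	balance > 1,  left balance >= 0  -> rotateRight                    (left-left)
//	balance > 1,  left balance < 0   -> rotateLeft then rotateRight    (left-right)
//	balance < -1, right balance <= 0 -> rotateLeft                     (right-right)
//	balance < -1, right balance > 0  -> rotateRight then rotateLeft    (right-left)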
135 | func (node *MemNode) reBalance(version, cowVersion uint32) *MemNode { 136 | balance := node.calcBalance() 137 | switch { 138 | case balance > 1: 139 | leftBalance := calcBalance(node.left) 140 | if leftBalance >= 0 { 141 | // left left 142 | return node.rotateRight(version, cowVersion) 143 | } 144 | // left right 145 | node.left = node.left.Mutate(version, cowVersion).rotateLeft(version, cowVersion) 146 | return node.rotateRight(version, cowVersion) 147 | case balance < -1: 148 | rightBalance := calcBalance(node.right) 149 | if rightBalance <= 0 { 150 | // right right 151 | return node.rotateLeft(version, cowVersion) 152 | } 153 | // right left 154 | node.right = node.right.Mutate(version, cowVersion).rotateRight(version, cowVersion) 155 | return node.rotateLeft(version, cowVersion) 156 | default: 157 | // nothing changed 158 | return node 159 | } 160 | } 161 | 162 | func (node *MemNode) Get(key []byte) ([]byte, uint32) { 163 | if node.IsLeaf() { 164 | switch bytes.Compare(node.key, key) { 165 | case -1: 166 | return nil, 1 167 | case 1: 168 | return nil, 0 169 | default: 170 | return node.value, 0 171 | } 172 | } 173 | 174 | if bytes.Compare(key, node.key) == -1 { 175 | return node.Left().Get(key) 176 | } 177 | right := node.Right() 178 | value, index := right.Get(key) 179 | return value, index + uint32(node.Size()) - uint32(right.Size()) 180 | } 181 | 182 | func (node *MemNode) GetByIndex(index uint32) ([]byte, []byte) { 183 | if node.IsLeaf() { 184 | if index == 0 { 185 | return node.key, node.value 186 | } 187 | return nil, nil 188 | } 189 | 190 | left := node.Left() 191 | leftSize := uint32(left.Size()) 192 | if index < leftSize { 193 | return left.GetByIndex(index) 194 | } 195 | 196 | right := node.Right() 197 | return right.GetByIndex(index - leftSize) 198 | } 199 | 200 | // EncodeBytes writes a varint length-prefixed byte slice to the writer; 201 | // it's used for hash computation and must be compatible with the official IAVL implementation. 202 | func EncodeBytes(w io.Writer, bz []byte) error { 203 | var buf [binary.MaxVarintLen64]byte 204 | n := binary.PutUvarint(buf[:], uint64(len(bz))) 205 | if _, err := w.Write(buf[0:n]); err != nil { 206 | return err 207 | } 208 | _, err := w.Write(bz) 209 | return err 210 | } 211 | 212 | func maxUInt8(a, b uint8) uint8 { 213 | if a > b { 214 | return a 215 | } 216 | return b 217 | } 218 | -------------------------------------------------------------------------------- /sc/memiavl/mmap.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/ledgerwatch/erigon-lib/mmap" 7 | "github.com/sei-protocol/sei-db/common/errors" 8 | ) 9 | 10 | // MmapFile manages the resources of a mmap-ed file 11 | type MmapFile struct { 12 | file *os.File 13 | data []byte 14 | // mmap handle for windows (this is used to close mmap) 15 | handle *[mmap.MaxMapSize]byte 16 | } 17 | 18 | // NewMmap opens the file and creates the mmap. 19 | // the mmap is created with flags: PROT_READ, MAP_SHARED, MADV_RANDOM. 
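//
// A minimal usage sketch (error handling abbreviated; "snapshot/nodes" is a
// hypothetical path):
//
//	m, err := NewMmap("snapshot/nodes")
//	if err != nil {
//	    return err
//	}
//	defer m.Close()
//	data := m.Data() // read-only view backed by the mmap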
20 | func NewMmap(path string) (*MmapFile, error) { 21 | file, err := os.Open(path) 22 | if err != nil { 23 | return nil, err 24 | } 25 | 26 | data, handle, err := Mmap(file) 27 | if err != nil { 28 | _ = file.Close() 29 | return nil, err 30 | } 31 | 32 | return &MmapFile{ 33 | file: file, 34 | data: data, 35 | handle: handle, 36 | }, nil 37 | } 38 | 39 | // Close closes the file and mmap handles 40 | func (m *MmapFile) Close() error { 41 | var err error 42 | if m.handle != nil { 43 | err = mmap.Munmap(m.data, m.handle) 44 | } 45 | return errors.Join(err, m.file.Close()) 46 | } 47 | 48 | // Data returns the mmap-ed buffer 49 | func (m *MmapFile) Data() []byte { 50 | return m.data 51 | } 52 | 53 | func Mmap(f *os.File) ([]byte, *[mmap.MaxMapSize]byte, error) { 54 | fi, err := f.Stat() 55 | if err != nil || fi.Size() == 0 { 56 | return nil, nil, err 57 | } 58 | 59 | return mmap.Mmap(f, int(fi.Size())) 60 | } 61 | -------------------------------------------------------------------------------- /sc/memiavl/node.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "bytes" 5 | "crypto/sha256" 6 | "encoding/binary" 7 | "fmt" 8 | "io" 9 | ) 10 | 11 | // Node interface encapsulates the interface of both PersistedNode and MemNode. 12 | type Node interface { 13 | Height() uint8 14 | IsLeaf() bool 15 | Size() int64 16 | Version() uint32 17 | Key() []byte 18 | Value() []byte 19 | Left() Node 20 | Right() Node 21 | Hash() []byte 22 | 23 | // SafeHash returns a byte slice that's safe to retain 24 | SafeHash() []byte 25 | 26 | // PersistedNode clones a new node, MemNode modifies in place 27 | Mutate(version, cowVersion uint32) *MemNode 28 | 29 | // Get queries the value for a key; it's part of the interface because a specialized implementation is more efficient. 30 | Get(key []byte) ([]byte, uint32) 31 | GetByIndex(uint32) ([]byte, []byte) 32 | } 33 | 34 | // setRecursive does the set operation. 35 | // It always modifies and returns a new `MemNode`, even if the value is the same. 36 | // It also reports whether it's an update or an insertion; for an update, the tree height and balance are unchanged. 
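//
// For example, setting a second key into a single-leaf tree yields a branch
// whose key is the larger of the two leaf keys (a sketch of the two leaf
// cases handled below):
//
//	set "b" into leaf "a" -> branch{key: "b", left: leaf "a", right: leaf "b"}
//	set "a" into leaf "b" -> branch{key: "b", left: leaf "a", right: leaf "b"}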
37 | func setRecursive(node Node, key, value []byte, version, cowVersion uint32) (*MemNode, bool) { 38 | if node == nil { 39 | return newLeafNode(key, value, version), true 40 | } 41 | 42 | nodeKey := node.Key() 43 | if node.IsLeaf() { 44 | switch bytes.Compare(key, nodeKey) { 45 | case -1: 46 | return &MemNode{ 47 | height: 1, 48 | size: 2, 49 | version: version, 50 | key: nodeKey, 51 | left: newLeafNode(key, value, version), 52 | right: node, 53 | }, false 54 | case 1: 55 | return &MemNode{ 56 | height: 1, 57 | size: 2, 58 | version: version, 59 | key: key, 60 | left: node, 61 | right: newLeafNode(key, value, version), 62 | }, false 63 | default: 64 | newNode := node.Mutate(version, cowVersion) 65 | newNode.value = value 66 | return newNode, true 67 | } 68 | } else { 69 | var ( 70 | newChild, newNode *MemNode 71 | updated bool 72 | ) 73 | if bytes.Compare(key, nodeKey) == -1 { 74 | newChild, updated = setRecursive(node.Left(), key, value, version, cowVersion) 75 | newNode = node.Mutate(version, cowVersion) 76 | newNode.left = newChild 77 | } else { 78 | newChild, updated = setRecursive(node.Right(), key, value, version, cowVersion) 79 | newNode = node.Mutate(version, cowVersion) 80 | newNode.right = newChild 81 | } 82 | 83 | if !updated { 84 | newNode.updateHeightSize() 85 | newNode = newNode.reBalance(version, cowVersion) 86 | } 87 | 88 | return newNode, updated 89 | } 90 | } 91 | 92 | // removeRecursive returns: 93 | // - (nil, origNode, nil) -> nothing changed in subtree 94 | // - (value, nil, newKey) -> leaf node is removed 95 | // - (value, new node, newKey) -> subtree changed 96 | func removeRecursive(node Node, key []byte, version, cowVersion uint32) ([]byte, Node, []byte) { 97 | if node == nil { 98 | return nil, nil, nil 99 | } 100 | 101 | if node.IsLeaf() { 102 | if bytes.Equal(node.Key(), key) { 103 | return node.Value(), nil, nil 104 | } 105 | return nil, node, nil 106 | } 107 | 108 | if bytes.Compare(key, node.Key()) == -1 { 109 | value, newLeft, newKey := removeRecursive(node.Left(), key, version, cowVersion) 110 | if value == nil { 111 | return nil, node, nil 112 | } 113 | if newLeft == nil { 114 | return value, node.Right(), node.Key() 115 | } 116 | newNode := node.Mutate(version, cowVersion) 117 | newNode.left = newLeft 118 | newNode.updateHeightSize() 119 | return value, newNode.reBalance(version, cowVersion), newKey 120 | } 121 | 122 | value, newRight, newKey := removeRecursive(node.Right(), key, version, cowVersion) 123 | if value == nil { 124 | return nil, node, nil 125 | } 126 | if newRight == nil { 127 | return value, node.Left(), nil 128 | } 129 | 130 | newNode := node.Mutate(version, cowVersion) 131 | newNode.right = newRight 132 | if newKey != nil { 133 | newNode.key = newKey 134 | } 135 | newNode.updateHeightSize() 136 | return value, newNode.reBalance(version, cowVersion), nil 137 | } 138 | 139 | // Writes the node's hash to the given `io.Writer`. This function recursively calls 140 | // children to update hashes. 
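//
// The resulting preimage, which must stay compatible with the official IAVL
// implementation, is laid out as follows (a sketch; lenPrefixed is the varint
// length prefix written by EncodeBytes):
//
//	leaf:   varint(0) varint(1) varint(version) lenPrefixed(key) lenPrefixed(sha256(value))
//	branch: varint(height) varint(size) varint(version) lenPrefixed(leftHash) lenPrefixed(rightHash)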
141 | func writeHashBytes(node Node, w io.Writer) error { 142 | var ( 143 | n int 144 | buf [binary.MaxVarintLen64]byte 145 | ) 146 | 147 | n = binary.PutVarint(buf[:], int64(node.Height())) 148 | if _, err := w.Write(buf[0:n]); err != nil { 149 | return fmt.Errorf("writing height, %w", err) 150 | } 151 | n = binary.PutVarint(buf[:], node.Size()) 152 | if _, err := w.Write(buf[0:n]); err != nil { 153 | return fmt.Errorf("writing size, %w", err) 154 | } 155 | n = binary.PutVarint(buf[:], int64(node.Version())) 156 | if _, err := w.Write(buf[0:n]); err != nil { 157 | return fmt.Errorf("writing version, %w", err) 158 | } 159 | 160 | // Key is not written for inner nodes, unlike writeBytes. 161 | 162 | if node.IsLeaf() { 163 | if err := EncodeBytes(w, node.Key()); err != nil { 164 | return fmt.Errorf("writing key, %w", err) 165 | } 166 | 167 | // Indirection needed to provide proofs without values. 168 | // (e.g. ProofLeafNode.ValueHash) 169 | valueHash := sha256.Sum256(node.Value()) 170 | 171 | if err := EncodeBytes(w, valueHash[:]); err != nil { 172 | return fmt.Errorf("writing value, %w", err) 173 | } 174 | } else { 175 | if err := EncodeBytes(w, node.Left().Hash()); err != nil { 176 | return fmt.Errorf("writing left hash, %w", err) 177 | } 178 | if err := EncodeBytes(w, node.Right().Hash()); err != nil { 179 | return fmt.Errorf("writing right hash, %w", err) 180 | } 181 | } 182 | 183 | return nil 184 | } 185 | 186 | // HashNode computes the hash of the node. 187 | func HashNode(node Node) []byte { 188 | if node == nil { 189 | return nil 190 | } 191 | h := sha256.New() 192 | if err := writeHashBytes(node, h); err != nil { 193 | panic(err) 194 | } 195 | return h.Sum(nil) 196 | } 197 | 198 | // VerifyHash compares the node's cached hash with the computed one 199 | func VerifyHash(node Node) bool { 200 | return bytes.Equal(HashNode(node), node.Hash()) 201 | } 202 | -------------------------------------------------------------------------------- /sc/memiavl/opts.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "errors" 5 | 6 | "github.com/sei-protocol/sei-db/config" 7 | ) 8 | 9 | type Options struct { 10 | Dir string 11 | CreateIfMissing bool 12 | InitialVersion uint32 13 | ReadOnly bool 14 | // the initial stores when initializing an empty instance 15 | InitialStores []string 16 | // how many recent snapshots to keep 17 | SnapshotKeepRecent uint32 18 | // how often to take a snapshot 19 | SnapshotInterval uint32 20 | // Buffer size for the asynchronous commit queue, -1 means synchronous commit, 21 | // default to 0. 22 | AsyncCommitBuffer int 23 | // ZeroCopy if true, the get and iterator methods could return a slice pointing to mmap-ed blob files. 24 | ZeroCopy bool 25 | // CacheSize defines the cache's max entry size for each memiavl store. 26 | CacheSize int 27 | // LoadForOverwriting if true rolls back the state: the OpenDB method will 28 | // truncate the versions after the `TargetVersion`, so that `TargetVersion` becomes the latest version. 29 | // It does nothing if the target version is `0`. 
30 | LoadForOverwriting bool 31 | 32 | // Limit the number of concurrent snapshot writers 33 | SnapshotWriterLimit int 34 | } 35 | 36 | func (opts Options) Validate() error { 37 | if opts.ReadOnly && opts.CreateIfMissing { 38 | return errors.New("can't create db in read-only mode") 39 | } 40 | 41 | if opts.ReadOnly && opts.LoadForOverwriting { 42 | return errors.New("can't rollback db in read-only mode") 43 | } 44 | 45 | return nil 46 | } 47 | 48 | func (opts *Options) FillDefaults() { 49 | if opts.SnapshotInterval <= 0 { 50 | opts.SnapshotInterval = config.DefaultSnapshotInterval 51 | } 52 | 53 | if opts.SnapshotWriterLimit <= 0 { 54 | opts.SnapshotWriterLimit = config.DefaultSnapshotWriterLimit 55 | } 56 | 57 | if opts.CacheSize < 0 { 58 | opts.CacheSize = config.DefaultCacheSize 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /sc/memiavl/persisted_node.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "bytes" 5 | "crypto/sha256" 6 | "sort" 7 | 8 | "github.com/sei-protocol/sei-db/common/utils" 9 | ) 10 | 11 | const ( 12 | OffsetHeight = 0 13 | OffsetPreTrees = OffsetHeight + 1 14 | OffsetVersion = OffsetHeight + 4 15 | OffsetSize = OffsetVersion + 4 16 | OffsetKeyLeaf = OffsetSize + 4 17 | 18 | OffsetHash = OffsetKeyLeaf + 4 19 | SizeHash = sha256.Size 20 | SizeNodeWithoutHash = OffsetHash 21 | SizeNode = SizeNodeWithoutHash + SizeHash 22 | 23 | OffsetLeafVersion = 0 24 | OffsetLeafKeyLen = OffsetLeafVersion + 4 25 | OffsetLeafKeyOffset = OffsetLeafKeyLen + 4 26 | OffsetLeafHash = OffsetLeafKeyOffset + 8 27 | SizeLeafWithoutHash = OffsetLeafHash 28 | SizeLeaf = SizeLeafWithoutHash + SizeHash 29 | ) 30 | 31 | // PersistedNode is backed by serialized byte array, usually mmap-ed from disk file. 32 | // Encoding format (all integers are encoded in little endian): 33 | // 34 | // Branch node: 35 | // - height : 1 36 | // - preTrees : 1 37 | // - _padding : 2 38 | // - version : 4 39 | // - size : 4 40 | // - key node : 4 // node index of the smallest leaf in right branch 41 | // - hash : 32 42 | // Leaf node: 43 | // - version : 4 44 | // - key len : 4 45 | // - key offset : 8 46 | // - hash : 32 47 | type PersistedNode struct { 48 | snapshot *Snapshot 49 | isLeaf bool 50 | index uint32 51 | } 52 | 53 | var _ Node = PersistedNode{} 54 | 55 | func (node PersistedNode) branchNode() NodeLayout { 56 | return node.snapshot.nodesLayout.Node(node.index) 57 | } 58 | 59 | func (node PersistedNode) leafNode() LeafLayout { 60 | return node.snapshot.leavesLayout.Leaf(node.index) 61 | } 62 | 63 | func (node PersistedNode) Height() uint8 { 64 | if node.isLeaf { 65 | return 0 66 | } 67 | return node.branchNode().Height() 68 | } 69 | 70 | func (node PersistedNode) IsLeaf() bool { 71 | return node.isLeaf 72 | } 73 | 74 | func (node PersistedNode) Version() uint32 { 75 | if node.isLeaf { 76 | return node.leafNode().Version() 77 | } 78 | return node.branchNode().Version() 79 | } 80 | 81 | func (node PersistedNode) Size() int64 { 82 | if node.isLeaf { 83 | return 1 84 | } 85 | return int64(node.branchNode().Size()) 86 | } 87 | 88 | func (node PersistedNode) Key() []byte { 89 | if node.isLeaf { 90 | return node.snapshot.LeafKey(node.index) 91 | } 92 | index := node.branchNode().KeyLeaf() 93 | return node.snapshot.LeafKey(index) 94 | } 95 | 96 | // Value returns nil for non-leaf node. 
97 | func (node PersistedNode) Value() []byte { 98 | if !node.isLeaf { 99 | return nil 100 | } 101 | _, value := node.snapshot.LeafKeyValue(node.index) 102 | return value 103 | } 104 | 105 | // Left result is not defined for leaf nodes. 106 | func (node PersistedNode) Left() Node { 107 | if node.isLeaf { 108 | panic("can't call Left on leaf node") 109 | } 110 | 111 | data := node.branchNode() 112 | preTrees := uint32(data.PreTrees()) 113 | startLeaf := getStartLeaf(node.index, data.Size(), preTrees) 114 | keyLeaf := data.KeyLeaf() 115 | if startLeaf+1 == keyLeaf { 116 | return PersistedNode{snapshot: node.snapshot, index: startLeaf, isLeaf: true} 117 | } 118 | return PersistedNode{snapshot: node.snapshot, index: getLeftBranch(keyLeaf, preTrees)} 119 | } 120 | 121 | // Right result is not defined for leaf nodes. 122 | func (node PersistedNode) Right() Node { 123 | if node.isLeaf { 124 | panic("can't call Right on leaf node") 125 | } 126 | 127 | data := node.branchNode() 128 | keyLeaf := data.KeyLeaf() 129 | preTrees := uint32(data.PreTrees()) 130 | if keyLeaf == getEndLeaf(node.index, preTrees) { 131 | return PersistedNode{snapshot: node.snapshot, index: keyLeaf, isLeaf: true} 132 | } 133 | return PersistedNode{snapshot: node.snapshot, index: node.index - 1} 134 | } 135 | 136 | func (node PersistedNode) SafeHash() []byte { 137 | return utils.Clone(node.Hash()) 138 | } 139 | 140 | func (node PersistedNode) Hash() []byte { 141 | if node.isLeaf { 142 | return node.leafNode().Hash() 143 | } 144 | return node.branchNode().Hash() 145 | } 146 | 147 | func (node PersistedNode) Mutate(version, _ uint32) *MemNode { 148 | if node.isLeaf { 149 | key, value := node.snapshot.LeafKeyValue(node.index) 150 | return &MemNode{ 151 | height: 0, 152 | size: 1, 153 | version: version, 154 | key: key, 155 | value: value, 156 | } 157 | } 158 | data := node.branchNode() 159 | return &MemNode{ 160 | height: data.Height(), 161 | size: int64(data.Size()), 162 | version: version, 163 | key: node.Key(), 164 | left: node.Left(), 165 | right: node.Right(), 166 | } 167 | } 168 | 169 | func (node PersistedNode) Get(key []byte) ([]byte, uint32) { 170 | var start, count uint32 171 | if node.isLeaf { 172 | start = node.index 173 | count = 1 174 | } else { 175 | data := node.branchNode() 176 | preTrees := uint32(data.PreTrees()) 177 | count = data.Size() 178 | start = getStartLeaf(node.index, count, preTrees) 179 | } 180 | 181 | // binary search in the leaf node array 182 | i := uint32(sort.Search(int(count), func(i int) bool { 183 | leafKey := node.snapshot.LeafKey(start + uint32(i)) 184 | return bytes.Compare(leafKey, key) >= 0 185 | })) 186 | 187 | leaf := i + start 188 | if leaf >= start+count { 189 | // return the next index if the key is greater than all keys in the node 190 | return nil, i 191 | } 192 | 193 | nodeKey, value := node.snapshot.LeafKeyValue(leaf) 194 | if !bytes.Equal(nodeKey, key) { 195 | return nil, i 196 | } 197 | 198 | return value, i 199 | } 200 | 201 | func (node PersistedNode) GetByIndex(leafIndex uint32) ([]byte, []byte) { 202 | if node.isLeaf { 203 | if leafIndex != 0 { 204 | return nil, nil 205 | } 206 | return node.snapshot.LeafKeyValue(node.index) 207 | } 208 | data := node.branchNode() 209 | preTrees := uint32(data.PreTrees()) 210 | startLeaf := getStartLeaf(node.index, data.Size(), preTrees) 211 | endLeaf := getEndLeaf(node.index, preTrees) 212 | 213 | i := startLeaf + leafIndex 214 | if i > endLeaf { 215 | return nil, nil 216 | } 217 | return node.snapshot.LeafKeyValue(i) 218 | } 219 | 220 | 
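// A worked example of the index math below: for a snapshot holding a single
// three-leaf tree in post-order (leaves L0, L1, L2; branches B0 = parent of
// L0 and L1, B1 = root), preTrees is 0, and for the root B1 (branch index 1,
// size 3, keyLeaf 2):
//
//	start leaf  = 1 + 2 - 3 + 0 = 0 (L0)
//	end leaf    = 1 + 0 + 1     = 2 (L2)
//	left branch = 2 - 0 - 2     = 0 (B0)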
// getStartLeaf returns the index of the first leaf in the node. 221 | // 222 | // > start leaf = pre leaves 223 | // > = pre branches + pre trees 224 | // > = total branches - sub branches + pre trees 225 | // > = (index + 1) - (size - 1) + preTrees 226 | // > = index + 2 - size + preTrees 227 | func getStartLeaf(index, size, preTrees uint32) uint32 { 228 | return index + 2 - size + preTrees 229 | } 230 | 231 | // getEndLeaf returns the index of the last leaf in the node. 232 | // 233 | // > end leaf = start leaf + size - 1 234 | // > = (index + 2 - size + preTrees) + size - 1 235 | // > = index + 1 + preTrees 236 | func getEndLeaf(index, preTrees uint32) uint32 { 237 | return index + preTrees + 1 238 | } 239 | 240 | // getLeftBranch returns the index of the left branch of the node. 241 | // 242 | // > left branch = pre branches + left branches - 1 243 | // > = (total branches - sub branches) + (left leaves - 1) - 1 244 | // > = (total branches - sub branches) + (key leaf - start leaf - 1) - 1 245 | // > = (index+1 - (size-1)) + (key leaf - (index + 2 - size + preTrees) - 1) - 1 246 | // > = (index - size + 2) + key leaf - index - 2 + size - preTrees - 2 247 | // > = key leaf - preTrees - 2 248 | func getLeftBranch(keyLeaf, preTrees uint32) uint32 { 249 | return keyLeaf - preTrees - 2 250 | } 251 | -------------------------------------------------------------------------------- /sc/memiavl/proof.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "errors" 7 | "fmt" 8 | 9 | ics23 "github.com/confio/ics23/go" 10 | "github.com/cosmos/iavl" 11 | ) 12 | 13 | /* 14 | GetMembershipProof will produce a CommitmentProof that the given key (and queried value) exists in the iavl tree. 15 | If the key doesn't exist in the tree, this will return an error. 16 | */ 17 | func (t *Tree) GetMembershipProof(key []byte) (*ics23.CommitmentProof, error) { 18 | exist, err := t.createExistenceProof(key) 19 | if err != nil { 20 | return nil, err 21 | } 22 | proof := &ics23.CommitmentProof{ 23 | Proof: &ics23.CommitmentProof_Exist{ 24 | Exist: exist, 25 | }, 26 | } 27 | return proof, nil 28 | } 29 | 30 | // VerifyMembership returns true iff proof is an ExistenceProof for the given key. 31 | func (t *Tree) VerifyMembership(proof *ics23.CommitmentProof, key []byte) bool { 32 | val := t.Get(key) 33 | root := t.RootHash() 34 | return ics23.VerifyMembership(ics23.IavlSpec, root, proof, key, val) 35 | } 36 | 37 | /* 38 | GetNonMembershipProof will produce a CommitmentProof that the given key doesn't exist in the iavl tree. 39 | If the key exists in the tree, this will return an error. 40 | */ 41 | func (t *Tree) GetNonMembershipProof(key []byte) (*ics23.CommitmentProof, error) { 42 | // idx is one node to the right of what we want. 
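// For example, querying "b" in a tree whose leaves are ["a", "c"] yields
// idx = 1 (the position "b" would occupy), so GetByIndex(idx-1) is the left
// neighbor "a" and GetByIndex(idx) is the right neighbor "c".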
43 | var err error 44 | idx, val := t.GetWithIndex(key) 45 | if val != nil { 46 | return nil, fmt.Errorf("cannot create NonExistenceProof when Key in State") 47 | } 48 | 49 | nonexist := &ics23.NonExistenceProof{ 50 | Key: key, 51 | } 52 | 53 | if idx >= 1 { 54 | leftkey, _ := t.GetByIndex(idx - 1) 55 | nonexist.Left, err = t.createExistenceProof(leftkey) 56 | if err != nil { 57 | return nil, err 58 | } 59 | } 60 | 61 | // this will be nil if there is nothing to the right of the queried key 62 | rightkey, _ := t.GetByIndex(idx) 63 | if rightkey != nil { 64 | nonexist.Right, err = t.createExistenceProof(rightkey) 65 | if err != nil { 66 | return nil, err 67 | } 68 | } 69 | 70 | proof := &ics23.CommitmentProof{ 71 | Proof: &ics23.CommitmentProof_Nonexist{ 72 | Nonexist: nonexist, 73 | }, 74 | } 75 | return proof, nil 76 | } 77 | 78 | // VerifyNonMembership returns true iff proof is a NonExistenceProof for the given key. 79 | func (t *Tree) VerifyNonMembership(proof *ics23.CommitmentProof, key []byte) bool { 80 | root := t.RootHash() 81 | return ics23.VerifyNonMembership(ics23.IavlSpec, root, proof, key) 82 | } 83 | 84 | // createExistenceProof will get the proof from the tree and convert the proof into a valid 85 | // existence proof, if that's what it is. 86 | func (t *Tree) createExistenceProof(key []byte) (*ics23.ExistenceProof, error) { 87 | path, node, err := pathToLeaf(t.root, key) 88 | return &ics23.ExistenceProof{ 89 | Key: node.Key(), 90 | Value: node.Value(), 91 | Leaf: convertLeafOp(int64(node.Version())), 92 | Path: convertInnerOps(path), 93 | }, err 94 | } 95 | 96 | func convertLeafOp(version int64) *ics23.LeafOp { 97 | // this is adapted from iavl/proof.go:proofLeafNode.Hash() 98 | prefix := convertVarIntToBytes(0) 99 | prefix = append(prefix, convertVarIntToBytes(1)...) 100 | prefix = append(prefix, convertVarIntToBytes(version)...) 101 | 102 | return &ics23.LeafOp{ 103 | Hash: ics23.HashOp_SHA256, 104 | PrehashValue: ics23.HashOp_SHA256, 105 | Length: ics23.LengthOp_VAR_PROTO, 106 | Prefix: prefix, 107 | } 108 | } 109 | 110 | // we cannot get the proofInnerNode type, so we need to do the whole path in one function 111 | func convertInnerOps(path iavl.PathToLeaf) []*ics23.InnerOp { 112 | steps := make([]*ics23.InnerOp, 0, len(path)) 113 | 114 | // lengthByte is the length prefix prepended to each of the sha256 sub-hashes 115 | var lengthByte byte = 0x20 116 | 117 | // we need to go in reverse order, iavl starts from root to leaf, 118 | // we want to go up from the leaf to the root 119 | for i := len(path) - 1; i >= 0; i-- { 120 | // this is adapted from iavl/proof.go:proofInnerNode.Hash() 121 | prefix := convertVarIntToBytes(int64(path[i].Height)) 122 | prefix = append(prefix, convertVarIntToBytes(path[i].Size)...) 123 | prefix = append(prefix, convertVarIntToBytes(path[i].Version)...) 124 | 125 | var suffix []byte 126 | if len(path[i].Left) > 0 { 127 | // length prefixed left side 128 | prefix = append(prefix, lengthByte) 129 | prefix = append(prefix, path[i].Left...) 130 | // prepend the length prefix for child 131 | prefix = append(prefix, lengthByte) 132 | } else { 133 | // prepend the length prefix for child 134 | prefix = append(prefix, lengthByte) 135 | // length-prefixed right side 136 | suffix = []byte{lengthByte} 137 | suffix = append(suffix, path[i].Right...)
138 | } 139 | 140 | op := &ics23.InnerOp{ 141 | Hash: ics23.HashOp_SHA256, 142 | Prefix: prefix, 143 | Suffix: suffix, 144 | } 145 | steps = append(steps, op) 146 | } 147 | return steps 148 | } 149 | 150 | func convertVarIntToBytes(orig int64) []byte { 151 | var buf [binary.MaxVarintLen64]byte 152 | n := binary.PutVarint(buf[:], orig) 153 | return buf[:n] 154 | } 155 | 156 | func pathToLeaf(node Node, key []byte) (iavl.PathToLeaf, Node, error) { 157 | var path iavl.PathToLeaf 158 | 159 | for { 160 | height := node.Height() 161 | if height == 0 { 162 | if bytes.Equal(node.Key(), key) { 163 | return path, node, nil 164 | } 165 | 166 | return path, node, errors.New("key does not exist") 167 | } 168 | 169 | if bytes.Compare(key, node.Key()) < 0 { 170 | // left side 171 | right := node.Right() 172 | path = append(path, iavl.ProofInnerNode{ 173 | Height: int8(height), 174 | Size: node.Size(), 175 | Version: int64(node.Version()), 176 | Left: nil, 177 | Right: right.Hash(), 178 | }) 179 | node = node.Left() 180 | continue 181 | } 182 | 183 | // right side 184 | left := node.Left() 185 | path = append(path, iavl.ProofInnerNode{ 186 | Height: int8(height), 187 | Size: node.Size(), 188 | Version: int64(node.Version()), 189 | Left: left.Hash(), 190 | Right: nil, 191 | }) 192 | node = node.Right() 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /sc/memiavl/proof_test.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | import ( 4 | "context" 5 | "strconv" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestProofs(t *testing.T) { 12 | // do a round test for each version in ChangeSets 13 | testCases := []struct { 14 | existKey []byte 15 | nonExistKey []byte 16 | }{ 17 | {[]byte("hello"), []byte("hello1")}, 18 | {[]byte("hello1"), []byte("hello2")}, 19 | {[]byte("hello2"), []byte("hell")}, 20 | {[]byte("hello00"), []byte("hell")}, 21 | {[]byte("hello00"), []byte("hello")}, 22 | {[]byte("aello00"), []byte("hello")}, 23 | {[]byte("hello1"), []byte("aello00")}, 24 | } 25 | 26 | tmpDir := t.TempDir() 27 | tree := New(0) 28 | 29 | for i, tc := range testCases { 30 | t.Run(strconv.Itoa(i), func(t *testing.T) { 31 | changes := ChangeSets[i] 32 | tree.ApplyChangeSet(changes) 33 | _, _, err := tree.SaveVersion(true) 34 | require.NoError(t, err) 35 | 36 | proof, err := tree.GetMembershipProof(tc.existKey) 37 | require.NoError(t, err) 38 | require.True(t, tree.VerifyMembership(proof, tc.existKey)) 39 | 40 | proof, err = tree.GetNonMembershipProof(tc.nonExistKey) 41 | require.NoError(t, err) 42 | require.True(t, tree.VerifyNonMembership(proof, tc.nonExistKey)) 43 | 44 | // test persisted tree 45 | require.NoError(t, tree.WriteSnapshot(context.Background(), tmpDir)) 46 | snapshot, err := OpenSnapshot(tmpDir) 47 | require.NoError(t, err) 48 | ptree := NewFromSnapshot(snapshot, true, 0) 49 | defer ptree.Close() 50 | 51 | proof, err = ptree.GetMembershipProof(tc.existKey) 52 | require.NoError(t, err) 53 | require.True(t, ptree.VerifyMembership(proof, tc.existKey)) 54 | 55 | proof, err = ptree.GetNonMembershipProof(tc.nonExistKey) 56 | require.NoError(t, err) 57 | require.True(t, ptree.VerifyNonMembership(proof, tc.nonExistKey)) 58 | }) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /sc/memiavl/snapshot_test.go: -------------------------------------------------------------------------------- 1 | package memiavl 2 | 3 | 
import ( 4 | "context" 5 | "errors" 6 | "testing" 7 | 8 | errorutils "github.com/sei-protocol/sei-db/common/errors" 9 | "github.com/sei-protocol/sei-db/common/logger" 10 | "github.com/sei-protocol/sei-db/proto" 11 | "github.com/sei-protocol/sei-db/sc/types" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestSnapshotEncodingRoundTrip(t *testing.T) { 16 | // setup test tree 17 | tree := New(0) 18 | for _, changes := range ChangeSets[:len(ChangeSets)-1] { 19 | tree.ApplyChangeSet(changes) 20 | _, _, err := tree.SaveVersion(true) 21 | require.NoError(t, err) 22 | } 23 | 24 | snapshotDir := t.TempDir() 25 | require.NoError(t, tree.WriteSnapshot(context.Background(), snapshotDir)) 26 | 27 | snapshot, err := OpenSnapshot(snapshotDir) 28 | require.NoError(t, err) 29 | 30 | tree2 := NewFromSnapshot(snapshot, true, 0) 31 | 32 | require.Equal(t, tree.Version(), tree2.Version()) 33 | require.Equal(t, tree.RootHash(), tree2.RootHash()) 34 | 35 | // verify all the node hashes in snapshot 36 | for i := 0; i < snapshot.nodesLen(); i++ { 37 | node := snapshot.Node(uint32(i)) 38 | require.Equal(t, node.Hash(), HashNode(node)) 39 | } 40 | 41 | require.NoError(t, snapshot.Close()) 42 | 43 | // test modify tree loaded from snapshot 44 | snapshot, err = OpenSnapshot(snapshotDir) 45 | require.NoError(t, err) 46 | tree3 := NewFromSnapshot(snapshot, true, 0) 47 | tree3.ApplyChangeSet(ChangeSets[len(ChangeSets)-1]) 48 | hash, v, err := tree3.SaveVersion(true) 49 | require.NoError(t, err) 50 | require.Equal(t, RefHashes[len(ChangeSets)-1], hash) 51 | require.Equal(t, len(ChangeSets), int(v)) 52 | require.NoError(t, snapshot.Close()) 53 | } 54 | 55 | func TestSnapshotExport(t *testing.T) { 56 | expNodes := []*types.SnapshotNode{ 57 | {Key: []byte("hello"), Value: []byte("world1"), Version: 2, Height: 0}, 58 | {Key: []byte("hello1"), Value: []byte("world1"), Version: 2, Height: 0}, 59 | {Key: []byte("hello1"), Value: nil, Version: 3, Height: 1}, 60 | {Key: []byte("hello2"), Value: []byte("world1"), Version: 3, Height: 0}, 61 | {Key: []byte("hello3"), Value: []byte("world1"), Version: 3, Height: 0}, 62 | {Key: []byte("hello3"), Value: nil, Version: 3, Height: 1}, 63 | {Key: []byte("hello2"), Value: nil, Version: 3, Height: 2}, 64 | } 65 | 66 | // setup test tree 67 | tree := New(0) 68 | for _, changes := range ChangeSets[:3] { 69 | tree.ApplyChangeSet(changes) 70 | _, _, err := tree.SaveVersion(true) 71 | require.NoError(t, err) 72 | } 73 | 74 | snapshotDir := t.TempDir() 75 | require.NoError(t, tree.WriteSnapshot(context.Background(), snapshotDir)) 76 | 77 | snapshot, err := OpenSnapshot(snapshotDir) 78 | require.NoError(t, err) 79 | 80 | var nodes []*types.SnapshotNode 81 | exporter := snapshot.Export() 82 | for { 83 | node, err := exporter.Next() 84 | if errors.Is(err, errorutils.ErrorExportDone) { 85 | break 86 | } 87 | require.NoError(t, err) 88 | nodes = append(nodes, node) 89 | } 90 | 91 | require.Equal(t, expNodes, nodes) 92 | } 93 | 94 | func TestSnapshotImportExport(t *testing.T) { 95 | // setup test tree 96 | tree := New(0) 97 | for _, changes := range ChangeSets { 98 | tree.ApplyChangeSet(changes) 99 | _, _, err := tree.SaveVersion(true) 100 | require.NoError(t, err) 101 | } 102 | 103 | snapshotDir := t.TempDir() 104 | require.NoError(t, tree.WriteSnapshot(context.Background(), snapshotDir)) 105 | snapshot, err := OpenSnapshot(snapshotDir) 106 | require.NoError(t, err) 107 | 108 | ch := make(chan *types.SnapshotNode) 109 | 110 | go func() { 111 | defer close(ch) 112 | 113 | exporter := 
snapshot.Export() 114 | for { 115 | node, err := exporter.Next() 116 | if err == errorutils.ErrorExportDone { 117 | break 118 | } 119 | require.NoError(t, err) 120 | ch <- node 121 | } 122 | }() 123 | 124 | snapshotDir2 := t.TempDir() 125 | err = doImport(snapshotDir2, tree.Version(), ch) 126 | require.NoError(t, err) 127 | 128 | snapshot2, err := OpenSnapshot(snapshotDir2) 129 | require.NoError(t, err) 130 | require.Equal(t, snapshot.RootNode().Hash(), snapshot2.RootNode().Hash()) 131 | 132 | // verify all the node hashes in snapshot 133 | for i := 0; i < snapshot2.nodesLen(); i++ { 134 | node := snapshot2.Node(uint32(i)) 135 | require.Equal(t, node.Hash(), HashNode(node)) 136 | } 137 | } 138 | 139 | func TestDBSnapshotRestore(t *testing.T) { 140 | db, err := OpenDB(logger.NewNopLogger(), 0, Options{ 141 | Dir: t.TempDir(), 142 | CreateIfMissing: true, 143 | InitialStores: []string{"test", "test2"}, 144 | AsyncCommitBuffer: -1, 145 | }) 146 | require.NoError(t, err) 147 | 148 | for _, changes := range ChangeSets { 149 | cs := []*proto.NamedChangeSet{ 150 | { 151 | Name: "test", 152 | Changeset: changes, 153 | }, 154 | { 155 | Name: "test2", 156 | Changeset: changes, 157 | }, 158 | } 159 | require.NoError(t, db.ApplyChangeSets(cs)) 160 | _, err := db.Commit() 161 | require.NoError(t, err) 162 | testSnapshotRoundTrip(t, db) 163 | } 164 | 165 | require.NoError(t, db.RewriteSnapshot(context.Background())) 166 | require.NoError(t, db.Reload()) 167 | require.Equal(t, len(ChangeSets), int(db.metadata.CommitInfo.Version)) 168 | testSnapshotRoundTrip(t, db) 169 | } 170 | 171 | func testSnapshotRoundTrip(t *testing.T, db *DB) { 172 | exporter, err := NewMultiTreeExporter(db.dir, uint32(db.Version()), true) 173 | require.NoError(t, err) 174 | 175 | restoreDir := t.TempDir() 176 | importer, err := NewMultiTreeImporter(restoreDir, uint64(db.Version())) 177 | require.NoError(t, err) 178 | 179 | for { 180 | item, err := exporter.Next() 181 | if err == errorutils.ErrorExportDone { 182 | break 183 | } 184 | require.NoError(t, err) 185 | require.NoError(t, importer.Add(item)) 186 | } 187 | 188 | require.NoError(t, importer.Close()) 189 | require.NoError(t, exporter.Close()) 190 | 191 | db2, err := OpenDB(logger.NewNopLogger(), 0, Options{Dir: restoreDir}) 192 | require.NoError(t, err) 193 | require.Equal(t, db.LastCommitInfo(), db2.LastCommitInfo()) 194 | 195 | // the imported db function normally 196 | _, err = db2.Commit() 197 | require.NoError(t, err) 198 | } 199 | -------------------------------------------------------------------------------- /sc/store.go: -------------------------------------------------------------------------------- 1 | package sc 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/sei-protocol/sei-db/common/logger" 7 | "github.com/sei-protocol/sei-db/common/utils" 8 | "github.com/sei-protocol/sei-db/config" 9 | "github.com/sei-protocol/sei-db/proto" 10 | "github.com/sei-protocol/sei-db/sc/memiavl" 11 | "github.com/sei-protocol/sei-db/sc/types" 12 | ) 13 | 14 | var _ types.Committer = (*CommitStore)(nil) 15 | 16 | type CommitStore struct { 17 | logger logger.Logger 18 | db *memiavl.DB 19 | opts memiavl.Options 20 | } 21 | 22 | func NewCommitStore(homeDir string, logger logger.Logger, config config.StateCommitConfig) *CommitStore { 23 | scDir := homeDir 24 | if config.Directory != "" { 25 | scDir = config.Directory 26 | } 27 | opts := memiavl.Options{ 28 | Dir: utils.GetCommitStorePath(scDir), 29 | ZeroCopy: config.ZeroCopy, 30 | AsyncCommitBuffer: config.AsyncCommitBuffer, 31 | 
SnapshotInterval: config.SnapshotInterval, 32 | SnapshotKeepRecent: config.SnapshotKeepRecent, 33 | SnapshotWriterLimit: config.SnapshotWriterLimit, 34 | CacheSize: config.CacheSize, 35 | CreateIfMissing: true, 36 | } 37 | commitStore := &CommitStore{ 38 | logger: logger, 39 | opts: opts, 40 | } 41 | return commitStore 42 | } 43 | 44 | func (cs *CommitStore) Initialize(initialStores []string) { 45 | cs.opts.InitialStores = initialStores 46 | } 47 | 48 | func (cs *CommitStore) SetInitialVersion(initialVersion int64) error { 49 | return cs.db.SetInitialVersion(initialVersion) 50 | } 51 | 52 | func (cs *CommitStore) Rollback(targetVersion int64) error { 53 | options := cs.opts 54 | options.LoadForOverwriting = true 55 | if cs.db != nil { 56 | cs.db.Close() 57 | } 58 | db, err := memiavl.OpenDB(cs.logger, targetVersion, options) 59 | if err != nil { 60 | return err 61 | } 62 | cs.db = db 63 | return nil 64 | } 65 | 66 | // LoadVersion loads the db at the target version; copyExisting, when true, creates a new read-only memiavl object from the existing folder 67 | func (cs *CommitStore) LoadVersion(targetVersion int64, copyExisting bool) (types.Committer, error) { 68 | cs.logger.Info(fmt.Sprintf("SeiDB load target memIAVL version %d, copyExisting = %v\n", targetVersion, copyExisting)) 69 | if copyExisting { 70 | opts := cs.opts 71 | opts.ReadOnly = copyExisting 72 | opts.CreateIfMissing = false 73 | db, err := memiavl.OpenDB(cs.logger, targetVersion, opts) 74 | if err != nil { 75 | return nil, err 76 | } 77 | return &CommitStore{ 78 | logger: cs.logger, 79 | db: db, 80 | opts: opts, 81 | }, nil 82 | } 83 | if cs.db != nil { 84 | cs.db.Close() 85 | } 86 | db, err := memiavl.OpenDB(cs.logger, targetVersion, cs.opts) 87 | if err != nil { 88 | return nil, err 89 | } 90 | cs.db = db 91 | return cs, nil 92 | } 93 | 94 | func (cs *CommitStore) Commit() (int64, error) { 95 | return cs.db.Commit() 96 | } 97 | 98 | func (cs *CommitStore) Version() int64 { 99 | return cs.db.Version() 100 | } 101 | 102 | func (cs *CommitStore) GetLatestVersion() (int64, error) { 103 | return memiavl.GetLatestVersion(cs.opts.Dir) 104 | } 105 | 106 | func (cs *CommitStore) GetEarliestVersion() (int64, error) { 107 | return memiavl.GetEarliestVersion(cs.opts.Dir) 108 | } 109 | 110 | func (cs *CommitStore) ApplyChangeSets(changesets []*proto.NamedChangeSet) error { 111 | return cs.db.ApplyChangeSets(changesets) 112 | } 113 | 114 | func (cs *CommitStore) ApplyUpgrades(upgrades []*proto.TreeNameUpgrade) error { 115 | return cs.db.ApplyUpgrades(upgrades) 116 | } 117 | 118 | func (cs *CommitStore) WorkingCommitInfo() *proto.CommitInfo { 119 | return cs.db.WorkingCommitInfo() 120 | } 121 | 122 | func (cs *CommitStore) LastCommitInfo() *proto.CommitInfo { 123 | return cs.db.LastCommitInfo() 124 | } 125 | 126 | func (cs *CommitStore) GetTreeByName(name string) types.Tree { 127 | return cs.db.TreeByName(name) 128 | } 129 | 130 | func (cs *CommitStore) Exporter(version int64) (types.Exporter, error) { 131 | exporter, err := memiavl.NewMultiTreeExporter(cs.opts.Dir, uint32(version), true) 132 | if err != nil { 133 | return nil, err 134 | } 135 | return exporter, nil 136 | } 137 | 138 | func (cs *CommitStore) Importer(version int64) (types.Importer, error) { 139 | treeImporter, err := memiavl.NewMultiTreeImporter(cs.opts.Dir, uint64(version)) 140 | if err != nil { 141 | return nil, err 142 | } 143 | return treeImporter, nil 144 | } 145 | 146 | func (cs *CommitStore) Close() error { 147 | return cs.db.Close() 148 | } 149 | --------------------------------------------------------------------------------
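A minimal end-to-end sketch of the CommitStore lifecycle above (homeDir, the store names, and changesets are hypothetical; error handling elided):

    cs := sc.NewCommitStore(homeDir, logger.NewNopLogger(), config.StateCommitConfig{})
    cs.Initialize([]string{"bank", "acc"})
    committer, _ := cs.LoadVersion(0, false)
    _ = committer.ApplyChangeSets(changesets) // changesets: []*proto.NamedChangeSet
    _, _ = committer.Commit()
    _ = committer.Close()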
/sc/types/snapshot.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | import "io" 4 | 5 | type Importer interface { 6 | AddTree(name string) error 7 | 8 | AddNode(node *SnapshotNode) 9 | 10 | io.Closer 11 | } 12 | 13 | type Exporter interface { 14 | Next() (interface{}, error) 15 | 16 | io.Closer 17 | } 18 | 19 | // SnapshotNode contains import/export node data. 20 | type SnapshotNode struct { 21 | Key []byte 22 | Value []byte 23 | Version int64 24 | Height int8 25 | } 26 | -------------------------------------------------------------------------------- /sc/types/store.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | import ( 4 | "io" 5 | 6 | "github.com/sei-protocol/sei-db/proto" 7 | ) 8 | 9 | type Committer interface { 10 | Initialize(initialStores []string) 11 | 12 | Commit() (int64, error) 13 | 14 | Version() int64 15 | 16 | GetLatestVersion() (int64, error) 17 | 18 | GetEarliestVersion() (int64, error) 19 | 20 | ApplyChangeSets(cs []*proto.NamedChangeSet) error 21 | 22 | ApplyUpgrades(upgrades []*proto.TreeNameUpgrade) error 23 | 24 | WorkingCommitInfo() *proto.CommitInfo 25 | 26 | LastCommitInfo() *proto.CommitInfo 27 | 28 | LoadVersion(targetVersion int64, copyExisting bool) (Committer, error) 29 | 30 | Rollback(targetVersion int64) error 31 | 32 | SetInitialVersion(initialVersion int64) error 33 | 34 | GetTreeByName(name string) Tree 35 | 36 | Importer(version int64) (Importer, error) 37 | 38 | Exporter(version int64) (Exporter, error) 39 | 40 | io.Closer 41 | } 42 | -------------------------------------------------------------------------------- /sc/types/tree.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | import ( 4 | "io" 5 | 6 | ics23 "github.com/confio/ics23/go" 7 | dbm "github.com/tendermint/tm-db" 8 | ) 9 | 10 | type Tree interface { 11 | Get(key []byte) []byte 12 | 13 | Has(key []byte) bool 14 | 15 | Set(key, value []byte) 16 | 17 | Remove(key []byte) 18 | 19 | Version() int64 20 | 21 | RootHash() []byte 22 | 23 | Iterator(start, end []byte, ascending bool) dbm.Iterator 24 | 25 | GetProof(key []byte) *ics23.CommitmentProof 26 | 27 | io.Closer 28 | } 29 | -------------------------------------------------------------------------------- /ss/pebbledb/batch.go: -------------------------------------------------------------------------------- 1 | package pebbledb 2 | 3 | import ( 4 | "encoding/binary" 5 | "fmt" 6 | 7 | "github.com/cockroachdb/pebble" 8 | "github.com/sei-protocol/sei-db/common/errors" 9 | ) 10 | 11 | type Batch struct { 12 | storage *pebble.DB 13 | batch *pebble.Batch 14 | version int64 15 | } 16 | 17 | func NewBatch(storage *pebble.DB, version int64) (*Batch, error) { 18 | var versionBz [VersionSize]byte 19 | binary.LittleEndian.PutUint64(versionBz[:], uint64(version)) 20 | 21 | batch := storage.NewBatch() 22 | 23 | if err := batch.Set([]byte(latestVersionKey), versionBz[:], nil); err != nil { 24 | return nil, fmt.Errorf("failed to write PebbleDB batch: %w", err) 25 | } 26 | 27 | return &Batch{ 28 | storage: storage, 29 | batch: batch, 30 | version: version, 31 | }, nil 32 | } 33 | 34 | func (b *Batch) Size() int { 35 | return b.batch.Len() 36 | } 37 | 38 | func (b *Batch) Reset() { 39 | b.batch.Reset() 40 | } 41 | 42 | func (b *Batch) set(storeKey string, tombstone int64, key, value []byte) error { 43 | prefixedKey := MVCCEncode(prependStoreKey(storeKey, key), b.version) 44 | 
prefixedVal := MVCCEncode(value, tombstone) 45 | 46 | if err := b.batch.Set(prefixedKey, prefixedVal, nil); err != nil { 47 | return fmt.Errorf("failed to write PebbleDB batch: %w", err) 48 | } 49 | 50 | return nil 51 | } 52 | 53 | func (b *Batch) Set(storeKey string, key, value []byte) error { 54 | return b.set(storeKey, 0, key, value) 55 | } 56 | 57 | func (b *Batch) Delete(storeKey string, key []byte) error { 58 | return b.set(storeKey, b.version, key, []byte(tombstoneVal)) 59 | } 60 | 61 | func (b *Batch) Write() (err error) { 62 | defer func() { 63 | err = errors.Join(err, b.batch.Close()) 64 | }() 65 | 66 | return b.batch.Commit(defaultWriteOpts) 67 | } 68 | 69 | // RawBatch supports writing kv pairs at arbitrary versions, in any order 70 | type RawBatch struct { 71 | storage *pebble.DB 72 | batch *pebble.Batch 73 | } 74 | 75 | func NewRawBatch(storage *pebble.DB) (*RawBatch, error) { 76 | batch := storage.NewBatch() 77 | 78 | return &RawBatch{ 79 | storage: storage, 80 | batch: batch, 81 | }, nil 82 | } 83 | 84 | func (b *RawBatch) Size() int { 85 | return b.batch.Len() 86 | } 87 | 88 | func (b *RawBatch) Reset() { 89 | b.batch.Reset() 90 | } 91 | 92 | func (b *RawBatch) set(storeKey string, tombstone int64, key, value []byte, version int64) error { 93 | prefixedKey := MVCCEncode(prependStoreKey(storeKey, key), version) 94 | prefixedVal := MVCCEncode(value, tombstone) 95 | 96 | if err := b.batch.Set(prefixedKey, prefixedVal, nil); err != nil { 97 | return fmt.Errorf("failed to write PebbleDB batch: %w", err) 98 | } 99 | 100 | return nil 101 | } 102 | 103 | func (b *RawBatch) Set(storeKey string, key, value []byte, version int64) error { 104 | return b.set(storeKey, 0, key, value, version) 105 | } 106 | 107 | func (b *RawBatch) Delete(storeKey string, key []byte, version int64) error { 108 | return b.set(storeKey, version, key, []byte(tombstoneVal), version) 109 | } 110 | 111 | // HardDelete physically removes the key by encoding it with the batch’s version 112 | // and calling the underlying pebble.Batch.Delete.
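// Note that, unlike Delete above (which writes a tombstone entry at the batch
// version and leaves prior versions readable), HardDelete removes only the
// entry encoded at exactly b.version; older versions of the key are untouched.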
113 | func (b *Batch) HardDelete(storeKey string, key []byte) error { 114 | fullKey := MVCCEncode(prependStoreKey(storeKey, key), b.version) 115 | if err := b.batch.Delete(fullKey, nil); err != nil { 116 | return fmt.Errorf("failed to hard delete key: %w", err) 117 | } 118 | return nil 119 | } 120 | 121 | func (b *RawBatch) Write() (err error) { 122 | defer func() { 123 | err = errors.Join(err, b.batch.Close()) 124 | }() 125 | 126 | return b.batch.Commit(defaultWriteOpts) 127 | } 128 | -------------------------------------------------------------------------------- /ss/pebbledb/bench_test.go: -------------------------------------------------------------------------------- 1 | package pebbledb 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/sei-protocol/sei-db/config" 7 | sstest "github.com/sei-protocol/sei-db/ss/test" 8 | "github.com/sei-protocol/sei-db/ss/types" 9 | ) 10 | 11 | func BenchmarkDBBackend(b *testing.B) { 12 | s := &sstest.StorageBenchSuite{ 13 | NewDB: func(dir string) (types.StateStore, error) { 14 | return New(dir, config.DefaultStateStoreConfig()) 15 | }, 16 | BenchBackendName: "PebbleDB", 17 | } 18 | 19 | s.BenchmarkGet(b) 20 | s.BenchmarkApplyChangeset(b) 21 | s.BenchmarkIterate(b) 22 | } 23 | -------------------------------------------------------------------------------- /ss/pebbledb/comparator.go: -------------------------------------------------------------------------------- 1 | package pebbledb 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "fmt" 7 | 8 | "github.com/cockroachdb/pebble" 9 | "github.com/sei-protocol/sei-db/common/utils" 10 | ) 11 | 12 | // MVCCComparer returns a PebbleDB Comparer with encoding and decoding routines 13 | // for MVCC control, used to compare and store versioned keys. 14 | // 15 | // Note: This Comparer implementation is largely based on PebbleDB's internal 16 | // MVCC example, which can be found here: 17 | // https://github.com/cockroachdb/pebble/blob/master/cmd/pebble/mvcc.go 18 | var MVCCComparer = &pebble.Comparer{ 19 | Name: "ss_pebbledb_comparator", 20 | 21 | Compare: MVCCKeyCompare, 22 | 23 | AbbreviatedKey: func(k []byte) uint64 { 24 | key, _, ok := SplitMVCCKey(k) 25 | if !ok { 26 | return 0 27 | } 28 | 29 | return pebble.DefaultComparer.AbbreviatedKey(key) 30 | }, 31 | 32 | Equal: func(a, b []byte) bool { 33 | return MVCCKeyCompare(a, b) == 0 34 | }, 35 | 36 | Separator: func(dst, a, b []byte) []byte { 37 | aKey, _, ok := SplitMVCCKey(a) 38 | if !ok { 39 | return append(dst, a...) 40 | } 41 | 42 | bKey, _, ok := SplitMVCCKey(b) 43 | if !ok { 44 | return append(dst, a...) 45 | } 46 | 47 | // if the keys are the same just return a 48 | if bytes.Equal(aKey, bKey) { 49 | return append(dst, a...) 50 | } 51 | 52 | n := len(dst) 53 | 54 | // MVCC key comparison uses bytes.Compare on the roachpb.Key, which is the 55 | // same semantics as pebble.DefaultComparer, so reuse the latter's Separator 56 | // implementation. 57 | dst = pebble.DefaultComparer.Separator(dst, aKey, bKey) 58 | 59 | // Did we pick a separator different than aKey? If we did not, we can't do 60 | // better than a. 61 | buf := dst[n:] 62 | if bytes.Equal(aKey, buf) { 63 | return append(dst[:n], a...) 64 | } 65 | 66 | // The separator is > aKey, so we only need to add the timestamp sentinel. 67 | return append(dst, 0) 68 | }, 69 | 70 | ImmediateSuccessor: func(dst, a []byte) []byte { 71 | // The key `a` is guaranteed to be a bare prefix: It's a key without a version 72 | // — just a trailing 0-byte to signify the length of the version. 
For example 73 | // the user key "foo" is encoded as: "foo\0". We need to encode the immediate 74 | // successor to "foo", which in the natural byte ordering is "foo\0". Append 75 | // a single additional zero, to encode the user key "foo\0" with a zero-length 76 | // version. 77 | return append(append(dst, a...), 0) 78 | }, 79 | 80 | Successor: func(dst, a []byte) []byte { 81 | aKey, _, ok := SplitMVCCKey(a) 82 | if !ok { 83 | return append(dst, a...) 84 | } 85 | 86 | n := len(dst) 87 | 88 | // MVCC key comparison uses bytes.Compare on the roachpb.Key, which is the 89 | // same semantics as pebble.DefaultComparer, so reuse the latter's Successor 90 | // implementation. 91 | dst = pebble.DefaultComparer.Successor(dst, aKey) 92 | 93 | // Did we pick a successor different than aKey? If we did not, we can't do 94 | // better than a. 95 | buf := dst[n:] 96 | if bytes.Equal(aKey, buf) { 97 | return append(dst[:n], a...) 98 | } 99 | 100 | // The successor is > aKey, so we only need to add the timestamp sentinel. 101 | return append(dst, 0) 102 | }, 103 | 104 | FormatKey: func(k []byte) fmt.Formatter { 105 | return mvccKeyFormatter{key: k} 106 | }, 107 | 108 | Split: func(k []byte) int { 109 | key, _, ok := SplitMVCCKey(k) 110 | if !ok { 111 | return len(k) 112 | } 113 | 114 | // This matches the behavior of libroach/KeyPrefix. RocksDB requires that 115 | // keys generated via a SliceTransform be comparable with normal encoded 116 | // MVCC keys. Encoded MVCC keys have a suffix indicating the number of 117 | // bytes of timestamp data. MVCC keys without a timestamp have a suffix of 118 | // 0. We're careful in EncodeKey to make sure that the user-key always has 119 | // a trailing 0. If there is no timestamp this falls out naturally. If 120 | // there is a timestamp we prepend a 0 to the encoded timestamp data. 121 | return len(key) + 1 122 | }, 123 | } 124 | 125 | type mvccKeyFormatter struct { 126 | key []byte 127 | } 128 | 129 | func (f mvccKeyFormatter) Format(s fmt.State, verb rune) { 130 | k, vBz, ok := SplitMVCCKey(f.key) 131 | if ok { 132 | v, _ := decodeUint64Ascending(vBz) 133 | fmt.Fprintf(s, "%s/%d", k, v) 134 | } else { 135 | fmt.Fprintf(s, "%s", f.key) 136 | } 137 | } 138 | 139 | // SplitMVCCKey accepts an MVCC key and returns the "user" key, the MVCC version, 140 | // and a boolean indicating if the provided key is an MVCC key. 141 | // 142 | // Note, internally, we must make a copy of the provided mvccKey argument, which 143 | // typically comes from the Key() method as it's not safe. 144 | func SplitMVCCKey(mvccKey []byte) (key, version []byte, ok bool) { 145 | if len(mvccKey) == 0 { 146 | return nil, nil, false 147 | } 148 | 149 | mvccKeyCopy := utils.Clone(mvccKey) 150 | 151 | n := len(mvccKeyCopy) - 1 152 | tsLen := int(mvccKeyCopy[n]) 153 | if n < tsLen { 154 | return nil, nil, false 155 | } 156 | 157 | key = mvccKeyCopy[:n-tsLen] 158 | if tsLen > 0 { 159 | version = mvccKeyCopy[n-tsLen+1 : len(mvccKeyCopy)-1] 160 | } 161 | 162 | return key, version, true 163 | } 164 | 165 | // MVCCKeyCompare compares two MVCC keys. 166 | func MVCCKeyCompare(a, b []byte) int { 167 | aEnd := len(a) - 1 168 | bEnd := len(b) - 1 169 | if aEnd < 0 || bEnd < 0 { 170 | // This should never happen unless there is some sort of corruption of 171 | // the keys. This is a little bizarre, but the behavior exactly matches 172 | // engine/db.cc:DBComparator. 173 | return bytes.Compare(a, b) 174 | } 175 | 176 | // Compute the index of the separator between the key and the timestamp. 
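// As a worked example of the layout (see MVCCEncode below): the key "foo" at
// version 5 is encoded as "foo" + 0x00 + eight big-endian version bytes + 0x09,
// so the final byte holds the timestamp length and aSep/bSep land on the 0x00
// separator that terminates the user key.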
177 | aSep := aEnd - int(a[aEnd]) 178 | bSep := bEnd - int(b[bEnd]) 179 | if aSep < 0 || bSep < 0 { 180 | // This should never happen unless there is some sort of corruption of 181 | // the keys. This is a little bizarre, but the behavior exactly matches 182 | // engine/db.cc:DBComparator. 183 | return bytes.Compare(a, b) 184 | } 185 | 186 | // compare the "user key" part of the key 187 | if c := bytes.Compare(a[:aSep], b[:bSep]); c != 0 { 188 | return c 189 | } 190 | 191 | // compare the timestamp part of the key 192 | aTS := a[aSep:aEnd] 193 | bTS := b[bSep:bEnd] 194 | if len(aTS) == 0 { 195 | if len(bTS) == 0 { 196 | return 0 197 | } 198 | return -1 199 | } else if len(bTS) == 0 { 200 | return 1 201 | } 202 | 203 | return bytes.Compare(aTS, bTS) 204 | } 205 | 206 | // MVCCEncode encodes a key and version as: <key>\x00[<version>]<#version-bytes> 207 | func MVCCEncode(key []byte, version int64) (dst []byte) { 208 | dst = append(dst, key...) 209 | dst = append(dst, 0) 210 | 211 | if version != 0 { 212 | extra := byte(1 + 8) 213 | dst = encodeUint64Ascending(dst, uint64(version)) 214 | dst = append(dst, extra) 215 | } 216 | 217 | return dst 218 | } 219 | 220 | // encodeUint64Ascending encodes the uint64 value using a big-endian 8 byte 221 | // representation. The bytes are appended to the supplied buffer and 222 | // the final buffer is returned. 223 | func encodeUint64Ascending(dst []byte, v uint64) []byte { 224 | return append( 225 | dst, 226 | byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), 227 | byte(v>>24), byte(v>>16), byte(v>>8), byte(v), 228 | ) 229 | } 230 | 231 | // decodeUint64Ascending decodes an int64 from the input buffer, treating 232 | // the input as a big-endian 8 byte uint64 representation. The decoded int64 is 233 | // returned. 234 | func decodeUint64Ascending(b []byte) (int64, error) { 235 | if len(b) < 8 { 236 | return 0, fmt.Errorf("insufficient bytes to decode uint64 value; expected 8; got %d", len(b)) 237 | } 238 | 239 | v := int64(binary.BigEndian.Uint64(b)) 240 | return v, nil 241 | } 242 | -------------------------------------------------------------------------------- /ss/pebbledb/db_test.go: -------------------------------------------------------------------------------- 1 | package pebbledb 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/sei-protocol/sei-db/config" 7 | sstest "github.com/sei-protocol/sei-db/ss/test" 8 | "github.com/sei-protocol/sei-db/ss/types" 9 | "github.com/stretchr/testify/suite" 10 | ) 11 | 12 | func TestStorageTestSuite(t *testing.T) { 13 | s := &sstest.StorageTestSuite{ 14 | NewDB: func(dir string, config config.StateStoreConfig) (types.StateStore, error) { 15 | return New(dir, config) 16 | }, 17 | Config: config.DefaultStateStoreConfig(), 18 | EmptyBatchSize: 12, 19 | } 20 | 21 | suite.Run(t, s) 22 | } 23 | -------------------------------------------------------------------------------- /ss/pebbledb_init.go: -------------------------------------------------------------------------------- 1 | package ss 2 | 3 | import ( 4 | "github.com/sei-protocol/sei-db/common/utils" 5 | "github.com/sei-protocol/sei-db/config" 6 | "github.com/sei-protocol/sei-db/ss/pebbledb" 7 | "github.com/sei-protocol/sei-db/ss/types" 8 | ) 9 | 10 | func init() { 11 | initializer := func(dir string, configs config.StateStoreConfig) (types.StateStore, error) { 12 | dbHome := utils.GetStateStorePath(dir, configs.Backend) 13 | if configs.DBDirectory != "" { 14 | dbHome = configs.DBDirectory 15 | } 16 | return pebbledb.New(dbHome, configs) 17 | } 18 | RegisterBackend(PebbleDBBackend, initializer) 19 | } 20 |
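// Once this init has registered the backend, callers obtain a pebbledb-backed
// store through the generic constructor (a sketch mirroring ss/store_test.go;
// homeDir is caller-chosen):
//
//	store, err := NewStateStore(logger.NewNopLogger(), homeDir,
//		config.StateStoreConfig{Backend: string(PebbleDBBackend)})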
-------------------------------------------------------------------------------- /ss/pruning/manager.go: -------------------------------------------------------------------------------- 1 | package pruning 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "time" 7 | 8 | "github.com/sei-protocol/sei-db/common/logger" 9 | "github.com/sei-protocol/sei-db/ss/types" 10 | ) 11 | 12 | type Manager struct { 13 | logger logger.Logger 14 | stateStore types.StateStore 15 | keepRecent int64 16 | pruneInterval int64 17 | started bool 18 | } 19 | 20 | // NewPruningManager creates a new pruning manager for the state store. 21 | // The pruning manager periodically prunes the state store based on the keep-recent and prune-interval configs. 22 | func NewPruningManager( 23 | logger logger.Logger, 24 | stateStore types.StateStore, 25 | keepRecent int64, 26 | pruneInterval int64, 27 | ) *Manager { 28 | return &Manager{ 29 | logger: logger, 30 | stateStore: stateStore, 31 | keepRecent: keepRecent, 32 | pruneInterval: pruneInterval, 33 | } 34 | } 35 | 36 | func (m *Manager) Start() { 37 | if m.keepRecent <= 0 || m.pruneInterval <= 0 || m.started { 38 | return 39 | } 40 | m.started = true 41 | go func() { 42 | for { 43 | pruneStartTime := time.Now() 44 | latestVersion, _ := m.stateStore.GetLatestVersion() 45 | pruneVersion := latestVersion - m.keepRecent 46 | if pruneVersion > 0 { 47 | // prune all versions up to and including the pruneVersion 48 | if err := m.stateStore.Prune(pruneVersion); err != nil { 49 | m.logger.Error("failed to prune versions till", "version", pruneVersion, "err", err) 50 | } 51 | m.logger.Info(fmt.Sprintf("Pruned state store till version %d took %s\n", pruneVersion, time.Since(pruneStartTime))) 52 | } 53 | 54 | // Generate a random percentage (between 0% and 100%) of the fixed interval as a delay 55 | randomPercentage := rand.Float64() // Generate a random float between 0 and 1 56 | randomDelay := int64(float64(m.pruneInterval) * randomPercentage) 57 | time.Sleep(time.Duration(m.pruneInterval+randomDelay) * time.Second) 58 | } 59 | }() 60 | } 61 | -------------------------------------------------------------------------------- /ss/rocksdb/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sei-protocol/sei-db/a90aa3ade77e93ac53b10d0505f6f4180f233478/ss/rocksdb/README.md -------------------------------------------------------------------------------- /ss/rocksdb/batch.go: -------------------------------------------------------------------------------- 1 | //go:build rocksdbBackend 2 | // +build rocksdbBackend 3 | 4 | package rocksdb 5 | 6 | import ( 7 | "encoding/binary" 8 | 9 | "github.com/linxGnu/grocksdb" 10 | ) 11 | 12 | type Batch struct { 13 | version int64 14 | ts [TimestampSize]byte 15 | storage *grocksdb.DB 16 | cfHandle *grocksdb.ColumnFamilyHandle 17 | batch *grocksdb.WriteBatch 18 | } 19 | 20 | // NewBatch creates a new versioned batch used for batch writes. The caller 21 | // must call Write() on the returned batch to commit the changes; Write also 22 | // destroys the underlying write batch when it returns.
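// A minimal usage sketch (the store name "bank" and the keys are illustrative):
//
//	batch := NewBatch(db, version)
//	_ = batch.Set("bank", key, value)
//	_ = batch.Delete("bank", staleKey)
//	if err := batch.Write(); err != nil {
//		// surface the error to the caller; the batch is already destroyed
//	}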
23 | func NewBatch(db *Database, version int64) Batch { 24 | var ts [TimestampSize]byte 25 | binary.LittleEndian.PutUint64(ts[:], uint64(version)) 26 | 27 | batch := grocksdb.NewWriteBatch() 28 | batch.Put([]byte(latestVersionKey), ts[:]) 29 | 30 | return Batch{ 31 | version: version, 32 | ts: ts, 33 | storage: db.storage, 34 | cfHandle: db.cfHandle, 35 | batch: batch, 36 | } 37 | } 38 | 39 | func (b Batch) Size() int { 40 | return len(b.batch.Data()) 41 | } 42 | 43 | func (b Batch) Reset() { 44 | b.batch.Clear() 45 | } 46 | 47 | func (b Batch) Set(storeKey string, key, value []byte) error { 48 | prefixedKey := prependStoreKey(storeKey, key) 49 | b.batch.PutCFWithTS(b.cfHandle, prefixedKey, b.ts[:], value) 50 | return nil 51 | } 52 | 53 | func (b Batch) Delete(storeKey string, key []byte) error { 54 | prefixedKey := prependStoreKey(storeKey, key) 55 | b.batch.DeleteCFWithTS(b.cfHandle, prefixedKey, b.ts[:]) 56 | return nil 57 | } 58 | 59 | func (b Batch) Write() error { 60 | defer b.batch.Destroy() 61 | return b.storage.Write(defaultWriteOpts, b.batch) 62 | } 63 | -------------------------------------------------------------------------------- /ss/rocksdb/bench_test.go: -------------------------------------------------------------------------------- 1 | //go:build rocksdbBackend 2 | // +build rocksdbBackend 3 | 4 | package rocksdb 5 | 6 | import ( 7 | "testing" 8 | 9 | "github.com/sei-protocol/sei-db/config" 10 | sstest "github.com/sei-protocol/sei-db/ss/test" 11 | "github.com/sei-protocol/sei-db/ss/types" 12 | ) 13 | 14 | func BenchmarkDBBackend(b *testing.B) { 15 | s := &sstest.StorageBenchSuite{ 16 | NewDB: func(dir string) (types.StateStore, error) { 17 | return New(dir, config.DefaultStateStoreConfig()) 18 | }, 19 | BenchBackendName: "RocksDB", 20 | } 21 | 22 | s.BenchmarkGet(b) 23 | s.BenchmarkApplyChangeset(b) 24 | s.BenchmarkIterate(b) 25 | } 26 | -------------------------------------------------------------------------------- /ss/rocksdb/comparator.go: -------------------------------------------------------------------------------- 1 | //go:build rocksdbBackend 2 | // +build rocksdbBackend 3 | 4 | package rocksdb 5 | 6 | import ( 7 | "bytes" 8 | "encoding/binary" 9 | 10 | "github.com/linxGnu/grocksdb" 11 | ) 12 | 13 | // CreateTSComparator should behave identically to the RocksDB builtin timestamp comparator. 14 | // We also use the same builtin comparator name so the builtin tools `ldb`/`sst_dump` 15 | // can work with the database. 16 | func CreateTSComparator() *grocksdb.Comparator { 17 | return grocksdb.NewComparatorWithTimestamp( 18 | "leveldb.BytewiseComparator.u64ts", 19 | TimestampSize, 20 | compare, 21 | compareTS, 22 | compareWithoutTS, 23 | ) 24 | } 25 | 26 | // compareTS compares timestamps as little-endian encoded integers. 27 | // 28 | // NOTICE: The behavior must be identical to RocksDB builtin comparator 29 | // "leveldb.BytewiseComparator.u64ts". 30 | func compareTS(bz1 []byte, bz2 []byte) int { 31 | ts1 := binary.LittleEndian.Uint64(bz1) 32 | ts2 := binary.LittleEndian.Uint64(bz2) 33 | 34 | switch { 35 | case ts1 < ts2: 36 | return -1 37 | 38 | case ts1 > ts2: 39 | return 1 40 | 41 | default: 42 | return 0 43 | } 44 | } 45 | 46 | // compare compares two internal keys with timestamp suffix, larger timestamp 47 | // comes first. 48 | // 49 | // NOTICE: The behavior must be identical to RocksDB builtin comparator 50 | // "leveldb.BytewiseComparator.u64ts".
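// For example, for the same user key written at versions 5 and 7, the internal
// key stamped with timestamp 7 sorts first, so a read at version 6 skips it and
// lands on the version-5 entry.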
51 | func compare(a []byte, b []byte) int { 52 | ret := compareWithoutTS(a, true, b, true) 53 | if ret != 0 { 54 | return ret 55 | } 56 | 57 | // Compare timestamp. For the same user key with different timestamps, larger 58 | // (newer) timestamp comes first, which means seek operation will try to find 59 | // a version less than or equal to the target version. 60 | return -compareTS(a[len(a)-TimestampSize:], b[len(b)-TimestampSize:]) 61 | } 62 | 63 | // compareWithoutTS compares two internal keys without the timestamp part. 64 | // 65 | // NOTICE: the behavior must be identical to RocksDB builtin comparator 66 | // "leveldb.BytewiseComparator.u64ts". 67 | func compareWithoutTS(a []byte, aHasTS bool, b []byte, bHasTS bool) int { 68 | if aHasTS { 69 | a = a[:len(a)-TimestampSize] 70 | } 71 | if bHasTS { 72 | b = b[:len(b)-TimestampSize] 73 | } 74 | 75 | return bytes.Compare(a, b) 76 | } 77 | -------------------------------------------------------------------------------- /ss/rocksdb/db_test.go: -------------------------------------------------------------------------------- 1 | //go:build rocksdbBackend 2 | // +build rocksdbBackend 3 | 4 | package rocksdb 5 | 6 | import ( 7 | "testing" 8 | 9 | "github.com/sei-protocol/sei-db/config" 10 | sstest "github.com/sei-protocol/sei-db/ss/test" 11 | "github.com/sei-protocol/sei-db/ss/types" 12 | "github.com/stretchr/testify/suite" 13 | ) 14 | 15 | func TestStorageTestSuite(t *testing.T) { 16 | s := &sstest.StorageTestSuite{ 17 | NewDB: func(dir string) (types.StateStore, error) { 18 | return New(dir, config.DefaultStateStoreConfig()) 19 | }, 20 | EmptyBatchSize: 12, 21 | } 22 | 23 | suite.Run(t, s) 24 | } 25 | -------------------------------------------------------------------------------- /ss/rocksdb/iterator.go: -------------------------------------------------------------------------------- 1 | //go:build rocksdbBackend 2 | // +build rocksdbBackend 3 | 4 | package rocksdb 5 | 6 | import ( 7 | "bytes" 8 | 9 | "github.com/linxGnu/grocksdb" 10 | "github.com/sei-protocol/sei-db/ss/types" 11 | ) 12 | 13 | var _ types.DBIterator = (*iterator)(nil) 14 | 15 | type iterator struct { 16 | source *grocksdb.Iterator 17 | prefix, start, end []byte 18 | reverse bool 19 | invalid bool 20 | } 21 | 22 | func NewRocksDBIterator(source *grocksdb.Iterator, prefix, start, end []byte, reverse bool) *iterator { 23 | if reverse { 24 | if end == nil { 25 | source.SeekToLast() 26 | } else { 27 | source.Seek(end) 28 | 29 | if source.Valid() { 30 | eoaKey := readOnlySlice(source.Key()) // end or after key 31 | if bytes.Compare(end, eoaKey) <= 0 { 32 | source.Prev() 33 | } 34 | } else { 35 | source.SeekToLast() 36 | } 37 | } 38 | } else { 39 | if start == nil { 40 | source.SeekToFirst() 41 | } else { 42 | source.Seek(start) 43 | } 44 | } 45 | 46 | return &iterator{ 47 | source: source, 48 | prefix: prefix, 49 | start: start, 50 | end: end, 51 | reverse: reverse, 52 | invalid: !source.Valid(), 53 | } 54 | } 55 | 56 | // Domain returns the domain of the iterator. The caller must not modify the 57 | // return values. 
58 | func (itr *iterator) Domain() ([]byte, []byte) { 59 | start := itr.start 60 | if start != nil { 61 | start = start[len(itr.prefix):] 62 | if len(start) == 0 { 63 | start = nil 64 | } 65 | } 66 | 67 | end := itr.end 68 | if end != nil { 69 | end = end[len(itr.prefix):] 70 | if len(end) == 0 { 71 | end = nil 72 | } 73 | } 74 | 75 | return start, end 76 | } 77 | 78 | func (itr *iterator) Valid() bool { 79 | // once invalid, forever invalid 80 | if itr.invalid { 81 | return false 82 | } 83 | 84 | // if source has error, consider it invalid 85 | if err := itr.source.Err(); err != nil { 86 | itr.invalid = true 87 | return false 88 | } 89 | 90 | // if source is invalid, consider it invalid 91 | if !itr.source.Valid() { 92 | itr.invalid = true 93 | return false 94 | } 95 | 96 | // if key is at the end or past it, consider it invalid 97 | start := itr.start 98 | end := itr.end 99 | key := readOnlySlice(itr.source.Key()) 100 | 101 | if itr.reverse { 102 | if start != nil && bytes.Compare(key, start) < 0 { 103 | itr.invalid = true 104 | return false 105 | } 106 | } else { 107 | if end != nil && bytes.Compare(end, key) <= 0 { 108 | itr.invalid = true 109 | return false 110 | } 111 | } 112 | 113 | return true 114 | } 115 | 116 | func (itr *iterator) Key() []byte { 117 | itr.assertIsValid() 118 | return copyAndFreeSlice(itr.source.Key())[len(itr.prefix):] 119 | } 120 | 121 | func (itr *iterator) Value() []byte { 122 | itr.assertIsValid() 123 | return copyAndFreeSlice(itr.source.Value()) 124 | } 125 | 126 | func (itr *iterator) Next() { 127 | if itr.invalid { 128 | return 129 | } 130 | 131 | if itr.reverse { 132 | itr.source.Prev() 133 | } else { 134 | itr.source.Next() 135 | } 136 | } 137 | 138 | func (itr *iterator) Error() error { 139 | return itr.source.Err() 140 | } 141 | 142 | func (itr *iterator) Close() error { 143 | itr.source.Close() 144 | itr.source = nil 145 | itr.invalid = true 146 | return nil 147 | } 148 | 149 | func (itr *iterator) assertIsValid() { 150 | if itr.invalid { 151 | panic("iterator is invalid") 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /ss/rocksdb/opts.go: -------------------------------------------------------------------------------- 1 | //go:build rocksdbBackend 2 | // +build rocksdbBackend 3 | 4 | package rocksdb 5 | 6 | import ( 7 | "encoding/binary" 8 | "runtime" 9 | 10 | "github.com/linxGnu/grocksdb" 11 | ) 12 | 13 | const ( 14 | // CFNameStateStorage defines the RocksDB column family name for versioned state 15 | // storage. 16 | CFNameStateStorage = "state_storage" 17 | 18 | // CFNameDefault defines the RocksDB column family name for the default column. 19 | CFNameDefault = "default" 20 | ) 21 | 22 | // NewRocksDBOpts returns the options used for the RocksDB column family for use 23 | // in state storage. 24 | // 25 | // FIXME: We do not enable dict compression for SSTFileWriter, because otherwise 26 | // the file writer won't report correct file size.
27 | // Ref: https://github.com/facebook/rocksdb/issues/11146 28 | func NewRocksDBOpts(sstFileWriter bool) *grocksdb.Options { 29 | opts := grocksdb.NewDefaultOptions() 30 | opts.SetCreateIfMissing(true) 31 | opts.SetComparator(CreateTSComparator()) 32 | opts.IncreaseParallelism(runtime.NumCPU()) 33 | opts.OptimizeLevelStyleCompaction(512 * 1024 * 1024) 34 | opts.SetTargetFileSizeMultiplier(2) 35 | opts.SetLevelCompactionDynamicLevelBytes(true) 36 | 37 | // block based table options 38 | bbto := grocksdb.NewDefaultBlockBasedTableOptions() 39 | 40 | // 1G block cache 41 | bbto.SetBlockSize(32 * 1024) 42 | bbto.SetBlockCache(grocksdb.NewLRUCache(1 << 30)) 43 | 44 | bbto.SetFilterPolicy(grocksdb.NewRibbonHybridFilterPolicy(9.9, 1)) 45 | bbto.SetIndexType(grocksdb.KBinarySearchWithFirstKey) 46 | bbto.SetOptimizeFiltersForMemory(true) 47 | opts.SetBlockBasedTableFactory(bbto) 48 | 49 | // Improve sst file creation speed: compaction or sst file writer. 50 | opts.SetCompressionOptionsParallelThreads(4) 51 | 52 | if !sstFileWriter { 53 | // compression options at bottommost level 54 | opts.SetBottommostCompression(grocksdb.ZSTDCompression) 55 | 56 | compressOpts := grocksdb.NewDefaultCompressionOptions() 57 | compressOpts.MaxDictBytes = 112640 // 110k 58 | compressOpts.Level = 12 59 | 60 | opts.SetBottommostCompressionOptions(compressOpts, true) 61 | opts.SetBottommostCompressionOptionsZstdMaxTrainBytes(compressOpts.MaxDictBytes*100, true) 62 | } 63 | 64 | return opts 65 | } 66 | 67 | // OpenRocksDB opens a RocksDB database connection for versioned reading and writing. 68 | // It also returns a column family handle for versioning using user-defined timestamps. 69 | // The default column family is used for metadata, while the versioned key/value 70 | // pairs are stored in another column family named "state_storage", which has 71 | // user-defined timestamps enabled. 72 | func OpenRocksDB(dataDir string) (*grocksdb.DB, *grocksdb.ColumnFamilyHandle, error) { 73 | opts := grocksdb.NewDefaultOptions() 74 | opts.SetCreateIfMissing(true) 75 | opts.SetCreateIfMissingColumnFamilies(true) 76 | 77 | db, cfHandles, err := grocksdb.OpenDbColumnFamilies( 78 | opts, 79 | dataDir, 80 | []string{ 81 | CFNameDefault, 82 | CFNameStateStorage, 83 | }, 84 | []*grocksdb.Options{ 85 | opts, 86 | NewRocksDBOpts(false), 87 | }, 88 | ) 89 | if err != nil { 90 | return nil, nil, err 91 | } 92 | 93 | return db, cfHandles[1], nil 94 | } 95 | 96 | // OpenRocksDBAndTrimHistory opens a RocksDB handle similar to `OpenRocksDB`, 97 | // but it also trims the versions newer than the target one, such that it can be 98 | // used for rollback.
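// For example, to roll the state back to version v (sketch; dataDir and v are
// supplied by the caller):
//
//	db, cfHandle, err := OpenRocksDBAndTrimHistory(dataDir, v)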
99 | func OpenRocksDBAndTrimHistory(dataDir string, version int64) (*grocksdb.DB, *grocksdb.ColumnFamilyHandle, error) { 100 | var ts [TimestampSize]byte 101 | binary.LittleEndian.PutUint64(ts[:], uint64(version)) 102 | 103 | opts := grocksdb.NewDefaultOptions() 104 | opts.SetCreateIfMissing(true) 105 | opts.SetCreateIfMissingColumnFamilies(true) 106 | 107 | db, cfHandles, err := grocksdb.OpenDbAndTrimHistory( 108 | opts, 109 | dataDir, 110 | []string{ 111 | CFNameDefault, 112 | CFNameStateStorage, 113 | }, 114 | []*grocksdb.Options{ 115 | opts, 116 | NewRocksDBOpts(false), 117 | }, 118 | ts[:], 119 | ) 120 | if err != nil { 121 | return nil, nil, err 122 | } 123 | 124 | return db, cfHandles[1], nil 125 | } 126 | -------------------------------------------------------------------------------- /ss/rocksdb_init.go: -------------------------------------------------------------------------------- 1 | //go:build rocksdbBackend 2 | // +build rocksdbBackend 3 | 4 | package ss 5 | 6 | import ( 7 | "github.com/sei-protocol/sei-db/common/utils" 8 | "github.com/sei-protocol/sei-db/config" 9 | "github.com/sei-protocol/sei-db/ss/rocksdb" 10 | "github.com/sei-protocol/sei-db/ss/types" 11 | ) 12 | 13 | func init() { 14 | initializer := func(dir string, configs config.StateStoreConfig) (types.StateStore, error) { 15 | dbHome := utils.GetStateStorePath(dir, configs.Backend) 16 | if configs.DBDirectory != "" { 17 | dbHome = configs.DBDirectory 18 | } 19 | return rocksdb.New(dbHome, configs) 20 | } 21 | RegisterBackend(RocksDBBackend, initializer) 22 | } 23 | -------------------------------------------------------------------------------- /ss/sqlite/batch.go: -------------------------------------------------------------------------------- 1 | //go:build sqliteBackend 2 | // +build sqliteBackend 3 | 4 | package sqlite 5 | 6 | import ( 7 | "database/sql" 8 | "fmt" 9 | ) 10 | 11 | type batchAction int 12 | 13 | const ( 14 | batchActionSet batchAction = 0 15 | batchActionDel batchAction = 1 16 | ) 17 | 18 | type batchOp struct { 19 | action batchAction 20 | storeKey string 21 | key, value []byte 22 | } 23 | 24 | type Batch struct { 25 | tx *sql.Tx 26 | ops []batchOp 27 | size int 28 | version int64 29 | } 30 | 31 | func NewBatch(storage *sql.DB, version int64) (*Batch, error) { 32 | tx, err := storage.Begin() 33 | if err != nil { 34 | return nil, fmt.Errorf("failed to create SQL transaction: %w", err) 35 | } 36 | 37 | return &Batch{ 38 | tx: tx, 39 | ops: make([]batchOp, 0), 40 | version: version, 41 | }, nil 42 | } 43 | 44 | func (b *Batch) Size() int { 45 | return b.size 46 | } 47 | 48 | func (b *Batch) Reset() { 49 | b.ops = nil 50 | b.ops = make([]batchOp, 0) 51 | b.size = 0 52 | } 53 | 54 | func (b *Batch) Set(storeKey string, key, value []byte) error { 55 | b.size += len(key) + len(value) 56 | b.ops = append(b.ops, batchOp{action: batchActionSet, storeKey: storeKey, key: key, value: value}) 57 | return nil 58 | } 59 | 60 | func (b *Batch) Delete(storeKey string, key []byte) error { 61 | b.size += len(key) 62 | b.ops = append(b.ops, batchOp{action: batchActionDel, storeKey: storeKey, key: key}) 63 | return nil 64 | } 65 | 66 | func (b *Batch) Write() error { 67 | _, err := b.tx.Exec(latestVersionStmt, reservedStoreKey, keyLatestHeight, b.version, 0, b.version) 68 | if err != nil { 69 | return fmt.Errorf("failed to exec SQL statement: %w", err) 70 | } 71 | 72 | for _, op := range b.ops { 73 | switch op.action { 74 | case batchActionSet: 75 | _, err := b.tx.Exec(upsertStmt, op.storeKey, op.key, op.value, 
b.version, op.value) 76 | if err != nil { 77 | return fmt.Errorf("failed to exec SQL statement: %w", err) 78 | } 79 | 80 | case batchActionDel: 81 | _, err := b.tx.Exec(delStmt, b.version, op.storeKey, op.key, b.version) 82 | if err != nil { 83 | return fmt.Errorf("failed to exec SQL statement: %w", err) 84 | } 85 | } 86 | } 87 | 88 | if err := b.tx.Commit(); err != nil { 89 | return fmt.Errorf("failed to write SQL transaction: %w", err) 90 | } 91 | 92 | return nil 93 | } 94 | -------------------------------------------------------------------------------- /ss/sqlite/bench_test.go: -------------------------------------------------------------------------------- 1 | //go:build sqliteBackend 2 | // +build sqliteBackend 3 | 4 | package sqlite 5 | 6 | import ( 7 | "testing" 8 | 9 | "github.com/sei-protocol/sei-db/config" 10 | sstest "github.com/sei-protocol/sei-db/ss/test" 11 | "github.com/sei-protocol/sei-db/ss/types" 12 | ) 13 | 14 | func BenchmarkDBBackend(b *testing.B) { 15 | s := &sstest.StorageBenchSuite{ 16 | NewDB: func(dir string) (types.StateStore, error) { 17 | return New(dir, config.DefaultStateStoreConfig()) 18 | }, 19 | BenchBackendName: "Sqlite", 20 | } 21 | 22 | s.BenchmarkGet(b) 23 | s.BenchmarkApplyChangeset(b) 24 | s.BenchmarkIterate(b) 25 | } 26 | -------------------------------------------------------------------------------- /ss/sqlite/db_test.go: -------------------------------------------------------------------------------- 1 | //go:build sqliteBackend 2 | // +build sqliteBackend 3 | 4 | package sqlite 5 | 6 | import ( 7 | "testing" 8 | 9 | "github.com/sei-protocol/sei-db/config" 10 | sstest "github.com/sei-protocol/sei-db/ss/test" 11 | "github.com/sei-protocol/sei-db/ss/types" 12 | "github.com/stretchr/testify/suite" 13 | ) 14 | 15 | // TODO: Update Sqlite to latest 16 | func TestStorageTestSuite(t *testing.T) { 17 | s := &sstest.StorageTestSuite{ 18 | NewDB: func(dir string) (types.StateStore, error) { 19 | return New(dir, config.DefaultStateStoreConfig()) 20 | }, 21 | EmptyBatchSize: 0, 22 | } 23 | 24 | suite.Run(t, s) 25 | } 26 | -------------------------------------------------------------------------------- /ss/sqlite/iterator.go: -------------------------------------------------------------------------------- 1 | //go:build sqliteBackend 2 | // +build sqliteBackend 3 | 4 | package sqlite 5 | 6 | import ( 7 | "bytes" 8 | "database/sql" 9 | "fmt" 10 | "strings" 11 | 12 | "github.com/sei-protocol/sei-db/ss/types" 13 | "golang.org/x/exp/slices" 14 | // _ Import to register sqlite driver with database/sql. 
15 | _ "modernc.org/sqlite" 16 | ) 17 | 18 | var _ types.DBIterator = (*iterator)(nil) 19 | 20 | type iterator struct { 21 | statement *sql.Stmt 22 | rows *sql.Rows 23 | key, val []byte 24 | start, end []byte 25 | valid bool 26 | err error 27 | } 28 | 29 | func newIterator(storage *sql.DB, storeKey string, targetVersion int64, start, end []byte, reverse bool) (*iterator, error) { 30 | var ( 31 | keyClause = []string{"store_key = ?", "version <= ?"} 32 | queryArgs []any 33 | ) 34 | 35 | switch { 36 | case len(start) > 0 && len(end) > 0: 37 | keyClause = append(keyClause, "key >= ?", "key < ?") 38 | queryArgs = []any{storeKey, targetVersion, start, end, targetVersion} 39 | 40 | case len(start) > 0 && len(end) == 0: 41 | keyClause = append(keyClause, "key >= ?") 42 | queryArgs = []any{storeKey, targetVersion, start, targetVersion} 43 | 44 | case len(start) == 0 && len(end) > 0: 45 | keyClause = append(keyClause, "key < ?") 46 | queryArgs = []any{storeKey, targetVersion, end, targetVersion} 47 | 48 | default: 49 | queryArgs = []any{storeKey, targetVersion, targetVersion} 50 | } 51 | 52 | orderBy := "ASC" 53 | if reverse { 54 | orderBy = "DESC" 55 | } 56 | 57 | // Note, this is not susceptible to SQL injection because placeholders are used 58 | // for parts of the query outside the store's direct control. 59 | stmt, err := storage.Prepare(fmt.Sprintf(` 60 | SELECT x.key, x.value 61 | FROM ( 62 | SELECT key, value, version, tombstone, 63 | row_number() OVER (PARTITION BY key ORDER BY version DESC) AS _rn 64 | FROM state_storage WHERE %s 65 | ) x 66 | WHERE x._rn = 1 AND (x.tombstone = 0 OR x.tombstone > ?) ORDER BY x.key %s; 67 | `, strings.Join(keyClause, " AND "), orderBy)) 68 | if err != nil { 69 | return nil, fmt.Errorf("failed to prepare SQL statement: %w", err) 70 | } 71 | 72 | rows, err := stmt.Query(queryArgs...) 73 | if err != nil { 74 | _ = stmt.Close() 75 | return nil, fmt.Errorf("failed to execute SQL query: %w", err) 76 | } 77 | 78 | itr := &iterator{ 79 | statement: stmt, 80 | rows: rows, 81 | start: start, 82 | end: end, 83 | valid: rows.Next(), 84 | } 85 | if !itr.valid { 86 | itr.err = fmt.Errorf("iterator invalid: %w", sql.ErrNoRows) 87 | return itr, nil 88 | } 89 | 90 | // read the first row 91 | itr.parseRow() 92 | if !itr.valid { 93 | return itr, nil 94 | } 95 | 96 | return itr, nil 97 | } 98 | 99 | func (itr *iterator) Close() error { 100 | _ = itr.statement.Close() 101 | itr.valid = false 102 | itr.statement = nil 103 | itr.rows = nil 104 | return nil 105 | } 106 | 107 | // Domain returns the domain of the iterator. The caller must not modify the 108 | // return values. 
109 | func (itr *iterator) Domain() ([]byte, []byte) { 110 | return itr.start, itr.end 111 | } 112 | 113 | func (itr *iterator) Key() []byte { 114 | itr.assertIsValid() 115 | return slices.Clone(itr.key) 116 | } 117 | 118 | func (itr *iterator) Value() []byte { 119 | itr.assertIsValid() 120 | return slices.Clone(itr.val) 121 | } 122 | 123 | func (itr *iterator) Valid() bool { 124 | if !itr.valid || itr.rows.Err() != nil { 125 | itr.valid = false 126 | return itr.valid 127 | } 128 | 129 | // if key is at the end or past it, consider it invalid 130 | if end := itr.end; end != nil { 131 | if bytes.Compare(end, itr.Key()) <= 0 { 132 | itr.valid = false 133 | return itr.valid 134 | } 135 | } 136 | 137 | return true 138 | } 139 | 140 | func (itr *iterator) Next() { 141 | if itr.rows.Next() { 142 | itr.parseRow() 143 | itr.Valid() 144 | return 145 | } 146 | 147 | itr.valid = false 148 | } 149 | 150 | func (itr *iterator) Error() error { 151 | if err := itr.rows.Err(); err != nil { 152 | return err 153 | } 154 | 155 | return itr.err 156 | } 157 | 158 | func (itr *iterator) parseRow() { 159 | var ( 160 | key []byte 161 | value []byte 162 | ) 163 | if err := itr.rows.Scan(&key, &value); err != nil { 164 | itr.err = fmt.Errorf("failed to scan row: %s", err) 165 | itr.valid = false 166 | return 167 | } 168 | 169 | itr.key = key 170 | itr.val = value 171 | } 172 | 173 | func (itr *iterator) assertIsValid() { 174 | if !itr.valid { 175 | panic("iterator is invalid") 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /ss/sqlite_init.go: -------------------------------------------------------------------------------- 1 | //go:build sqliteBackend 2 | // +build sqliteBackend 3 | 4 | package ss 5 | 6 | import ( 7 | "github.com/sei-protocol/sei-db/common/utils" 8 | "github.com/sei-protocol/sei-db/config" 9 | "github.com/sei-protocol/sei-db/ss/sqlite" 10 | "github.com/sei-protocol/sei-db/ss/types" 11 | ) 12 | 13 | func init() { 14 | initializer := func(dir string, configs config.StateStoreConfig) (types.StateStore, error) { 15 | dbHome := utils.GetStateStorePath(dir, configs.Backend) 16 | if configs.DBDirectory != "" { 17 | dbHome = configs.DBDirectory 18 | } 19 | return sqlite.New(dbHome, configs) 20 | } 21 | RegisterBackend(SQLiteBackend, initializer) 22 | } 23 | -------------------------------------------------------------------------------- /ss/store.go: -------------------------------------------------------------------------------- 1 | package ss 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/sei-protocol/sei-db/common/logger" 7 | "github.com/sei-protocol/sei-db/common/utils" 8 | "github.com/sei-protocol/sei-db/config" 9 | "github.com/sei-protocol/sei-db/proto" 10 | "github.com/sei-protocol/sei-db/ss/pruning" 11 | "github.com/sei-protocol/sei-db/ss/types" 12 | "github.com/sei-protocol/sei-db/stream/changelog" 13 | ) 14 | 15 | type BackendType string 16 | 17 | const ( 18 | // RocksDBBackend represents rocksdb 19 | // - use rocksdb build tag 20 | RocksDBBackend BackendType = "rocksdb" 21 | 22 | // PebbleDBBackend represents pebbledb 23 | PebbleDBBackend BackendType = "pebbledb" 24 | 25 | // SQLiteBackend represents sqlite 26 | SQLiteBackend BackendType = "sqlite" 27 | ) 28 | 29 | type BackendInitializer func(dir string, config config.StateStoreConfig) (types.StateStore, error) 30 | 31 | var backends = map[BackendType]BackendInitializer{} 32 | 33 | func RegisterBackend(backendType BackendType, initializer BackendInitializer) { 34 | backends[backendType] = 
initializer 35 | } 36 | 37 | // NewStateStore creates a new state store with the specified backend type 38 | func NewStateStore(logger logger.Logger, homeDir string, ssConfig config.StateStoreConfig) (types.StateStore, error) { 39 | initializer, ok := backends[BackendType(ssConfig.Backend)] 40 | if !ok { 41 | return nil, fmt.Errorf("unsupported backend: %s", ssConfig.Backend) 42 | } 43 | stateStore, err := initializer(homeDir, ssConfig) 44 | if err != nil { 45 | return nil, err 46 | } 47 | // Handle auto recovery for DB running with async mode 48 | if ssConfig.DedicatedChangelog { 49 | changelogPath := utils.GetChangelogPath(utils.GetStateStorePath(homeDir, ssConfig.Backend)) 50 | if ssConfig.DBDirectory != "" { 51 | changelogPath = utils.GetChangelogPath(ssConfig.DBDirectory) 52 | } 53 | err := RecoverStateStore(logger, changelogPath, stateStore) 54 | if err != nil { 55 | return nil, err 56 | } 57 | } 58 | // Start the pruning manager for DB 59 | pruningManager := pruning.NewPruningManager(logger, stateStore, int64(ssConfig.KeepRecent), int64(ssConfig.PruneIntervalSeconds)) 60 | pruningManager.Start() 61 | return stateStore, nil 62 | } 63 | 64 | // RecoverStateStore will be called during initialization to recover the state from the changelog 65 | func RecoverStateStore(logger logger.Logger, changelogPath string, stateStore types.StateStore) error { 66 | ssLatestVersion, err := stateStore.GetLatestVersion() 67 | logger.Info(fmt.Sprintf("Recovering from changelog %s at latest SS version %d", changelogPath, ssLatestVersion)) 68 | if err != nil { 69 | return err 70 | } 71 | if ssLatestVersion <= 0 { 72 | return nil 73 | } 74 | streamHandler, err := changelog.NewStream(logger, changelogPath, changelog.Config{}) 75 | if err != nil { 76 | return err 77 | } 78 | firstOffset, errFirst := streamHandler.FirstOffset() 79 | if firstOffset <= 0 || errFirst != nil { 80 | return errFirst 81 | } 82 | lastOffset, errLast := streamHandler.LastOffset() 83 | if lastOffset <= 0 || errLast != nil { 84 | return errLast 85 | } 86 | lastEntry, errRead := streamHandler.ReadAt(lastOffset) 87 | if errRead != nil { 88 | return errRead 89 | } 90 | // Look backward to find where we should start replay from 91 | curVersion := lastEntry.Version 92 | curOffset := lastOffset 93 | for curVersion > ssLatestVersion && curOffset > firstOffset { 94 | curOffset-- 95 | curEntry, errRead := streamHandler.ReadAt(curOffset) 96 | if errRead != nil { 97 | return errRead 98 | } 99 | curVersion = curEntry.Version 100 | } 101 | // Replay from the first offset where the version is larger than the SS store's latest version 102 | targetStartOffset := curOffset 103 | logger.Info(fmt.Sprintf("Start replaying changelog to recover StateStore from offset %d to %d", targetStartOffset, lastOffset)) 104 | if targetStartOffset < lastOffset { 105 | return streamHandler.Replay(targetStartOffset, lastOffset, func(index uint64, entry proto.ChangelogEntry) error { 106 | // commit to state store 107 | for _, cs := range entry.Changesets { 108 | if err := stateStore.ApplyChangeset(entry.Version, cs); err != nil { 109 | return err 110 | } 111 | } 112 | if err := stateStore.SetLatestVersion(entry.Version); err != nil { 113 | return err 114 | } 115 | return nil 116 | }) 117 | } 118 | return nil 119 | } 120 | -------------------------------------------------------------------------------- /ss/store_test.go: -------------------------------------------------------------------------------- 1 | package ss 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | 9 |
"github.com/cosmos/iavl" 10 | "github.com/sei-protocol/sei-db/common/logger" 11 | "github.com/sei-protocol/sei-db/config" 12 | "github.com/sei-protocol/sei-db/proto" 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | func TestNewStateStore(t *testing.T) { 17 | tempDir := os.TempDir() 18 | homeDir := filepath.Join(tempDir, "seidb") 19 | ssConfig := config.StateStoreConfig{ 20 | DedicatedChangelog: true, 21 | Backend: string(PebbleDBBackend), 22 | AsyncWriteBuffer: 50, 23 | KeepRecent: 500, 24 | } 25 | stateStore, err := NewStateStore(logger.NewNopLogger(), homeDir, ssConfig) 26 | require.NoError(t, err) 27 | for i := 1; i < 20; i++ { 28 | var changesets []*proto.NamedChangeSet 29 | kvPair := &iavl.KVPair{ 30 | Delete: false, 31 | Key: []byte(fmt.Sprintf("key%d", i)), 32 | Value: []byte(fmt.Sprintf("value%d", i)), 33 | } 34 | var pairs []*iavl.KVPair 35 | pairs = append(pairs, kvPair) 36 | cs := iavl.ChangeSet{Pairs: pairs} 37 | ncs := &proto.NamedChangeSet{ 38 | Name: "storeA", 39 | Changeset: cs, 40 | } 41 | changesets = append(changesets, ncs) 42 | err := stateStore.ApplyChangesetAsync(int64(i), changesets) 43 | require.NoError(t, err) 44 | } 45 | // Closing the state store without waiting for data to be fully flushed 46 | err = stateStore.Close() 47 | require.NoError(t, err) 48 | 49 | // Reopen a new state store 50 | stateStore, err = NewStateStore(logger.NewNopLogger(), homeDir, ssConfig) 51 | require.NoError(t, err) 52 | 53 | // Make sure key and values can be found 54 | for i := 1; i < 20; i++ { 55 | value, err := stateStore.Get("storeA", int64(i), []byte(fmt.Sprintf("key%d", i))) 56 | require.NoError(t, err) 57 | require.Equal(t, fmt.Sprintf("value%d", i), string(value)) 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /ss/test/storage_bench_suite.go: -------------------------------------------------------------------------------- 1 | package sstest 2 | 3 | import ( 4 | "bytes" 5 | "math/rand" 6 | "sort" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/require" 10 | 11 | "github.com/cosmos/iavl" 12 | "github.com/sei-protocol/sei-db/proto" 13 | "github.com/sei-protocol/sei-db/ss/types" 14 | ) 15 | 16 | // StorageBenchSuite defines a reusable benchmark suite for all storage backends. 
17 | type StorageBenchSuite struct { 18 | BenchBackendName string 19 | NewDB func(dir string) (types.StateStore, error) 20 | } 21 | 22 | var rng = rand.New(rand.NewSource(567320)) 23 | 24 | func (s *StorageBenchSuite) BenchmarkGet(b *testing.B) { 25 | numKeyVals := 10000 26 | keys := make([][]byte, numKeyVals) 27 | vals := make([][]byte, numKeyVals) 28 | for i := 0; i < numKeyVals; i++ { 29 | key := make([]byte, 128) 30 | val := make([]byte, 128) 31 | 32 | _, err := rng.Read(key) 33 | require.NoError(b, err) 34 | _, err = rng.Read(val) 35 | require.NoError(b, err) 36 | 37 | keys[i] = key 38 | vals[i] = val 39 | } 40 | 41 | db, err := s.NewDB(b.TempDir()) 42 | require.NoError(b, err) 43 | defer db.Close() 44 | 45 | cs := &iavl.ChangeSet{} 46 | cs.Pairs = []*iavl.KVPair{} 47 | 48 | for i := 0; i < numKeyVals; i++ { 49 | cs.Pairs = append(cs.Pairs, &iavl.KVPair{Key: keys[i], Value: vals[i]}) 50 | } 51 | 52 | ncs := &proto.NamedChangeSet{ 53 | Name: storeKey1, 54 | Changeset: *cs, 55 | } 56 | 57 | require.NoError(b, db.ApplyChangeset(1, ncs)) 58 | 59 | b.Run(s.BenchBackendName, func(b *testing.B) { 60 | b.ResetTimer() 61 | 62 | for i := 0; i < b.N; i++ { 63 | b.StopTimer() 64 | key := keys[rng.Intn(len(keys))] 65 | 66 | b.StartTimer() 67 | _, err = db.Get(storeKey1, 1, key) 68 | require.NoError(b, err) 69 | } 70 | }) 71 | } 72 | 73 | func (s *StorageBenchSuite) BenchmarkApplyChangeset(b *testing.B) { 74 | db, err := s.NewDB(b.TempDir()) 75 | require.NoError(b, err) 76 | defer db.Close() 77 | 78 | b.Run(s.BenchBackendName, func(b *testing.B) { 79 | b.ResetTimer() 80 | 81 | for i := 0; i < b.N; i++ { 82 | b.StopTimer() 83 | 84 | cs := &iavl.ChangeSet{} 85 | cs.Pairs = []*iavl.KVPair{} 86 | 87 | for j := 0; j < 1000; j++ { 88 | key := make([]byte, 128) 89 | val := make([]byte, 128) 90 | 91 | _, err = rng.Read(key) 92 | require.NoError(b, err) 93 | _, err = rng.Read(val) 94 | require.NoError(b, err) 95 | 96 | cs.Pairs = append(cs.Pairs, &iavl.KVPair{Key: key, Value: val}) 97 | } 98 | 99 | ncs := &proto.NamedChangeSet{ 100 | Name: storeKey1, 101 | Changeset: *cs, 102 | } 103 | b.StartTimer() 104 | require.NoError(b, db.ApplyChangeset(int64(b.N+1), ncs)) 105 | } 106 | }) 107 | } 108 | 109 | func (s *StorageBenchSuite) BenchmarkIterate(b *testing.B) { 110 | numKeyVals := 1_000_000 111 | keys := make([][]byte, numKeyVals) 112 | vals := make([][]byte, numKeyVals) 113 | for i := 0; i < numKeyVals; i++ { 114 | key := make([]byte, 128) 115 | val := make([]byte, 128) 116 | 117 | _, err := rng.Read(key) 118 | require.NoError(b, err) 119 | _, err = rng.Read(val) 120 | require.NoError(b, err) 121 | 122 | keys[i] = key 123 | vals[i] = val 124 | 125 | } 126 | 127 | db, err := s.NewDB(b.TempDir()) 128 | require.NoError(b, err) 129 | defer db.Close() 130 | 131 | b.StopTimer() 132 | 133 | cs := &iavl.ChangeSet{} 134 | cs.Pairs = []*iavl.KVPair{} 135 | for i := 0; i < numKeyVals; i++ { 136 | cs.Pairs = append(cs.Pairs, &iavl.KVPair{Key: keys[i], Value: vals[i]}) 137 | } 138 | ncs := &proto.NamedChangeSet{ 139 | Name: storeKey1, 140 | Changeset: *cs, 141 | } 142 | 143 | require.NoError(b, db.ApplyChangeset(1, ncs)) 144 | 145 | sort.Slice(keys, func(i, j int) bool { 146 | return bytes.Compare(keys[i], keys[j]) < 0 147 | }) 148 | 149 | b.Run(s.BenchBackendName, func(b *testing.B) { 150 | b.ResetTimer() 151 | 152 | for i := 0; i < b.N; i++ { 153 | b.StopTimer() 154 | 155 | itr, err := db.Iterator(storeKey1, 1, keys[0], nil) 156 | require.NoError(b, err) 157 | 158 | b.StartTimer() 159 | 160 | for ; itr.Valid(); 
itr.Next() { 161 | _ = itr.Key() 162 | _ = itr.Value() 163 | } 164 | 165 | require.NoError(b, itr.Error()) 166 | } 167 | }) 168 | } 169 | -------------------------------------------------------------------------------- /ss/test/utils.go: -------------------------------------------------------------------------------- 1 | package sstest 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/cosmos/iavl" 7 | "github.com/sei-protocol/sei-db/proto" 8 | "github.com/sei-protocol/sei-db/ss/types" 9 | ) 10 | 11 | // Fills the db with multiple keys each with different versions 12 | // TODO: Return just changeset so it can be altered after return 13 | func FillData(db types.StateStore, numKeys int, versions int) error { 14 | if numKeys <= 0 || versions <= 0 { 15 | panic("numKeys and versions must be greater than 0") 16 | } 17 | 18 | for i := int64(1); i < int64(versions+1); i++ { 19 | cs := &iavl.ChangeSet{} 20 | cs.Pairs = []*iavl.KVPair{} 21 | 22 | for j := 0; j < numKeys; j++ { 23 | key := fmt.Sprintf("key%03d", j) 24 | val := fmt.Sprintf("val%03d-%03d", j, i) 25 | cs.Pairs = append(cs.Pairs, &iavl.KVPair{Key: []byte(key), Value: []byte(val)}) 26 | } 27 | 28 | ncs := &proto.NamedChangeSet{ 29 | Name: storeKey1, 30 | Changeset: *cs, 31 | } 32 | 33 | err := db.ApplyChangeset(i, ncs) 34 | if err != nil { 35 | return err 36 | } 37 | 38 | } 39 | 40 | return nil 41 | } 42 | 43 | // Helper for creating the changeset and applying it to db 44 | func DBApplyChangeset(db types.StateStore, version int64, storeKey string, key, val [][]byte) error { 45 | if len(key) != len(val) { 46 | panic("length of keys must match length of vals") 47 | } 48 | 49 | cs := &iavl.ChangeSet{} 50 | cs.Pairs = []*iavl.KVPair{} 51 | for j := 0; j < len(key); j++ { 52 | cs.Pairs = append(cs.Pairs, &iavl.KVPair{Key: key[j], Value: val[j]}) 53 | } 54 | 55 | ncs := &proto.NamedChangeSet{ 56 | Name: storeKey, 57 | Changeset: *cs, 58 | } 59 | 60 | return db.ApplyChangeset(version, ncs) 61 | } 62 | 63 | // Helper for creating the changeset and applying it to db 64 | func DBApplyDeleteChangeset(db types.StateStore, version int64, storeKey string, key [][]byte) error { 65 | cs := &iavl.ChangeSet{} 66 | cs.Pairs = []*iavl.KVPair{} 67 | for j := 0; j < len(key); j++ { 68 | cs.Pairs = append(cs.Pairs, &iavl.KVPair{Key: key[j], Delete: true}) 69 | } 70 | 71 | ncs := &proto.NamedChangeSet{ 72 | Name: storeKey, 73 | Changeset: *cs, 74 | } 75 | 76 | return db.ApplyChangeset(version, ncs) 77 | } 78 | -------------------------------------------------------------------------------- /ss/types/store.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | import ( 4 | "io" 5 | 6 | "github.com/sei-protocol/sei-db/proto" 7 | ) 8 | 9 | // StateStore is a versioned, embedded Key-Value Store, 10 | // which allows efficient reads, writes, iteration over a specific version 11 | type StateStore interface { 12 | Get(storeKey string, version int64, key []byte) ([]byte, error) 13 | Has(storeKey string, version int64, key []byte) (bool, error) 14 | Iterator(storeKey string, version int64, start, end []byte) (DBIterator, error) 15 | ReverseIterator(storeKey string, version int64, start, end []byte) (DBIterator, error) 16 | RawIterate(storeKey string, fn func([]byte, []byte, int64) bool) (bool, error) 17 | GetLatestVersion() (int64, error) 18 | SetLatestVersion(version int64) error 19 | GetEarliestVersion() (int64, error) 20 | SetEarliestVersion(version int64, ignoreVersion bool) error 21 | GetLatestMigratedKey() 
([]byte, error) 22 | SetLatestMigratedKey(key []byte) error 23 | GetLatestMigratedModule() (string, error) 24 | SetLatestMigratedModule(module string) error 25 | WriteBlockRangeHash(storeKey string, beginBlockRange, endBlockRange int64, hash []byte) error 26 | DeleteKeysAtVersion(module string, version int64) error 27 | 28 | // ApplyChangeset persists the change set of a block, 29 | // the `changeSet` should be ordered by (storeKey, key), 30 | // the version should be latest version plus one. 31 | ApplyChangeset(version int64, cs *proto.NamedChangeSet) error 32 | 33 | // ApplyChangesetAsync writes changesets into the WAL file first and applies them later, for async writes 34 | ApplyChangesetAsync(version int64, changesets []*proto.NamedChangeSet) error 35 | 36 | // Import imports the initial state of the store 37 | Import(version int64, ch <-chan SnapshotNode) error 38 | 39 | // RawImport imports the kv entries into the store in any order of version 40 | RawImport(ch <-chan RawSnapshotNode) error 41 | 42 | // Prune attempts to prune all versions up to and including the provided 43 | // version argument. The operation should be idempotent. An error should be 44 | // returned upon failure. 45 | Prune(version int64) error 46 | 47 | // Closer releases associated resources. It should NOT be idempotent. It must 48 | // only be called once and any call after may panic. 49 | io.Closer 50 | } 51 | 52 | type DBIterator interface { 53 | // Domain returns the start (inclusive) and end (exclusive) limits of the iterator. 54 | // CONTRACT: start, end readonly []byte 55 | Domain() (start []byte, end []byte) 56 | 57 | // Valid returns whether the current iterator is valid. Once invalid, the Iterator remains 58 | // invalid forever. 59 | Valid() bool 60 | 61 | // Next moves the iterator to the next key in the database, as defined by order of iteration. 62 | // If Valid returns false, this method will panic. 63 | Next() 64 | 65 | // Key returns the key at the current position. Panics if the iterator is invalid. 66 | // CONTRACT: key readonly []byte 67 | Key() (key []byte) 68 | 69 | // Value returns the value at the current position. Panics if the iterator is invalid. 70 | // CONTRACT: value readonly []byte 71 | Value() (value []byte) 72 | 73 | // Error returns the last error encountered by the iterator, if any. 74 | Error() error 75 | 76 | // Close closes the iterator, releasing any allocated resources. 77 | Close() error 78 | } 79 | 80 | type SnapshotNode struct { 81 | StoreKey string 82 | Key []byte 83 | Value []byte 84 | } 85 | 86 | type RawSnapshotNode struct { 87 | StoreKey string 88 | Key []byte 89 | Value []byte 90 | Version int64 91 | } 92 | 93 | func GetRawSnapshotNode(node SnapshotNode, version int64) RawSnapshotNode { 94 | return RawSnapshotNode{ 95 | StoreKey: node.StoreKey, 96 | Key: node.Key, 97 | Value: node.Value, 98 | Version: version, 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /ss/util/iterator.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | // IterateWithPrefix returns the begin and end keys for an iterator over a domain 4 | // and prefix.
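// For example, with prefix "ab" and a nil begin/end, the returned domain is
// begin = "ab" and end = "ac": CopyIncr below bumps the last prefix byte so the
// iterator never leaves keys that share the prefix.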
5 | func IterateWithPrefix(prefix, begin, end []byte) ([]byte, []byte) { 6 | if len(prefix) == 0 { 7 | return begin, end 8 | } 9 | 10 | begin = cloneAppend(prefix, begin) 11 | 12 | if end == nil { 13 | end = CopyIncr(prefix) 14 | } else { 15 | end = cloneAppend(prefix, end) 16 | } 17 | 18 | return begin, end 19 | } 20 | 21 | func cloneAppend(front, tail []byte) (res []byte) { 22 | res = make([]byte, len(front)+len(tail)) 23 | 24 | n := copy(res, front) 25 | copy(res[n:], tail) 26 | 27 | return res 28 | } 29 | 30 | func CopyIncr(bz []byte) []byte { 31 | if len(bz) == 0 { 32 | panic("CopyIncr expects non-zero bz length") 33 | } 34 | 35 | ret := make([]byte, len(bz)) 36 | copy(ret, bz) 37 | 38 | for i := len(bz) - 1; i >= 0; i-- { 39 | if ret[i] < byte(0xFF) { 40 | ret[i]++ 41 | return ret 42 | } 43 | 44 | ret[i] = byte(0x00) 45 | 46 | if i == 0 { 47 | // overflow 48 | return nil 49 | } 50 | } 51 | 52 | return nil 53 | } 54 | -------------------------------------------------------------------------------- /ss/util/modules.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | var Modules = []string{ 4 | "aclaccesscontrol", 5 | "authz", 6 | "acc", 7 | "bank", 8 | "capability", 9 | "distribution", 10 | "epoch", 11 | "evidence", 12 | "evm", 13 | "feegrant", 14 | "gov", 15 | "ibc", 16 | "mint", 17 | "oracle", 18 | "params", 19 | "slashing", 20 | "staking", 21 | "tokenfactory", 22 | "transfer", 23 | "upgrade", 24 | "wasm"} 25 | -------------------------------------------------------------------------------- /ss/util/types.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | // HashCalculator defines the interface for calculating chained state hash. 4 | type HashCalculator interface { 5 | HashSingle(data []byte) []byte 6 | HashTwo(dataA []byte, dataB []byte) []byte 7 | ComputeHashes() [][]byte 8 | } 9 | -------------------------------------------------------------------------------- /ss/util/xor_hasher.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/binary" 6 | "sync" 7 | 8 | "github.com/sei-protocol/sei-db/ss/types" 9 | ) 10 | 11 | var _ HashCalculator = (*XorHashCalculator)(nil) 12 | 13 | // XorHashCalculator is the hash calculator backed by XoR hash. 14 | type XorHashCalculator struct { 15 | NumBlocksPerWorker int64 16 | NumOfWorkers int 17 | DataCh chan types.RawSnapshotNode 18 | } 19 | 20 | // NewXorHashCalculator creates a new XorHashCalculator. 21 | func NewXorHashCalculator(numBlocksPerWorker int64, numWorkers int, data chan types.RawSnapshotNode) XorHashCalculator { 22 | return XorHashCalculator{ 23 | NumBlocksPerWorker: numBlocksPerWorker, 24 | NumOfWorkers: numWorkers, 25 | DataCh: data, 26 | } 27 | } 28 | 29 | // HashSingle computes the hash of a single data element. 30 | func (x XorHashCalculator) HashSingle(data []byte) []byte { 31 | hash := sha256.Sum256(data) 32 | return hash[:] 33 | } 34 | 35 | // HashTwo computes the hash of two data elements by performing an XOR between two byte slices of equal size.
36 | func (x XorHashCalculator) HashTwo(dataA []byte, dataB []byte) []byte { 37 | if len(dataA) != len(dataB) { 38 | panic("Expecting both data to have equal length for computing a XoR hash") 39 | } 40 | result := make([]byte, len(dataA)) 41 | for i := range dataA { 42 | result[i] = dataA[i] ^ dataB[i] 43 | } 44 | return result 45 | } 46 | 47 | func (x XorHashCalculator) ComputeHashes() [][]byte { 48 | var wg sync.WaitGroup 49 | allChannels := make([]chan types.RawSnapshotNode, x.NumOfWorkers) 50 | allHashes := make([][]byte, x.NumOfWorkers) 51 | // First calculate each sub hash in a separate goroutine 52 | for i := 0; i < x.NumOfWorkers; i++ { 53 | wg.Add(1) 54 | subsetChan := make(chan types.RawSnapshotNode, 1000) 55 | go func(index int, data chan types.RawSnapshotNode) { 56 | defer wg.Done() 57 | var hashResult []byte 58 | for item := range data { 59 | entryHash := x.HashSingle(Serialize(item)) 60 | if hashResult == nil { 61 | hashResult = entryHash 62 | } else { 63 | hashResult = x.HashTwo(hashResult, entryHash) 64 | } 65 | } 66 | allHashes[index] = hashResult 67 | }(i, subsetChan) 68 | allChannels[i] = subsetChan 69 | } 70 | // Push all the data to its corresponding channel based on version 71 | for data := range x.DataCh { 72 | index := data.Version / x.NumBlocksPerWorker 73 | allChannels[index] <- data 74 | } 75 | // Close all sub channels 76 | for _, subChan := range allChannels { 77 | close(subChan) 78 | } 79 | // Wait for all workers to complete 80 | wg.Wait() 81 | // Now modify sub hashes to hash again with previous hash 82 | for i := 1; i < len(allHashes); i++ { 83 | if len(allHashes[i-1]) > 0 && len(allHashes[i]) > 0 { 84 | allHashes[i] = x.HashTwo(allHashes[i-1], allHashes[i]) 85 | } else if len(allHashes[i-1]) > 0 && len(allHashes[i]) == 0 { 86 | allHashes[i] = allHashes[i-1] 87 | } 88 | } 89 | return allHashes 90 | } 91 | 92 | func Serialize(node types.RawSnapshotNode) []byte { 93 | keySize := len(node.Key) 94 | valueSize := len(node.Value) 95 | versionSize := 8 96 | buf := make([]byte, keySize+valueSize+versionSize) 97 | copy(buf[:keySize], node.Key) 98 | offset := keySize 99 | copy(buf[offset:offset+valueSize], node.Value) 100 | offset += valueSize 101 | binary.LittleEndian.PutUint64(buf[offset:offset+versionSize], uint64(node.Version)) 102 | return buf 103 | } 104 | -------------------------------------------------------------------------------- /stream/changelog/changelog_test.go: -------------------------------------------------------------------------------- 1 | package changelog 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "testing" 8 | 9 | "github.com/cosmos/iavl" 10 | "github.com/sei-protocol/sei-db/common/logger" 11 | "github.com/sei-protocol/sei-db/proto" 12 | "github.com/stretchr/testify/require" 13 | "github.com/tidwall/wal" 14 | ) 15 | 16 | var ( 17 | ChangeSets = []iavl.ChangeSet{ 18 | {Pairs: MockKVPairs("hello", "world")}, 19 | {Pairs: MockKVPairs("hello1", "world1", "hello2", "world2")}, 20 | {Pairs: MockKVPairs("hello3", "world3")}, 21 | } 22 | ) 23 | 24 | func TestOpenAndCorruptedTail(t *testing.T) { 25 | opts := &wal.Options{ 26 | LogFormat: wal.JSON, 27 | } 28 | dir := t.TempDir() 29 | 30 | testCases := []struct { 31 | name string 32 | logs []byte 33 | lastIndex uint64 34 | }{ 35 | {"failure-1", []byte("\n"), 0}, 36 | {"failure-2", []byte(`{}` + "\n"), 0}, 37 | {"failure-3", []byte(`{"index":"1"}` + "\n"), 0}, 38 | {"failure-4", []byte(`{"index":"1","data":"?"}`), 0}, 39 | {"failure-5", []byte(`{"index":1,"data":"?"}` + "\n" + 
`{"index":"1","data":"?"}`), 1}, 40 | } 41 | 42 | for _, tc := range testCases { 43 | t.Run(tc.name, func(t *testing.T) { 44 | os.WriteFile(filepath.Join(dir, "00000000000000000001"), tc.logs, 0o600) 45 | 46 | _, err := wal.Open(dir, opts) 47 | require.Equal(t, wal.ErrCorrupt, err) 48 | 49 | log, err := open(dir, opts) 50 | require.NoError(t, err) 51 | 52 | lastIndex, err := log.LastIndex() 53 | require.NoError(t, err) 54 | require.Equal(t, tc.lastIndex, lastIndex) 55 | }) 56 | } 57 | } 58 | 59 | func TestReplay(t *testing.T) { 60 | changelog := prepareTestData(t) 61 | var total = 0 62 | err := changelog.Replay(1, 2, func(index uint64, entry proto.ChangelogEntry) error { 63 | total++ 64 | switch index { 65 | case 1: 66 | require.Equal(t, "test", entry.Changesets[0].Name) 67 | require.Equal(t, []byte("hello"), entry.Changesets[0].Changeset.Pairs[0].Key) 68 | require.Equal(t, []byte("world"), entry.Changesets[0].Changeset.Pairs[0].Value) 69 | case 2: 70 | require.Equal(t, []byte("hello1"), entry.Changesets[0].Changeset.Pairs[0].Key) 71 | require.Equal(t, []byte("world1"), entry.Changesets[0].Changeset.Pairs[0].Value) 72 | require.Equal(t, []byte("hello2"), entry.Changesets[0].Changeset.Pairs[1].Key) 73 | require.Equal(t, []byte("world2"), entry.Changesets[0].Changeset.Pairs[1].Value) 74 | default: 75 | require.Fail(t, fmt.Sprintf("unexpected index %d", index)) 76 | } 77 | return nil 78 | }) 79 | require.NoError(t, err) 80 | require.Equal(t, 2, total) 81 | err = changelog.Close() 82 | require.NoError(t, err) 83 | } 84 | 85 | func TestRandomRead(t *testing.T) { 86 | changelog := prepareTestData(t) 87 | entry, err := changelog.ReadAt(2) 88 | require.NoError(t, err) 89 | require.Equal(t, []byte("hello1"), entry.Changesets[0].Changeset.Pairs[0].Key) 90 | require.Equal(t, []byte("world1"), entry.Changesets[0].Changeset.Pairs[0].Value) 91 | require.Equal(t, []byte("hello2"), entry.Changesets[0].Changeset.Pairs[1].Key) 92 | require.Equal(t, []byte("world2"), entry.Changesets[0].Changeset.Pairs[1].Value) 93 | entry, err = changelog.ReadAt(1) 94 | require.NoError(t, err) 95 | require.Equal(t, []byte("hello"), entry.Changesets[0].Changeset.Pairs[0].Key) 96 | require.Equal(t, []byte("world"), entry.Changesets[0].Changeset.Pairs[0].Value) 97 | entry, err = changelog.ReadAt(3) 98 | require.NoError(t, err) 99 | require.Equal(t, []byte("hello3"), entry.Changesets[0].Changeset.Pairs[0].Key) 100 | require.Equal(t, []byte("world3"), entry.Changesets[0].Changeset.Pairs[0].Value) 101 | } 102 | 103 | func prepareTestData(t *testing.T) *Stream { 104 | dir := t.TempDir() 105 | changelog, err := NewStream(logger.NewNopLogger(), dir, Config{}) 106 | require.NoError(t, err) 107 | writeTestData(changelog) 108 | return changelog 109 | } 110 | 111 | func writeTestData(changelog *Stream) { 112 | for i, changes := range ChangeSets { 113 | cs := []*proto.NamedChangeSet{ 114 | { 115 | Name: "test", 116 | Changeset: changes, 117 | }, 118 | } 119 | entry := &proto.ChangelogEntry{} 120 | entry.Changesets = cs 121 | _ = changelog.Write(uint64(i+1), *entry) 122 | } 123 | } 124 | 125 | func TestSynchronousWrite(t *testing.T) { 126 | changelog := prepareTestData(t) 127 | lastIndex, err := changelog.LastOffset() 128 | require.NoError(t, err) 129 | require.Equal(t, uint64(3), lastIndex) 130 | 131 | } 132 | 133 | func TestAsyncWrite(t *testing.T) { 134 | dir := t.TempDir() 135 | changelog, err := NewStream(logger.NewNopLogger(), dir, Config{WriteBufferSize: 10}) 136 | require.NoError(t, err) 137 | for i, changes := range ChangeSets { 
138 | cs := []*proto.NamedChangeSet{ 139 | { 140 | Name: "test", 141 | Changeset: changes, 142 | }, 143 | } 144 | entry := &proto.ChangelogEntry{} 145 | entry.Changesets = cs 146 | err := changelog.Write(uint64(i+1), *entry) 147 | require.NoError(t, err) 148 | lastIndex, err := changelog.LastOffset() 149 | require.NoError(t, err) 150 | // Writes happen async, so lastIndex should not have caught up to all entries yet 151 | require.Greater(t, uint64(3), lastIndex) 152 | } 153 | err = changelog.Close() 154 | require.NoError(t, err) 155 | changelog, err = NewStream(logger.NewNopLogger(), dir, Config{WriteBufferSize: 10}) 156 | require.NoError(t, err) 157 | lastIndex, err := changelog.LastOffset() 158 | require.NoError(t, err) 159 | require.Equal(t, uint64(3), lastIndex) 160 | } 161 | -------------------------------------------------------------------------------- /stream/changelog/subscriber.go: -------------------------------------------------------------------------------- 1 | package changelog 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/sei-protocol/sei-db/proto" 7 | "github.com/sei-protocol/sei-db/stream/types" 8 | ) 9 | 10 | var _ types.Subscriber[proto.ChangelogEntry] = (*Subscriber)(nil) 11 | 12 | type Subscriber struct { 13 | maxPendingSize int 14 | chPendingEntries chan proto.ChangelogEntry 15 | errSignal chan error 16 | stopSignal chan struct{} 17 | processFn func(entry proto.ChangelogEntry) error 18 | } 19 | 20 | func NewSubscriber( 21 | maxPendingSize int, 22 | processFn func(entry proto.ChangelogEntry) error, 23 | ) *Subscriber { 24 | subscriber := &Subscriber{ 25 | maxPendingSize: maxPendingSize, 26 | processFn: processFn, stopSignal: make(chan struct{}), // stopSignal must be initialized, otherwise Close would send on a nil channel and block forever 27 | } 28 | 29 | return subscriber 30 | } 31 | 32 | func (s *Subscriber) Start() { 33 | if s.maxPendingSize > 0 { 34 | s.startAsyncProcessing() 35 | } 36 | } 37 | 38 | func (s *Subscriber) ProcessEntry(entry proto.ChangelogEntry) error { 39 | if s.maxPendingSize <= 0 { 40 | return s.processFn(entry) 41 | } 42 | s.chPendingEntries <- entry 43 | return s.CheckError() 44 | } 45 | 46 | func (s *Subscriber) startAsyncProcessing() { 47 | if s.chPendingEntries == nil { 48 | s.chPendingEntries = make(chan proto.ChangelogEntry, s.maxPendingSize) 49 | s.errSignal = make(chan error) 50 | go func() { 51 | defer close(s.errSignal) 52 | for { 53 | select { 54 | case entry := <-s.chPendingEntries: 55 | if err := s.processFn(entry); err != nil { 56 | s.errSignal <- err 57 | } 58 | case <-s.stopSignal: 59 | return 60 | } 61 | } 62 | }() 63 | } 64 | } 65 | 66 | func (s *Subscriber) Close() error { 67 | if s.chPendingEntries == nil { // nothing to stop if async processing was never started 68 | return nil 69 | } 70 | s.stopSignal <- struct{}{} 71 | close(s.chPendingEntries) 72 | err := s.CheckError() 73 | s.chPendingEntries = nil 74 | s.errSignal = nil 75 | return err 76 | } 77 | 78 | func (s *Subscriber) CheckError() error { 79 | select { 80 | case err := <-s.errSignal: 81 | // async wal writing failed, we need to abort the state machine 82 | return fmt.Errorf("subscriber failed unexpectedly: %w", err) 83 | default: 84 | } 85 | return nil 86 | } 87 | -------------------------------------------------------------------------------- /stream/changelog/utils.go: -------------------------------------------------------------------------------- 1 | package changelog 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "os" 7 | "path/filepath" 8 | "unsafe" 9 | 10 | "github.com/cosmos/iavl" 11 | "github.com/tidwall/gjson" 12 | "github.com/tidwall/wal" 13 | ) 14 | 15 | func LogPath(dir string) string { 16 | return filepath.Join(dir, "changelog") 17 | } 18 | 19 | //
GetLastIndex returns the last written index of the replay log 20 | func GetLastIndex(dir string) (index uint64, err error) { 21 | rlog, err := open(dir, nil) 22 | if err != nil { 23 | return 0, err 24 | } 25 | defer rlog.Close() 26 | return rlog.LastIndex() 27 | } 28 | 29 | // truncateCorruptedTail truncates the corrupted tail 30 | func truncateCorruptedTail(path string, format wal.LogFormat) error { 31 | data, err := os.ReadFile(path) 32 | if err != nil { 33 | return err 34 | } 35 | var pos int 36 | for len(data) > 0 { 37 | var n int 38 | if format == wal.JSON { 39 | n, err = loadNextJSONEntry(data) 40 | } else { 41 | n, err = loadNextBinaryEntry(data) 42 | } 43 | if err == wal.ErrCorrupt { 44 | break 45 | } 46 | if err != nil { 47 | return err 48 | } 49 | data = data[n:] 50 | pos += n 51 | } 52 | if pos != len(data) { 53 | return os.Truncate(path, int64(pos)) 54 | } 55 | return nil 56 | } 57 | 58 | // loadNextJSONEntry loads json data like {"index":number,"data":string} 59 | func loadNextJSONEntry(data []byte) (n int, err error) { 60 | idx := bytes.IndexByte(data, '\n') 61 | if idx == -1 { 62 | return 0, wal.ErrCorrupt 63 | } 64 | line := data[:idx] 65 | dres := gjson.Get(*(*string)(unsafe.Pointer(&line)), "data") 66 | if dres.Type != gjson.String { 67 | return 0, wal.ErrCorrupt 68 | } 69 | return idx + 1, nil 70 | } 71 | 72 | // loadNextBinaryEntry loads binary data like data_size + data 73 | func loadNextBinaryEntry(data []byte) (n int, err error) { 74 | size, n := binary.Uvarint(data) 75 | if n <= 0 { 76 | return 0, wal.ErrCorrupt 77 | } 78 | if uint64(len(data)-n) < size { 79 | return 0, wal.ErrCorrupt 80 | } 81 | return n + int(size), nil 82 | } 83 | 84 | func channelBatchRecv[T any](ch <-chan *T) []*T { 85 | // block if channel is empty 86 | item := <-ch 87 | if item == nil { 88 | // channel is closed 89 | return nil 90 | } 91 | 92 | remaining := len(ch) 93 | result := make([]*T, 0, remaining+1) 94 | result = append(result, item) 95 | for i := 0; i < remaining; i++ { 96 | result = append(result, <-ch) 97 | } 98 | return result 99 | } 100 | 101 | func MockKVPairs(kvPairs ...string) []*iavl.KVPair { 102 | result := make([]*iavl.KVPair, len(kvPairs)/2) 103 | for i := 0; i < len(kvPairs); i += 2 { 104 | result[i/2] = &iavl.KVPair{ 105 | Key: []byte(kvPairs[i]), 106 | Value: []byte(kvPairs[i+1]), 107 | } 108 | } 109 | return result 110 | } 111 | -------------------------------------------------------------------------------- /stream/types/types.go: -------------------------------------------------------------------------------- 1 | package types 2 | 3 | type Stream[T any] interface { 4 | // Write will write a new entry to the log at the given index. 
5 | Write(offset uint64, entry T) error 6 | 7 | // CheckError checks the error signal of async writes 8 | CheckError() error 9 | 10 | // TruncateBefore will remove all entries that are before the provided `offset` 11 | TruncateBefore(offset uint64) error 12 | 13 | // TruncateAfter will remove all entries that are after the provided `offset` 14 | TruncateAfter(offset uint64) error 15 | 16 | // ReadAt will read the replay log at the given index 17 | ReadAt(offset uint64) (*T, error) 18 | 19 | // FirstOffset returns the first written index of the log 20 | FirstOffset() (offset uint64, err error) 21 | 22 | // LastOffset returns the last written index of the log 23 | LastOffset() (offset uint64, err error) 24 | 25 | // Replay will replay the log and process each entry with the provided function 26 | Replay(start uint64, end uint64, processFn func(index uint64, entry T) error) error 27 | 28 | Close() error 29 | } 30 | 31 | type Subscriber[T any] interface { 32 | // Start starts the subscriber processing goroutine 33 | Start() 34 | 35 | // ProcessEntry will process a new entry either sync or async 36 | ProcessEntry(entry T) error 37 | 38 | // Close will close the subscriber and stop the goroutine 39 | Close() error 40 | } 41 | -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | UNAME_S := $(shell uname -s) 2 | 3 | ifeq ($(UNAME_S),Darwin) 4 | ROCKSDB_PATH := $(shell brew --prefix rocksdb) 5 | SNAPPY_PATH := $(shell brew --prefix snappy) 6 | LZ4_PATH := $(shell brew --prefix lz4) 7 | ZSTD_PATH := $(shell brew --prefix zstd) 8 | endif 9 | 10 | # NOTE: Requires sudo apt install -y librocksdb-dev libsnappy-dev liblz4-dev libzstd-dev zlib1g-dev, plus cloning rocksdb and running DEBUG_LEVEL=0 make shared_lib install-shared 11 | ifeq ($(UNAME_S),Linux) 12 | ROCKSDB_PATH := /usr/local 13 | SNAPPY_PATH := /usr 14 | LZ4_PATH := /usr 15 | ZSTD_PATH := /usr 16 | endif 17 | 18 | install: 19 | go install ./cmd/seidb 20 | .PHONY: install 21 | 22 | install-all: 23 | CGO_CFLAGS="-I$(ROCKSDB_PATH)/include" CGO_LDFLAGS="-L$(ROCKSDB_PATH)/lib -L$(SNAPPY_PATH)/lib -L$(LZ4_PATH)/lib -L$(ZSTD_PATH)/lib -lrocksdb -lstdc++ -lm -lz -lsnappy -llz4 -lzstd" go install -tags "rocksdbBackend sqliteBackend" ./cmd/seidb 24 | .PHONY: install-all 25 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sei-protocol/sei-db/a90aa3ade77e93ac53b10d0505f6f4180f233478/tools/README.md -------------------------------------------------------------------------------- /tools/cmd/seidb/benchmark/generate.go: -------------------------------------------------------------------------------- 1 | package benchmark 2 | 3 | import ( 4 | "fmt" 5 | "io/fs" 6 | "os" 7 | "path/filepath" 8 | "strings" 9 | 10 | "github.com/sei-protocol/sei-db/tools/utils" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | func GenerateCmd() *cobra.Command { 15 | generateCmd := &cobra.Command{ 16 | Use: "generate", 17 | Short: "Generate uses the iavl viewer logic to write out the raw keys and values from the db for each module", 18 | Run: generate, 19 | } 20 | 21 | generateCmd.PersistentFlags().StringP("leveldb-dir", "l", "/root/.sei/data/application.db", "LevelDB directory") 22 | generateCmd.PersistentFlags().StringP("output-dir", "o", "", "Output Directory") 23 |
generateCmd.PersistentFlags().StringP("modules", "m", "", "Comma separated modules to export") 24 | generateCmd.PersistentFlags().IntP("version", "v", 0, "Database Version") 25 | generateCmd.PersistentFlags().IntP("chunk-size", "c", 1000, "KV File Chunk Size") 26 | 27 | return generateCmd 28 | } 29 | 30 | func generate(cmd *cobra.Command, _ []string) { 31 | levelDBDir, _ := cmd.Flags().GetString("leveldb-dir") 32 | outputDir, _ := cmd.Flags().GetString("output-dir") 33 | modules, _ := cmd.Flags().GetString("modules") 34 | version, _ := cmd.Flags().GetInt("version") 35 | chunkSize, _ := cmd.Flags().GetInt("chunk-size") 36 | 37 | if outputDir == "" { 38 | panic("Must provide output dir when generating raw kv data") 39 | } 40 | 41 | // Default to all modules 42 | exportModules := []string{ 43 | "dex", "wasm", "accesscontrol", "oracle", "epoch", "mint", "acc", "bank", "crisis", "feegrant", "staking", "distribution", "slashing", "gov", "params", "ibc", "upgrade", "evidence", "transfer", "tokenfactory", 44 | } 45 | if modules != "" { 46 | exportModules = strings.Split(modules, ",") 47 | } 48 | GenerateData(levelDBDir, exportModules, outputDir, version, chunkSize) 49 | } 50 | 51 | // Outputs the raw keys and values for all modules at a height to a file 52 | func GenerateData(dbDir string, modules []string, outputDir string, version int, chunkSize int) { 53 | // Create output directory 54 | err := os.MkdirAll(outputDir, fs.ModePerm) 55 | if err != nil { 56 | panic(err) 57 | } 58 | 59 | // Generate raw kv data for each module 60 | db, err := utils.OpenDB(dbDir) 61 | if err != nil { 62 | panic(err) 63 | } 64 | for _, module := range modules { 65 | fmt.Printf("Generating Raw Keys and Values for %s module at version %d\n", module, version) 66 | 67 | modulePrefix := fmt.Sprintf("s/k:%s/", module) 68 | tree, err := utils.ReadTree(db, version, []byte(modulePrefix)) 69 | if err != nil { 70 | fmt.Fprintf(os.Stderr, "Error reading data: %s\n", err) 71 | return 72 | } 73 | treeHash, err := tree.Hash() 74 | if err != nil { 75 | fmt.Fprintf(os.Stderr, "Error hashing tree: %s\n", err) 76 | return 77 | } 78 | 79 | fmt.Printf("Tree hash is %X, tree size is %d\n", treeHash, tree.ImmutableTree().Size()) 80 | 81 | outputFileNamePattern := filepath.Join(outputDir, module) 82 | utils.WriteTreeDataToFile(tree, outputFileNamePattern, chunkSize) 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /tools/cmd/seidb/benchmark/iteration.go: -------------------------------------------------------------------------------- 1 | package benchmark 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/sei-protocol/sei-db/common/logger" 7 | "github.com/sei-protocol/sei-db/config" 8 | "github.com/sei-protocol/sei-db/ss" 9 | "github.com/sei-protocol/sei-db/tools/dbbackend" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | func DBIterationCmd() *cobra.Command { 14 | benchmarkForwardIterationCmd := &cobra.Command{ 15 | Use: "benchmark-iteration", 16 | Short: "Benchmark iteration is designed to measure forward iteration performance of different db backends", 17 | Run: executeForwardIteration, 18 | } 19 | 20 | benchmarkForwardIterationCmd.PersistentFlags().StringP("db-backend", "d", "", "DB Backend") 21 | benchmarkForwardIterationCmd.PersistentFlags().StringP("raw-kv-input-dir", "r", "", "Input Directory for benchmark which contains the raw kv data") 22 | benchmarkForwardIterationCmd.PersistentFlags().StringP("output-dir", "o", "", "Output Directory") 23 | 
benchmarkForwardIterationCmd.PersistentFlags().IntP("concurrency", "c", 1, "Concurrency while writing to db") 24 | benchmarkForwardIterationCmd.PersistentFlags().Int64P("max-operations", "p", 1000, "Max operations to run") 25 | benchmarkForwardIterationCmd.PersistentFlags().IntP("num-versions", "v", 1, "number of versions in db") 26 | benchmarkForwardIterationCmd.PersistentFlags().IntP("iteration-steps", "i", 10, "Number of steps to run per iteration") 27 | 28 | return benchmarkForwardIterationCmd 29 | } 30 | 31 | func executeForwardIteration(cmd *cobra.Command, args []string) { 32 | dbBackend, _ := cmd.Flags().GetString("db-backend") 33 | rawKVInputDir, _ := cmd.Flags().GetString("raw-kv-input-dir") 34 | outputDir, _ := cmd.Flags().GetString("output-dir") 35 | numVersions, _ := cmd.Flags().GetInt("num-versions") 36 | concurrency, _ := cmd.Flags().GetInt("concurrency") 37 | maxOps, _ := cmd.Flags().GetInt64("max-operations") 38 | iterationSteps, _ := cmd.Flags().GetInt("iteration-steps") 39 | 40 | if dbBackend == "" { 41 | panic("Must provide db backend when benchmarking") 42 | } 43 | 44 | if rawKVInputDir == "" { 45 | panic("Must provide raw kv input dir when benchmarking") 46 | } 47 | 48 | if outputDir == "" { 49 | panic("Must provide output dir") 50 | } 51 | 52 | _, isAcceptedBackend := ValidDBBackends[dbBackend] 53 | if !isAcceptedBackend { 54 | panic(fmt.Sprintf("Unsupported db backend: %s\n", dbBackend)) 55 | } 56 | 57 | DBIteration(rawKVInputDir, numVersions, outputDir, dbBackend, concurrency, maxOps, iterationSteps) 58 | } 59 | 60 | // BenchmarkDBIteration read latencies and throughput of db backend 61 | func DBIteration(inputKVDir string, numVersions int, outputDir string, dbBackend string, concurrency int, maxOps int64, iterationSteps int) { 62 | // Iterate over db at directory 63 | fmt.Printf("Iterating Over DB at %s\n", outputDir) 64 | ssConfig := config.DefaultStateStoreConfig() 65 | ssConfig.Backend = dbBackend 66 | backend, err := ss.NewStateStore(logger.NewNopLogger(), outputDir, ssConfig) 67 | if err != nil { 68 | panic(err) 69 | } 70 | dbbackend.BenchmarkDBForwardIteration(backend, inputKVDir, numVersions, concurrency, maxOps, iterationSteps) 71 | backend.Close() 72 | } 73 | -------------------------------------------------------------------------------- /tools/cmd/seidb/benchmark/random_read.go: -------------------------------------------------------------------------------- 1 | package benchmark 2 | 3 | import ( 4 | "fmt" 5 | "io/fs" 6 | "os" 7 | 8 | "github.com/sei-protocol/sei-db/common/logger" 9 | "github.com/sei-protocol/sei-db/config" 10 | "github.com/sei-protocol/sei-db/ss" 11 | "github.com/sei-protocol/sei-db/tools/dbbackend" 12 | "github.com/spf13/cobra" 13 | ) 14 | 15 | func DBRandomReadCmd() *cobra.Command { 16 | benchmarkReadCmd := &cobra.Command{ 17 | Use: "benchmark-read", 18 | Short: "Benchmark read is designed to measure read performance of different db backends", 19 | Run: executeRandomRead, 20 | } 21 | 22 | benchmarkReadCmd.PersistentFlags().StringP("db-backend", "d", "", "DB Backend") 23 | benchmarkReadCmd.PersistentFlags().StringP("raw-kv-input-dir", "r", "", "Input Directory for benchmark which contains the raw kv data") 24 | benchmarkReadCmd.PersistentFlags().StringP("output-dir", "o", "", "Output Directory") 25 | benchmarkReadCmd.PersistentFlags().IntP("concurrency", "c", 1, "Concurrency while writing to db") 26 | benchmarkReadCmd.PersistentFlags().Int64P("max-operations", "p", 1000, "Max operations to run") 27 | 
benchmarkReadCmd.PersistentFlags().IntP("num-versions", "v", 1, "number of versions in db") 28 | 29 | return benchmarkReadCmd 30 | } 31 | 32 | func executeRandomRead(cmd *cobra.Command, args []string) { 33 | dbBackend, _ := cmd.Flags().GetString("db-backend") 34 | rawKVInputDir, _ := cmd.Flags().GetString("raw-kv-input-dir") 35 | outputDir, _ := cmd.Flags().GetString("output-dir") 36 | numVersions, _ := cmd.Flags().GetInt("num-versions") 37 | concurrency, _ := cmd.Flags().GetInt("concurrency") 38 | maxOps, _ := cmd.Flags().GetInt64("max-operations") 39 | 40 | if dbBackend == "" { 41 | panic("Must provide db backend when benchmarking") 42 | } 43 | 44 | if rawKVInputDir == "" { 45 | panic("Must provide raw kv input dir when benchmarking") 46 | } 47 | 48 | if outputDir == "" { 49 | panic("Must provide output dir") 50 | } 51 | 52 | _, isAcceptedBackend := ValidDBBackends[dbBackend] 53 | if !isAcceptedBackend { 54 | panic(fmt.Sprintf("Unsupported db backend: %s\n", dbBackend)) 55 | } 56 | 57 | DBRandomRead(rawKVInputDir, numVersions, outputDir, dbBackend, concurrency, maxOps) 58 | } 59 | 60 | // BenchmarkRead read latencies and throughput of db backend 61 | func DBRandomRead(inputKVDir string, numVersions int, outputDir string, dbBackend string, concurrency int, maxOps int64) { 62 | // Create output directory 63 | err := os.MkdirAll(outputDir, fs.ModePerm) 64 | if err != nil { 65 | panic(err) 66 | } 67 | // Iterate over files in directory 68 | fmt.Printf("Reading Raw Keys and Values from %s\n", inputKVDir) 69 | ssConfig := config.DefaultStateStoreConfig() 70 | ssConfig.Backend = dbBackend 71 | backend, err := ss.NewStateStore(logger.NewNopLogger(), outputDir, ssConfig) 72 | if err != nil { 73 | panic(err) 74 | } 75 | dbbackend.BenchmarkDBRead(backend, inputKVDir, numVersions, concurrency, maxOps) 76 | backend.Close() 77 | } 78 | -------------------------------------------------------------------------------- /tools/cmd/seidb/benchmark/reverse_iteration.go: -------------------------------------------------------------------------------- 1 | package benchmark 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/sei-protocol/sei-db/common/logger" 7 | "github.com/sei-protocol/sei-db/config" 8 | "github.com/sei-protocol/sei-db/ss" 9 | "github.com/sei-protocol/sei-db/tools/dbbackend" 10 | "github.com/spf13/cobra" 11 | ) 12 | 13 | func DBReverseIterationCmd() *cobra.Command { 14 | benchmarkReverseIterationCmd := &cobra.Command{ 15 | Use: "benchmark-reverse-iteration", 16 | Short: "Benchmark reverse iteration is designed to measure reverse iteration performance of different db backends", 17 | Run: executeReverseIteration, 18 | } 19 | 20 | benchmarkReverseIterationCmd.PersistentFlags().StringP("db-backend", "d", "", "DB Backend") 21 | benchmarkReverseIterationCmd.PersistentFlags().StringP("raw-kv-input-dir", "r", "", "Input Directory for benchmark which contains the raw kv data") 22 | benchmarkReverseIterationCmd.PersistentFlags().StringP("output-dir", "o", "", "Output Directory") 23 | benchmarkReverseIterationCmd.PersistentFlags().IntP("concurrency", "c", 1, "Concurrency while writing to db") 24 | benchmarkReverseIterationCmd.PersistentFlags().Int64P("max-operations", "p", 1000, "Max operations to run") 25 | benchmarkReverseIterationCmd.PersistentFlags().IntP("num-versions", "v", 1, "number of versions in db") 26 | benchmarkReverseIterationCmd.PersistentFlags().IntP("iteration-steps", "i", 10, "Number of steps to run per iteration") 27 | 28 | return benchmarkReverseIterationCmd 29 | } 30 | 31 | func 
executeReverseIteration(cmd *cobra.Command, args []string) { 32 | dbBackend, _ := cmd.Flags().GetString("db-backend") 33 | rawKVInputDir, _ := cmd.Flags().GetString("raw-kv-input-dir") 34 | outputDir, _ := cmd.Flags().GetString("output-dir") 35 | numVersions, _ := cmd.Flags().GetInt("num-versions") 36 | concurrency, _ := cmd.Flags().GetInt("concurrency") 37 | maxOps, _ := cmd.Flags().GetInt64("max-operations") 38 | iterationSteps, _ := cmd.Flags().GetInt("iteration-steps") 39 | 40 | if dbBackend == "" { 41 | panic("Must provide db backend when benchmarking") 42 | } 43 | 44 | if rawKVInputDir == "" { 45 | panic("Must provide raw kv input dir when benchmarking") 46 | } 47 | 48 | if outputDir == "" { 49 | panic("Must provide output dir") 50 | } 51 | 52 | _, isAcceptedBackend := ValidDBBackends[dbBackend] 53 | if !isAcceptedBackend { 54 | panic(fmt.Sprintf("Unsupported db backend: %s\n", dbBackend)) 55 | } 56 | 57 | DBReverseIteration(rawKVInputDir, numVersions, outputDir, dbBackend, concurrency, maxOps, iterationSteps) 58 | } 59 | 60 | // DBReverseIteration measures reverse iteration performance of the db backend 61 | func DBReverseIteration(inputKVDir string, numVersions int, outputDir string, dbBackend string, concurrency int, maxOps int64, iterationSteps int) { 62 | // Reverse Iterate over db at directory 63 | fmt.Printf("Iterating Over DB at %s\n", outputDir) 64 | ssConfig := config.DefaultStateStoreConfig() 65 | ssConfig.Backend = dbBackend 66 | backend, err := ss.NewStateStore(logger.NewNopLogger(), outputDir, ssConfig) 67 | if err != nil { 68 | panic(err) 69 | } 70 | dbbackend.BenchmarkDBReverseIteration(backend, inputKVDir, numVersions, concurrency, maxOps, iterationSteps) 71 | backend.Close() 72 | } 73 | -------------------------------------------------------------------------------- /tools/cmd/seidb/benchmark/types.go: -------------------------------------------------------------------------------- 1 | package benchmark 2 | 3 | const RocksDBBackendName = "rocksdb" 4 | const PebbleDBBackendName = "pebbledb" 5 | 6 | var ( 7 | 8 | // TODO: Will include rocksdb, pebbledb and sqlite in future PRs 9 | ValidDBBackends = map[string]bool{ 10 | RocksDBBackendName: true, 11 | PebbleDBBackendName: true, 12 | } 13 | ) 14 | -------------------------------------------------------------------------------- /tools/cmd/seidb/benchmark/write.go: -------------------------------------------------------------------------------- 1 | package benchmark 2 | 3 | import ( 4 | "fmt" 5 | "io/fs" 6 | "os" 7 | 8 | "github.com/sei-protocol/sei-db/common/logger" 9 | "github.com/sei-protocol/sei-db/config" 10 | "github.com/sei-protocol/sei-db/ss" 11 | "github.com/sei-protocol/sei-db/tools/dbbackend" 12 | "github.com/spf13/cobra" 13 | ) 14 | 15 | func DBWriteCmd() *cobra.Command { 16 | benchmarkWriteCmd := &cobra.Command{ 17 | Use: "benchmark-write", 18 | Short: "Benchmark write is designed to measure write performance of different db backends", 19 | Run: executeWrite, 20 | } 21 | 22 | benchmarkWriteCmd.PersistentFlags().StringP("db-backend", "d", "", "DB Backend") 23 | benchmarkWriteCmd.PersistentFlags().StringP("raw-kv-input-dir", "r", "", "Input Directory for benchmark which contains the raw kv data") 24 | benchmarkWriteCmd.PersistentFlags().StringP("output-dir", "o", "", "Output Directory") 25 | benchmarkWriteCmd.PersistentFlags().IntP("concurrency", "c", 1, "Concurrency while writing to db") 26 | benchmarkWriteCmd.PersistentFlags().IntP("batch-size", "b", 1, "batch size for db writes") 27 |
benchmarkWriteCmd.PersistentFlags().IntP("num-versions", "v", 1, "number of versions in db") 28 | 29 | return benchmarkWriteCmd 30 | } 31 | 32 | func executeWrite(cmd *cobra.Command, args []string) { 33 | dbBackend, _ := cmd.Flags().GetString("db-backend") 34 | rawKVInputDir, _ := cmd.Flags().GetString("raw-kv-input-dir") 35 | outputDir, _ := cmd.Flags().GetString("output-dir") 36 | numVersions, _ := cmd.Flags().GetInt("num-versions") 37 | concurrency, _ := cmd.Flags().GetInt("concurrency") 38 | batchSize, _ := cmd.Flags().GetInt("batch-size") 39 | 40 | if dbBackend == "" { 41 | panic("Must provide db backend when benchmarking") 42 | } 43 | 44 | if rawKVInputDir == "" { 45 | panic("Must provide raw kv input dir when benchmarking") 46 | } 47 | 48 | if outputDir == "" { 49 | panic("Must provide output dir") 50 | } 51 | 52 | _, isAcceptedBackend := ValidDBBackends[dbBackend] 53 | if !isAcceptedBackend { 54 | panic(fmt.Sprintf("Unsupported db backend: %s\n", dbBackend)) 55 | } 56 | 57 | DBWrite(rawKVInputDir, numVersions, outputDir, dbBackend, concurrency, batchSize) 58 | } 59 | 60 | // BenchmarkWrite write latencies and throughput of db backend 61 | func DBWrite(inputKVDir string, numVersions int, outputDir string, dbBackend string, concurrency int, batchSize int) { 62 | // Create output directory 63 | err := os.MkdirAll(outputDir, fs.ModePerm) 64 | if err != nil { 65 | panic(err) 66 | } 67 | // Iterate over files in directory 68 | fmt.Printf("Reading Raw Keys and Values from %s\n", inputKVDir) 69 | ssConfig := config.DefaultStateStoreConfig() 70 | ssConfig.Backend = dbBackend 71 | backend, err := ss.NewStateStore(logger.NewNopLogger(), outputDir, ssConfig) 72 | if err != nil { 73 | panic(err) 74 | } 75 | dbbackend.BenchmarkDBWrite(backend, inputKVDir, numVersions, concurrency, batchSize) 76 | backend.Close() 77 | } 78 | -------------------------------------------------------------------------------- /tools/cmd/seidb/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/sei-protocol/sei-db/tools/cmd/seidb/benchmark" 8 | "github.com/sei-protocol/sei-db/tools/cmd/seidb/operations" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | func main() { 13 | rootCmd := &cobra.Command{ 14 | Use: "seidb", 15 | Short: "A tool to generate raw key value data from a node as well as benchmark different backends", 16 | } 17 | 18 | rootCmd.AddCommand( 19 | benchmark.GenerateCmd(), 20 | benchmark.DBWriteCmd(), 21 | benchmark.DBRandomReadCmd(), 22 | benchmark.DBIterationCmd(), 23 | benchmark.DBReverseIterationCmd(), 24 | operations.DumpDbCmd(), 25 | operations.PruneCmd(), 26 | operations.DumpIAVLCmd(), 27 | operations.StateSizeCmd(), 28 | operations.ReplayChangelogCmd()) 29 | if err := rootCmd.Execute(); err != nil { 30 | fmt.Println(err) 31 | os.Exit(1) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /tools/cmd/seidb/operations/dump_db.go: -------------------------------------------------------------------------------- 1 | package operations 2 | 3 | import ( 4 | "fmt" 5 | "io/fs" 6 | "os" 7 | 8 | "github.com/sei-protocol/sei-db/common/logger" 9 | "github.com/sei-protocol/sei-db/config" 10 | "github.com/sei-protocol/sei-db/ss" 11 | "github.com/sei-protocol/sei-db/tools/cmd/seidb/benchmark" 12 | "github.com/sei-protocol/sei-db/tools/utils" 13 | "github.com/spf13/cobra" 14 | ) 15 | 16 | const outputFileName = "db_dump.kv" 17 | 18 | func DumpDbCmd() *cobra.Command 
{ 19 | dumpDbCmd := &cobra.Command{ 20 | Use: "dump-db", 21 | Short: "For a given State Store DB, dump-db iterates over all keys and values for a specific store and writes them to a file", 22 | Run: executeDumpDB, 23 | } 24 | 25 | dumpDbCmd.PersistentFlags().StringP("output-dir", "o", "", "Output Directory") 26 | dumpDbCmd.PersistentFlags().StringP("db-dir", "d", "", "Database Directory") 27 | // TODO: Accept multiple modules. Can pass empty to iterate over all stores 28 | dumpDbCmd.PersistentFlags().StringP("module", "m", "", "Module to export. Leave empty to export all") 29 | dumpDbCmd.PersistentFlags().StringP("db-backend", "b", "", "DB Backend") 30 | 31 | return dumpDbCmd 32 | } 33 | 34 | func executeDumpDB(cmd *cobra.Command, _ []string) { 35 | outputDir, _ := cmd.Flags().GetString("output-dir") 36 | module, _ := cmd.Flags().GetString("module") 37 | dbDir, _ := cmd.Flags().GetString("db-dir") 38 | dbBackend, _ := cmd.Flags().GetString("db-backend") 39 | 40 | if dbDir == "" { 41 | panic("Must provide database dir") 42 | } 43 | 44 | if dbBackend == "" { 45 | panic("Must provide db backend") 46 | } 47 | 48 | _, isAcceptedBackend := benchmark.ValidDBBackends[dbBackend] 49 | if !isAcceptedBackend { 50 | panic(fmt.Sprintf("Unsupported db backend: %s\n", dbBackend)) 51 | } 52 | 53 | if outputDir == "" { 54 | panic("Must provide output dir when generating db export") 55 | } 56 | 57 | DumpDbData(dbBackend, module, outputDir, dbDir) 58 | } 59 | 60 | // DumpDbData writes the raw keys, values and versions of the given module (or all modules if empty) to a file 61 | func DumpDbData(dbBackend string, module string, outputDir string, dbDir string) { 62 | // Create output directory 63 | err := os.MkdirAll(outputDir, fs.ModePerm) 64 | if err != nil { 65 | panic(err) 66 | } 67 | 68 | // Create output file 69 | currentFile, err := utils.CreateFile(outputDir, outputFileName) 70 | if err != nil { 71 | panic(err) 72 | } 73 | defer currentFile.Close() 74 | 75 | // TODO: Defer Close Db 76 | ssConfig := config.DefaultStateStoreConfig() 77 | ssConfig.Backend = dbBackend 78 | backend, err := ss.NewStateStore(logger.NewNopLogger(), dbDir, ssConfig) // open the existing state store at dbDir, not the output dir 79 | if err != nil { 80 | panic(err) 81 | } 82 | 83 | fmt.Printf("Writing db data to %s...\n", outputFileName) 84 | 85 | // Callback to write db entries to file 86 | _, err = backend.RawIterate(module, func(key, value []byte, version int64) bool { 87 | _, err = currentFile.WriteString(fmt.Sprintf("Key: %X Val: %X Version: %d\n", key, value, version)) 88 | if err != nil { 89 | panic(err) 90 | } 91 | return false // returning false continues the iteration 92 | }) 93 | if err != nil { 94 | panic(err) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /tools/cmd/seidb/operations/dump_iavl.go: -------------------------------------------------------------------------------- 1 | package operations 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/sei-protocol/sei-db/common/logger" 7 | "github.com/sei-protocol/sei-db/sc/memiavl" 8 | "github.com/sei-protocol/sei-db/tools/utils" 9 | "github.com/spf13/cobra" 10 | ) 11 | 12 | func DumpIAVLCmd() *cobra.Command { 13 | cmd := &cobra.Command{ 14 | Use: "dump-iavl", 15 | Short: "Iterate and dump memIAVL data", 16 | Run: executeDumpIAVL, 17 | } 18 | 19 | cmd.PersistentFlags().StringP("db-dir", "d", "", "Database Directory") 20 | cmd.PersistentFlags().StringP("output-dir", "o", "", "Output Directory") 21 | cmd.PersistentFlags().Int64("height", 0, "Block Height") 22 | cmd.PersistentFlags().StringP("module", "m", "", "Module to export.
Default to export all") 23 | return cmd 24 | } 25 | 26 | func executeDumpIAVL(cmd *cobra.Command, _ []string) { 27 | module, _ := cmd.Flags().GetString("module") 28 | dbDir, _ := cmd.Flags().GetString("db-dir") 29 | outputDir, _ := cmd.Flags().GetString("output-dir") 30 | height, _ := cmd.Flags().GetInt64("height") 31 | 32 | if dbDir == "" { 33 | panic("Must provide database dir") 34 | } 35 | 36 | if outputDir == "" { 37 | panic("Must provide output dir") 38 | } 39 | 40 | opts := memiavl.Options{ 41 | Dir: dbDir, 42 | ZeroCopy: true, 43 | CreateIfMissing: false, 44 | } 45 | db, err := memiavl.OpenDB(logger.NewNopLogger(), height, opts) 46 | if err != nil { 47 | panic(err) 48 | } 49 | defer db.Close() 50 | err = DumpIAVLData(module, db, outputDir) 51 | if err != nil { 52 | panic(err) 53 | } 54 | } 55 | 56 | // DumpIAVLData print the raw keys and values for given module at given height for memIAVL tree 57 | func DumpIAVLData(module string, db *memiavl.DB, outputDir string) error { 58 | modules := []string{} 59 | if module == "" { 60 | modules = AllModules 61 | } else { 62 | modules = append(modules, module) 63 | } 64 | 65 | for _, moduleName := range modules { 66 | tree := db.TreeByName(moduleName) 67 | if tree == nil { 68 | fmt.Printf("Tree does not exist for module %s \n", moduleName) 69 | } else { 70 | fmt.Printf("Dumping module: %s \n", moduleName) 71 | currentFile, err := utils.CreateFile(outputDir, moduleName) 72 | if err != nil { 73 | return err 74 | } 75 | _, err = currentFile.WriteString(fmt.Sprintf("Tree %s has version %d and root hash: %X \n", moduleName, tree.Version(), tree.RootHash())) 76 | if err != nil { 77 | return nil 78 | } 79 | tree.ScanPostOrder(func(node memiavl.Node) bool { 80 | if node.IsLeaf() { 81 | _, err := currentFile.WriteString(fmt.Sprintf("Key: %X, Value: %X \n", node.Key(), node.Value())) 82 | if err != nil { 83 | panic(err) 84 | } 85 | } 86 | return true 87 | }) 88 | currentFile.Close() 89 | fmt.Printf("Finished dumping module: %s \n", moduleName) 90 | } 91 | } 92 | return nil 93 | } 94 | -------------------------------------------------------------------------------- /tools/cmd/seidb/operations/module.go: -------------------------------------------------------------------------------- 1 | package operations 2 | 3 | var AllModules = []string{ 4 | "evm", "dex", "wasm", "aclaccesscontrol", "oracle", "epoch", "mint", "acc", "bank", "crisis", "feegrant", "staking", "distribution", "slashing", "gov", "params", "ibc", "upgrade", "evidence", "transfer", "tokenfactory", 5 | } 6 | -------------------------------------------------------------------------------- /tools/cmd/seidb/operations/prune.go: -------------------------------------------------------------------------------- 1 | package operations 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/sei-protocol/sei-db/common/logger" 7 | 8 | "github.com/sei-protocol/sei-db/config" 9 | "github.com/sei-protocol/sei-db/ss" 10 | "github.com/sei-protocol/sei-db/tools/cmd/seidb/benchmark" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | func PruneCmd() *cobra.Command { 15 | pruneDbCmd := &cobra.Command{ 16 | Use: "prune", 17 | Short: "Prune a db at a given height", 18 | Run: executePrune, 19 | } 20 | 21 | pruneDbCmd.PersistentFlags().StringP("db-dir", "d", "", "Database Directory") 22 | pruneDbCmd.PersistentFlags().StringP("db-backend", "b", "", "DB Backend") 23 | pruneDbCmd.PersistentFlags().Int64P("version", "v", 0, "Version to prune at") 24 | 25 | return pruneDbCmd 26 | } 27 | 28 | func executePrune(cmd *cobra.Command, _ 
[]string) { 29 | dbDir, _ := cmd.Flags().GetString("db-dir") 30 | dbBackend, _ := cmd.Flags().GetString("db-backend") 31 | version, _ := cmd.Flags().GetInt64("version") 32 | 33 | if dbDir == "" { 34 | panic("Must provide database dir") 35 | } 36 | 37 | if dbBackend == "" { 38 | panic("Must provide db backend") 39 | } 40 | 41 | _, isAcceptedBackend := benchmark.ValidDBBackends[dbBackend] 42 | if !isAcceptedBackend { 43 | panic(fmt.Sprintf("Unsupported db backend: %s\n", dbBackend)) 44 | } 45 | 46 | if version == 0 { 47 | panic("Must provide prune version") 48 | } 49 | 50 | PruneDB(dbBackend, dbDir, version) 51 | } 52 | 53 | // PruneDB prunes the DB up to and including the given version 54 | func PruneDB(dbBackend string, dbDir string, version int64) { 55 | // TODO: Defer Close Db 56 | ssConfig := config.DefaultStateStoreConfig() 57 | ssConfig.Backend = dbBackend 58 | backend, err := ss.NewStateStore(logger.NewNopLogger(), dbDir, ssConfig) 59 | if err != nil { 60 | panic(err) 61 | } 62 | 63 | fmt.Printf("Pruning %s db at path %s up to version %d...\n", dbBackend, dbDir, version) 64 | 65 | // Prune all versions up to and including the given version 66 | err = backend.Prune(version) 67 | if err != nil { 68 | panic(err) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /tools/cmd/seidb/operations/replay_changelog.go: -------------------------------------------------------------------------------- 1 | package operations 2 | 3 | import ( 4 | "fmt" 5 | "path/filepath" 6 | 7 | "github.com/sei-protocol/sei-db/common/logger" 8 | "github.com/sei-protocol/sei-db/config" 9 | "github.com/sei-protocol/sei-db/proto" 10 | "github.com/sei-protocol/sei-db/ss" 11 | "github.com/sei-protocol/sei-db/ss/types" 12 | "github.com/sei-protocol/sei-db/stream/changelog" 13 | "github.com/spf13/cobra" 14 | ) 15 | 16 | var ssStore types.StateStore 17 | var dryRun = true 18 | 19 | func ReplayChangelogCmd() *cobra.Command { 20 | dumpDbCmd := &cobra.Command{ 21 | Use: "replay-changelog", 22 | Short: "Scan the changelog to replay and recover pebbledb data", 23 | Run: executeReplayChangelog, 24 | } 25 | 26 | dumpDbCmd.PersistentFlags().StringP("db-dir", "d", "", "Database Directory") 27 | dumpDbCmd.PersistentFlags().Uint64P("start-offset", "s", 0, "Start offset, default to earliest offset") 28 | dumpDbCmd.PersistentFlags().Uint64P("end-offset", "e", 0, "End offset, default to latest offset") 29 | dumpDbCmd.PersistentFlags().Bool("no-dry-run", false, "Whether to dry run or re-apply the changelog to DB") 30 | 31 | return dumpDbCmd 32 | } 33 | 34 | func executeReplayChangelog(cmd *cobra.Command, _ []string) { 35 | dbDir, _ := cmd.Flags().GetString("db-dir") 36 | start, _ := cmd.Flags().GetUint64("start-offset") 37 | end, _ := cmd.Flags().GetUint64("end-offset") 38 | noDryRun, _ := cmd.Flags().GetBool("no-dry-run") 39 | if dbDir == "" { 40 | panic("Must provide database dir") 41 | } 42 | 43 | logDir := filepath.Join(dbDir, "changelog") 44 | stream, err := changelog.NewStream(logger.NewNopLogger(), logDir, changelog.Config{}) 45 | if err != nil { 46 | panic(err) 47 | } 48 | 49 | // use first available offset 50 | if start <= 0 { 51 | startOffset, err := stream.FirstOffset() 52 | if err != nil { 53 | panic(err) 54 | } 55 | start = startOffset 56 | } 57 | 58 | if end <= 0 { 59 | // use latest offset 60 | endOffset, err := stream.LastOffset() 61 | if err != nil { 62 | panic(err) 63 | } 64 | end = endOffset 65 | } 66 | 67 | // open the database if this is not a dry run 68 | if noDryRun { 69 | dryRun = false 70 | ssConfig :=
config.DefaultStateStoreConfig() 71 | ssConfig.KeepRecent = 0 72 | ssConfig.DBDirectory = dbDir 73 | ssStore, err = ss.NewStateStore(logger.NewNopLogger(), dbDir, ssConfig) 74 | if err != nil { 75 | panic(err) 76 | } 77 | } 78 | 79 | // replay the changelog 80 | err = stream.Replay(start, end, processChangelogEntry) 81 | if err != nil { 82 | panic(err) 83 | } 84 | 85 | // close the database 86 | if ssStore != nil { 87 | ssStore.Close() 88 | } 89 | 90 | } 91 | 92 | func processChangelogEntry(index uint64, entry proto.ChangelogEntry) error { 93 | fmt.Printf("Offset: %d, Height: %d\n", index, entry.Version) 94 | for _, changeset := range entry.Changesets { 95 | storeName := changeset.Name 96 | for _, kv := range changeset.Changeset.Pairs { 97 | if dryRun { 98 | fmt.Printf("store: %s, key: %X\n", storeName, kv.Key) 99 | } 100 | } 101 | if ssStore != nil { 102 | err := ssStore.ApplyChangeset(entry.Version, changeset) 103 | if err != nil { 104 | return err 105 | } 106 | fmt.Printf("Re-applied changeset for height %d\n", entry.Version) 107 | } 108 | } 109 | return nil 110 | } 111 | -------------------------------------------------------------------------------- /tools/cmd/seidb/operations/state_size.go: -------------------------------------------------------------------------------- 1 | package operations 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "sort" 7 | "strings" 8 | 9 | "github.com/sei-protocol/sei-db/common/logger" 10 | "github.com/sei-protocol/sei-db/sc/memiavl" 11 | "github.com/spf13/cobra" 12 | ) 13 | 14 | func StateSizeCmd() *cobra.Command { 15 | cmd := &cobra.Command{ 16 | Use: "state-size", 17 | Short: "Print analytical results for state size", 18 | Run: executeStateSize, 19 | } 20 | 21 | cmd.PersistentFlags().StringP("db-dir", "d", "", "Database Directory") 22 | cmd.PersistentFlags().Int64("height", 0, "Block Height") 23 | cmd.PersistentFlags().StringP("module", "m", "", "Module to export.
Default to export all") 24 | return cmd 25 | } 26 | 27 | func executeStateSize(cmd *cobra.Command, _ []string) { 28 | module, _ := cmd.Flags().GetString("module") 29 | dbDir, _ := cmd.Flags().GetString("db-dir") 30 | height, _ := cmd.Flags().GetInt64("height") 31 | if dbDir == "" { 32 | panic("Must provide database dir") 33 | } 34 | 35 | opts := memiavl.Options{ 36 | Dir: dbDir, 37 | ZeroCopy: true, 38 | CreateIfMissing: false, 39 | } 40 | db, err := memiavl.OpenDB(logger.NewNopLogger(), height, opts) 41 | if err != nil { 42 | panic(err) 43 | } 44 | defer db.Close() 45 | err = PrintStateSize(module, db) 46 | if err != nil { 47 | panic(err) 48 | } 49 | } 50 | 51 | // PrintStateSize print the raw keys and values for given module at given height for memIAVL tree 52 | func PrintStateSize(module string, db *memiavl.DB) error { 53 | modules := []string{} 54 | if module == "" { 55 | modules = AllModules 56 | } else { 57 | modules = append(modules, module) 58 | } 59 | 60 | for _, moduleName := range modules { 61 | tree := db.TreeByName(moduleName) 62 | totalNumKeys := 0 63 | totalKeySize := 0 64 | totalValueSize := 0 65 | totalSize := 0 66 | if tree == nil { 67 | fmt.Printf("Tree does not exist for module %s \n", moduleName) 68 | } else { 69 | fmt.Printf("Calculating for module: %s \n", moduleName) 70 | keySizeByPrefix := map[string]int64{} 71 | valueSizeByPrefix := map[string]int64{} 72 | numKeysByPrefix := map[string]int64{} 73 | tree.ScanPostOrder(func(node memiavl.Node) bool { 74 | if node.IsLeaf() { 75 | totalNumKeys++ 76 | keySize := len(node.Key()) 77 | valueSize := len(node.Value()) 78 | totalKeySize += keySize 79 | totalValueSize += valueSize 80 | totalSize += keySize + valueSize 81 | prefix := fmt.Sprintf("%X", node.Key()) 82 | prefix = prefix[:2] 83 | keySizeByPrefix[prefix] += int64(keySize) 84 | valueSizeByPrefix[prefix] += int64(valueSize) 85 | numKeysByPrefix[prefix]++ 86 | } 87 | return true 88 | }) 89 | fmt.Printf("Module %s total numKeys:%d, total keySize:%d, total valueSize:%d, totalSize: %d \n", moduleName, totalNumKeys, totalKeySize, totalValueSize, totalSize) 90 | prefixKeyResult, _ := json.MarshalIndent(keySizeByPrefix, "", " ") 91 | fmt.Printf("Module %s prefix key size breakdown (bytes): %s \n", moduleName, prefixKeyResult) 92 | prefixValueResult, _ := json.MarshalIndent(valueSizeByPrefix, "", " ") 93 | fmt.Printf("Module %s prefix value size breakdown (bytes): %s \n", moduleName, prefixValueResult) 94 | numKeysResult, _ := json.MarshalIndent(numKeysByPrefix, "", " ") 95 | fmt.Printf("Module %s prefix num of keys breakdown: %s \n", moduleName, numKeysResult) 96 | 97 | // Print top 20 contracts by total size 98 | numToShow := 20 99 | if valueSizeByPrefix["03"] > 0 || keySizeByPrefix["03"] > 0 { 100 | type contractSizeEntry struct { 101 | Address string 102 | KeySize int64 103 | ValueSize int64 104 | TotalSize int64 105 | KeyCount int 106 | } 107 | 108 | contractSizes := make(map[string]*contractSizeEntry) 109 | 110 | // Scan again to collect per-contract statistics 111 | tree.ScanPostOrder(func(node memiavl.Node) bool { 112 | if node.IsLeaf() { 113 | prefix := fmt.Sprintf("%X", node.Key()) 114 | if prefix[:2] == "03" { 115 | // Extract contract address from key (assuming it follows after "03") 116 | addr := prefix[2:42] // Adjust indices based on your key format 117 | if _, exists := contractSizes[addr]; !exists { 118 | contractSizes[addr] = &contractSizeEntry{Address: addr} 119 | } 120 | entry := contractSizes[addr] 121 | entry.KeySize += int64(len(node.Key())) 122 | 
entry.ValueSize += int64(len(node.Value())) 123 | entry.TotalSize = entry.KeySize + entry.ValueSize 124 | entry.KeyCount++ 125 | } 126 | } 127 | return true 128 | }) 129 | 130 | // Convert map to slice 131 | var sortedContracts []contractSizeEntry 132 | for _, entry := range contractSizes { 133 | sortedContracts = append(sortedContracts, *entry) 134 | } 135 | 136 | // Sort by total size in descending order 137 | sort.Slice(sortedContracts, func(i, j int) bool { 138 | return sortedContracts[i].TotalSize > sortedContracts[j].TotalSize 139 | }) 140 | 141 | fmt.Printf("\nDetailed breakdown for 0x03 prefix (top 20 contracts by total size):\n") 142 | fmt.Printf("%-42s %15s %15s %15s %10s\n", "Contract Address", "Key Size", "Value Size", "Total Size", "Key Count") 143 | fmt.Printf("%s\n", strings.Repeat("-", 100)) 144 | 145 | if len(sortedContracts) < numToShow { 146 | numToShow = len(sortedContracts) 147 | } 148 | for i := 0; i < numToShow; i++ { 149 | contract := sortedContracts[i] 150 | fmt.Printf("0x%-40s %15d %15d %15d %10d\n", 151 | contract.Address, 152 | contract.KeySize, 153 | contract.ValueSize, 154 | contract.TotalSize, 155 | contract.KeyCount) 156 | } 157 | } 158 | } 159 | } 160 | return nil 161 | } 162 | -------------------------------------------------------------------------------- /tools/dbbackend/backend.go: -------------------------------------------------------------------------------- 1 | package dbbackend 2 | 3 | type DBBackend interface { 4 | BenchmarkDBWrite(inputKVDir string, numVersions int, outputDBPath string, concurrency int, batchSize int) 5 | BenchmarkDBRead(inputKVDir string, numVersions int, outputDBPath string, concurrency int, maxOps int64) 6 | BenchmarkDBForwardIteration(inputKVDir string, numVersions int, outputDBPath string, concurrency int, maxOps int64, iterationSteps int) 7 | BenchmarkDBReverseIteration(inputKVDir string, numVersions int, outputDBPath string, concurrency int, maxOps int64, iterationSteps int) 8 | } 9 | --------------------------------------------------------------------------------
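For orientation, here is a minimal end-to-end sketch of the StateStore API defined in ss/types/store.go, wired together the same way the benchmark and operations commands above do it. It is illustrative only: the "pebbledb" backend name comes from tools/cmd/seidb/benchmark/types.go, while the directory, store key, and key/value bytes are made-up examples, and it assumes config.DefaultStateStoreConfig() needs no further setup beyond the Backend field.

// example_statestore.go (illustrative sketch, not a file in this repository)
package main

import (
	"fmt"

	"github.com/cosmos/iavl"
	"github.com/sei-protocol/sei-db/common/logger"
	"github.com/sei-protocol/sei-db/config"
	"github.com/sei-protocol/sei-db/proto"
	"github.com/sei-protocol/sei-db/ss"
)

func main() {
	// Open a pebbledb-backed state store, as the seidb tools above do.
	ssConfig := config.DefaultStateStoreConfig()
	ssConfig.Backend = "pebbledb"
	db, err := ss.NewStateStore(logger.NewNopLogger(), "/tmp/seidb-example", ssConfig)
	if err != nil {
		panic(err)
	}
	defer db.Close()

	// Apply a changeset at version 1. Per the StateStore contract, the
	// version should be the latest version plus one and the pairs should
	// be ordered by (storeKey, key).
	ncs := &proto.NamedChangeSet{
		Name: "bank", // hypothetical store key
		Changeset: iavl.ChangeSet{
			Pairs: []*iavl.KVPair{{Key: []byte("key001"), Value: []byte("val001")}},
		},
	}
	if err := db.ApplyChangeset(1, ncs); err != nil {
		panic(err)
	}

	// Read the value back at version 1.
	val, err := db.Get("bank", 1, []byte("key001"))
	if err != nil {
		panic(err)
	}
	fmt.Printf("value at version 1: %s\n", val)
}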