├── devel
│   ├── .gitignore
│   ├── eth-block-meta
│   │   ├── clean.sql
│   │   ├── schema.sql
│   │   ├── substreams.dev.yaml
│   │   └── start.sh
│   ├── clickhouse-server
│   │   ├── config.xml
│   │   └── users.xml
│   ├── substreams-sink-sql
│   ├── up.sh
│   └── tutorial
│       └── start.sh
├── .sfreleaser
├── buf.work.yaml
├── db_proto
│   ├── test
│   │   └── substreams
│   │       ├── full-block
│   │       │   ├── src
│   │       │   │   ├── pb
│   │       │   │   │   ├── google.protobuf.rs
│   │       │   │   │   ├── .last_generated_hash
│   │       │   │   │   ├── sf.substreams.index.v1.rs
│   │       │   │   │   ├── sf.substreams.rs
│   │       │   │   │   ├── mod.rs
│   │       │   │   │   ├── sf.substreams.sink.sql.v1.rs
│   │       │   │   │   └── sf.substreams.sink.sql.service.v1.rs
│   │       │   │   └── lib.rs
│   │       │   ├── .gitignore
│   │       │   ├── buf.gen.yaml
│   │       │   ├── README.md
│   │       │   ├── Cargo.toml
│   │       │   └── substreams.yaml
│   │       └── order
│   │           ├── generator.json
│   │           ├── .gitignore
│   │           ├── buf.gen.yaml
│   │           ├── src
│   │           │   ├── pb
│   │           │   │   ├── sf.substreams.solana.v1.rs
│   │           │   │   ├── sf.substreams.rs
│   │           │   │   ├── sf.substreams.sink.sql.schema.v1.rs
│   │           │   │   └── mod.rs
│   │           │   └── lib.rs
│   │           ├── README.md
│   │           ├── Cargo.toml
│   │           ├── substreams.yaml
│   │           └── proto
│   │               └── test
│   │                   └── relations
│   │                       └── relations.proto
│   ├── sql
│   │   ├── inserter.go
│   │   ├── postgres
│   │   │   ├── inserter.go
│   │   │   ├── accumulator_inserter.go
│   │   │   ├── types.go
│   │   │   └── row_inserter.go
│   │   ├── context.go
│   │   ├── constraint.go
│   │   ├── utils.go
│   │   ├── dialect.go
│   │   └── schema
│   │       ├── column.go
│   │       ├── table.go
│   │       └── schema.go
│   ├── proto
│   │   └── utils.go
│   ├── stats
│   │   └── stats.go
│   └── sinker_factory.go
├── proto
│   ├── buf.yaml
│   ├── buf.lock
│   ├── utils.go
│   └── sf
│       └── substreams
│           └── sink
│               └── sql
│                   ├── v1
│                   │   └── deprecated.proto
│                   └── services
│                       └── v1
│                           └── services.proto
├── buf.gen.yaml
├── db_changes
│   ├── bundler
│   │   ├── writer
│   │   │   ├── init_test.go
│   │   │   ├── interface.go
│   │   │   ├── common.go
│   │   │   ├── types.go
│   │   │   └── buffered_test.go
│   │   ├── encoder.go
│   │   ├── stats.go
│   │   ├── bundler_test.go
│   │   └── bundler.go
│   ├── db
│   │   ├── init_test.go
│   │   ├── metrics.go
│   │   ├── user.go
│   │   ├── ops_test.go
│   │   ├── dialect.go
│   │   ├── types.go
│   │   ├── dialect_clickhouse_test.go
│   │   ├── dsn_test.go
│   │   ├── flush.go
│   │   ├── types_enum.go
│   │   ├── testing.go
│   │   ├── operations.go
│   │   ├── dsn.go
│   │   ├── operations_test.go
│   │   └── cursor.go
│   ├── state
│   │   ├── interface.go
│   │   └── file.go
│   └── sinker
│       ├── metrics.go
│       ├── factory.go
│       ├── setup.go
│       └── stats.go
├── .gitignore
├── cmd
│   └── substreams-sink-sql
│       ├── logging.go
│       ├── setup.go
│       ├── main.go
│       ├── create_user.go
│       ├── common_flags.go
│       ├── run.go
│       └── generate_csv.go
├── substreams.yaml
├── Dockerfile
├── services
│   ├── runner.go
│   └── dbt.go
├── .github
│   └── workflows
│       ├── build.yml
│       └── docker.yml
├── shared.go
├── docker-compose.yml
├── bytes
│   └── encoding.go
└── tests
    └── integration
        └── substreams_grpc_mock_test.go

/devel/.gitignore:
--------------------------------------------------------------------------------
1 | data/
--------------------------------------------------------------------------------
/.sfreleaser:
--------------------------------------------------------------------------------
1 | global:
2 |   language: golang
3 |   variant: application
--------------------------------------------------------------------------------
/buf.work.yaml:
--------------------------------------------------------------------------------
1 | version: v1
2 | directories:
3 |   - proto
4 |   - db_proto/test/substreams/order/proto
--------------------------------------------------------------------------------
/devel/eth-block-meta/clean.sql:
--------------------------------------------------------------------------------
1 | drop table if exists block_meta;
2 | drop table if exists cursors;
3 | 
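-- Devel usage note (a sketch): start.sh, shown later in this dump, applies this
-- cleanup script with the equivalent of:
--   PGPASSWORD="${pg_password}" psql -h localhost -U dev-node -d dev-node -c '\i clean.sql'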
--------------------------------------------------------------------------------
/db_proto/test/substreams/full-block/src/pb/google.protobuf.rs:
--------------------------------------------------------------------------------
1 | // @generated
2 | // @@protoc_insertion_point(module)
3 | 
--------------------------------------------------------------------------------
/db_proto/sql/inserter.go:
--------------------------------------------------------------------------------
1 | package sql
2 | 
3 | type Inserter interface {
4 |     Insert(table string, values []any) error
5 | }
6 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/full-block/src/pb/.last_generated_hash:
--------------------------------------------------------------------------------
1 | 6bcb4ea9d4b388b71e1d07a521cd0734fcf2ae1739c85b9a741e5f113a302388
--------------------------------------------------------------------------------
/db_proto/test/substreams/order/generator.json:
--------------------------------------------------------------------------------
1 | {
2 |   "generator": "sol-minimal",
3 |   "state": {
4 |     "name": "order",
5 |     "chainName": ""
6 |   }
7 | }
--------------------------------------------------------------------------------
/proto/buf.yaml:
--------------------------------------------------------------------------------
1 | version: v1
2 | name: buf.build/streamingfast/substreams-sink-sql
3 | deps:
4 |   - buf.build/streamingfast/substreams
5 | lint:
6 |   use:
7 |     - DEFAULT
8 | breaking:
9 |   use:
10 |     - FILE
--------------------------------------------------------------------------------
/buf.gen.yaml:
--------------------------------------------------------------------------------
1 | version: v1
2 | managed:
3 |   enabled: true
4 | plugins:
5 |   - name: go
6 |     out: pb
7 |     opt: paths=source_relative
8 | 
9 |   - name: connect-go
10 |     out: pb
11 |     opt: paths=source_relative
12 | 
--------------------------------------------------------------------------------
/db_proto/sql/postgres/inserter.go:
--------------------------------------------------------------------------------
1 | package postgres
2 | 
3 | type pgInserter interface {
4 |     insert(table string, values []any, database *Database) error
5 | }
6 | 
7 | type pgFlusher interface {
8 |     flush(database *Database) error
9 | }
10 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/order/.gitignore:
--------------------------------------------------------------------------------
1 | # substreams auth file
2 | .substreams.env
3 | 
4 | # Compiled source files
5 | target/
6 | 
7 | # Sink data when running any sinker
8 | sink-data/
9 | 
10 | # The spkg packed by the substreams cli
11 | *.spkg
--------------------------------------------------------------------------------
/db_proto/test/substreams/full-block/.gitignore:
--------------------------------------------------------------------------------
1 | # substreams auth file
2 | .substreams.env
3 | 
4 | # Compiled source files
5 | target/
6 | 
7 | # Sink data when running any sinker
8 | sink-data/
9 | 
10 | # The spkg packed by the substreams cli
11 | *.spkg
--------------------------------------------------------------------------------
/devel/eth-block-meta/schema.sql:
--------------------------------------------------------------------------------
1 | create table block_meta
2 | (
3 |     id          text not null constraint block_meta_pk primary key,
4 |     at          text,
5 |     number      integer,
6 |     hash        text,
7 |     parent_hash text,
8 |     timestamp   text
9 | );
10 | 
--------------------------------------------------------------------------------
/db_changes/bundler/writer/init_test.go:
--------------------------------------------------------------------------------
1 | package writer
2 | 
3 | import "github.com/streamingfast/logging"
4 | 
5 | var zlog, _ = logging.PackageLogger("writer", "github.com/streamingfast/substreams-sink-sql/db_changes/bundler/writer")
6 | 
7 | func init() {
8 |     logging.InstantiateLoggers()
9 | }
10 | 
--------------------------------------------------------------------------------
/db_changes/db/init_test.go:
--------------------------------------------------------------------------------
1 | package db
2 | 
3 | import (
4 |     _ "github.com/lib/pq"
5 |     "github.com/streamingfast/logging"
6 | )
7 | 
8 | var zlog, tracer = logging.PackageLogger("sink-sql", "github.com/streamingfast/substreams-sink-sql/db")
9 | 
10 | func init() {
11 |     logging.InstantiateLoggers()
12 | }
13 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/order/buf.gen.yaml:
--------------------------------------------------------------------------------
1 | version: v1
2 | plugins:
3 |   - plugin: buf.build/community/neoeinstein-prost:v0.4.0
4 |     out: src/pb
5 |     opt:
6 |       - file_descriptor_set=false
7 | 
8 |   - plugin: buf.build/community/neoeinstein-prost-crate:v0.4.1
9 |     out: src/pb
10 |     opt:
11 |       - no_features
12 | 
--------------------------------------------------------------------------------
/db_proto/sql/context.go:
--------------------------------------------------------------------------------
1 | package sql
2 | 
3 | type Context struct {
4 |     blockNumber int
5 | }
6 | 
7 | func NewContext() *Context {
8 |     return &Context{}
9 | }
10 | 
11 | func (c *Context) SetNumber(id int) {
12 |     c.blockNumber = id
13 | }
14 | 
15 | func (c *Context) BlockNumber() int {
16 |     return c.blockNumber
17 | }
18 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/full-block/buf.gen.yaml:
--------------------------------------------------------------------------------
1 | version: v1
2 | plugins:
3 |   - plugin: buf.build/community/neoeinstein-prost:v0.4.0
4 |     out: src/pb
5 |     opt:
6 |       - file_descriptor_set=false
7 | 
8 |   - plugin: buf.build/community/neoeinstein-prost-crate:v0.4.1
9 |     out: src/pb
10 |     opt:
11 |       - no_features
12 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/full-block/src/lib.rs:
--------------------------------------------------------------------------------
1 | mod pb;
2 | use substreams_ethereum::pb::eth::v2::Block;
3 | 
4 | #[allow(unused_imports)]
5 | use num_traits::cast::ToPrimitive;
6 | substreams_ethereum::init!();
7 | 
8 | #[substreams::handlers::map]
9 | fn full_block(blk: Block) -> Result<Block, substreams::errors::Error> {
10 |     Ok(blk)
11 | }
12 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .envrc
2 | .env.release
3 | .idea
4 | .DS_Store
5 | .release_notes.md
6 | dist/
7 | devel/data*
8 | build/
9 | *.spkg
10 | /substreams-sink-sql
11 | /cursor.txt
12 | /replay.log
13 | /*_schema_hash.txt
14 | /db_proto/test/substreams/order/src/pb/.last_generated_hash
15 | /db_proto/test/substreams/generator.json
16 | /main
17 | /cmd/scratch/scratch
18 | 
--------------------------------------------------------------------------------
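The two `buf.gen.yaml` files above configure the `neoeinstein-prost` plugins that emit the generated `src/pb/*.rs` modules shown throughout this dump. As a sketch (assuming a standard `buf` CLI installation; the exact invocation is not recorded in this repository), regeneration would be run from each test crate's root:

```bash
# Run from db_proto/test/substreams/full-block (or .../order), where
# buf.gen.yaml lives; reads the proto sources and rewrites src/pb/.
buf generate
```

--------------------------------------------------------------------------------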
/db_proto/test/substreams/full-block/src/pb/sf.substreams.index.v1.rs:
--------------------------------------------------------------------------------
1 | // @generated
2 | // This file is @generated by prost-build.
3 | #[allow(clippy::derive_partial_eq_without_eq)]
4 | #[derive(Clone, PartialEq, ::prost::Message)]
5 | pub struct Keys {
6 |     #[prost(string, repeated, tag="1")]
7 |     pub keys: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
8 | }
9 | // @@protoc_insertion_point(module)
10 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/full-block/src/pb/sf.substreams.rs:
--------------------------------------------------------------------------------
1 | // @generated
2 | // This file is @generated by prost-build.
3 | #[allow(clippy::derive_partial_eq_without_eq)]
4 | #[derive(Clone, Copy, PartialEq, ::prost::Message)]
5 | pub struct FieldOptions {
6 |     #[prost(bool, tag="1")]
7 |     pub load_from_file: bool,
8 |     #[prost(bool, tag="2")]
9 |     pub zip_from_folder: bool,
10 | }
11 | // @@protoc_insertion_point(module)
12 | 
--------------------------------------------------------------------------------
/cmd/substreams-sink-sql/logging.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 |     "github.com/streamingfast/cli"
5 |     "github.com/streamingfast/logging"
6 |     "go.uber.org/zap"
7 | )
8 | 
9 | var zlog, tracer = logging.RootLogger("sink-sql", "github.com/streamingfast/substreams-sink-sql/cmd/substreams-sink-sql")
10 | 
11 | func init() {
12 |     cli.SetLogger(zlog, tracer)
13 | 
14 |     logging.InstantiateLoggers(logging.WithDefaultLevel(zap.InfoLevel))
15 | }
16 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/order/src/pb/sf.substreams.solana.v1.rs:
--------------------------------------------------------------------------------
1 | // @generated
2 | // This file is @generated by prost-build.
3 | #[allow(clippy::derive_partial_eq_without_eq)]
4 | #[derive(Clone, PartialEq, ::prost::Message)]
5 | pub struct Transactions {
6 |     #[prost(message, repeated, tag="1")]
7 |     pub transactions: ::prost::alloc::vec::Vec<super::super::super::solana::r#type::v1::ConfirmedTransaction>,
8 | }
9 | // @@protoc_insertion_point(module)
10 | 
--------------------------------------------------------------------------------
/substreams.yaml:
--------------------------------------------------------------------------------
1 | specVersion: v0.1.0
2 | package:
3 |   name: substreams_sink_sql_protodefs
4 |   version: v1.0.7
5 |   url: https://github.com/streamingfast/substreams-sink-sql
6 |   doc: |
7 |     Protobuf definitions for Substreams SQL Sink modules.
8 | 
9 |     This package does not include any modules, it contains **only** Protobuf definitions for the sink config.
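    As a sketch of how a consuming manifest wires this in (mirroring the
    devel manifest devel/eth-block-meta/substreams.dev.yaml shown elsewhere
    in this dump):

      sink:
        module: main:db_out
        type: sf.substreams.sink.sql.v1.Service
        config:
          schema: "./schema.sql"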
10 | 
11 | protobuf:
12 |   files:
13 |     - sf/substreams/sink/sql/v1/services.proto
14 |   importPaths:
15 |     - ./proto
16 | 
--------------------------------------------------------------------------------
/db_changes/bundler/writer/interface.go:
--------------------------------------------------------------------------------
1 | package writer
2 | 
3 | import (
4 |     "context"
5 |     "io"
6 | 
7 |     "github.com/streamingfast/dstore"
8 | 
9 |     "github.com/streamingfast/bstream"
10 | )
11 | 
12 | type Writer interface {
13 |     io.Writer
14 | 
15 |     IsWritten() bool
16 |     StartBoundary(*bstream.Range) error
17 |     CloseBoundary(ctx context.Context) (Uploadeable, error)
18 |     Type() FileType
19 | }
20 | 
21 | type Uploadeable interface {
22 |     Upload(ctx context.Context, store dstore.Store) (string, error)
23 | }
24 | 
--------------------------------------------------------------------------------
/db_proto/sql/constraint.go:
--------------------------------------------------------------------------------
1 | package sql
2 | 
3 | import "fmt"
4 | 
5 | type ForeignKey struct {
6 |     Name         string
7 |     Table        string
8 |     Field        string
9 |     ForeignTable string
10 |     ForeignField string
11 | }
12 | 
13 | type Constraint struct {
14 |     Table string
15 |     Sql   string
16 | }
17 | 
18 | func (f *ForeignKey) String() string {
19 |     return fmt.Sprintf("ALTER TABLE %s ADD CONSTRAINT %s FOREIGN KEY (%s) REFERENCES %s(%s)", f.Table, f.Name, f.Field, f.ForeignTable, f.ForeignField)
20 | }
21 | 
--------------------------------------------------------------------------------
/devel/eth-block-meta/substreams.dev.yaml:
--------------------------------------------------------------------------------
1 | 
2 | specVersion: v0.1.0
3 | package:
4 |   name: "substreams_eth_block_meta"
5 |   version: v0.0.2
6 | 
7 | imports:
8 |   sql: https://github.com/streamingfast/substreams-sink-sql/releases/download/protodefs-v1.0.1/substreams-sink-sql-protodefs-v1.0.1.spkg
9 |   main: https://github.com/streamingfast/substreams-eth-block-meta/releases/download/v0.5.1/substreams-eth-block-meta-v0.5.1.spkg
10 | 
11 | network: 'mainnet'
12 | 
13 | sink:
14 |   module: main:db_out
15 |   type: sf.substreams.sink.sql.v1.Service
16 |   config:
17 |     schema: "./schema.sql"
18 | 
--------------------------------------------------------------------------------
/db_changes/db/metrics.go:
--------------------------------------------------------------------------------
1 | package db
2 | 
3 | import (
4 |     "github.com/streamingfast/dmetrics"
5 | )
6 | 
7 | var metrics = dmetrics.NewSet(dmetrics.PrefixNameWith("substreams_sink_sql"))
8 | 
9 | var QueryExecutionDuration = metrics.NewCounterVec("tx_query_execution_duration", []string{"query_type"}, "The amount of time spent executing queries by type (normal/undo) in nanoseconds")
10 | var PruneReversibleSegmentDuration = metrics.NewCounter("prune_reversible_segment_duration", "The amount of time spent pruning reversible segment in nanoseconds")
11 | 
12 | func RegisterMetrics() {
13 |     metrics.Register()
14 | }
15 | 
--------------------------------------------------------------------------------
/db_proto/sql/utils.go:
--------------------------------------------------------------------------------
1 | package sql
2 | 
3 | import (
4 |     "fmt"
5 |     "strings"
6 | 
7 |     "google.golang.org/protobuf/reflect/protoreflect"
8 | )
9 | 
10 | func fieldName(f protoreflect.FieldDescriptor) string {
11 |     fieldNameSuffix := ""
12 |     if f.Kind() == protoreflect.MessageKind {
13 |         fieldNameSuffix = "_id"
14 |     }
15 | 
16 |     return fmt.Sprintf("%s%s", strings.ToLower(string(f.Name())), fieldNameSuffix)
17 | }
18 | 
19 | func fieldQuotedName(f protoreflect.FieldDescriptor) string {
20 |     return Quoted(fieldName(f))
21 | }
22 | 
23 | func Quoted(value string) string {
24 |     return fmt.Sprintf("\"%s\"", value)
25 | }
26 | 
--------------------------------------------------------------------------------
/proto/buf.lock:
--------------------------------------------------------------------------------
1 | # Generated by buf. DO NOT EDIT.
2 | version: v1
3 | deps:
4 |   - remote: buf.build
5 |     owner: streamingfast
6 |     repository: firehose
7 |     commit: 9d6efa32daca4ea69d36b6a0bd1f2a72
8 |     digest: shake256:b0f2d94a4fed1cea285d685397f49e35a1fa91a36d05b4aee0bfd146cb1b28d1de47ec8b1bda11e6ed06e6c6cf85b2d20a70fcdd289161f2d01f8154e780ce40
9 |   - remote: buf.build
10 |     owner: streamingfast
11 |     repository: substreams
12 |     commit: 0dd88de28914480ba21402c342a5553d
13 |     digest: shake256:3b088d9e3817dd7c403f586e85227b93503de2a2a3744d8ebdeded44cfb01d21168493a3faf11f7574df006b1e75bf8c8bf0909297c7dd58a59ad594a84bc4f4
14 | 
--------------------------------------------------------------------------------
/db_changes/state/interface.go:
--------------------------------------------------------------------------------
1 | package state
2 | 
3 | import (
4 |     "context"
5 | 
6 |     "github.com/streamingfast/bstream"
7 |     sink "github.com/streamingfast/substreams-sink"
8 |     "github.com/streamingfast/substreams-sink-sql/db_changes/bundler/writer"
9 | )
10 | 
11 | type Store interface {
12 |     Start(context.Context)
13 |     Close()
14 |     NewBoundary(*bstream.Range)
15 |     ReadCursor(context.Context) (*sink.Cursor, error)
16 |     SetCursor(*sink.Cursor)
17 |     GetState() (Saveable, error)
18 |     UploadCursor(state Saveable)
19 |     Shutdown(error)
20 |     OnTerminating(func(error))
21 | }
22 | 
23 | type Saveable interface {
24 |     Save() error
25 |     GetUploadeable() writer.Uploadeable
26 | }
27 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM --platform=$BUILDPLATFORM golang:1.24-bullseye AS build
2 | 
3 | WORKDIR /src
4 | 
5 | ARG TARGETOS TARGETARCH VERSION=dev
6 | 
7 | RUN --mount=target=. \
8 |     --mount=type=cache,target=/root/.cache/go-build \
9 |     --mount=type=cache,target=/go/pkg \
10 |     GOOS=$TARGETOS GOARCH=$TARGETARCH go build -ldflags "-X \"main.version=$VERSION\"" -o /app/substreams-sink-sql ./cmd/substreams-sink-sql
11 | 
12 | FROM ubuntu:22.04
13 | 
14 | RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
15 |     apt-get -y install -y ca-certificates libssl3
16 | 
17 | COPY --from=build /app/substreams-sink-sql /app/substreams-sink-sql
18 | 
19 | ENTRYPOINT ["/app/substreams-sink-sql"]
20 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/full-block/README.md:
--------------------------------------------------------------------------------
1 | # full_block Substreams modules
2 | 
3 | This package was initialized via `substreams init`, using the `evm-hello-world` template.
4 | 
5 | ## Usage
6 | 
7 | ```bash
8 | substreams build
9 | substreams auth
10 | substreams gui # Get streaming!
11 | ```
12 | 
13 | Optionally, you can publish your Substreams to the [Substreams Registry](https://substreams.dev).
14 | 
15 | ```bash
16 | substreams registry login # Login to substreams.dev
17 | substreams registry publish # Publish your Substreams to substreams.dev
18 | ```
19 | 
20 | ## Modules
21 | 
22 | ### `map_my_data`
23 | 
24 | This module extracts event logs from blocks into a 'MyData' protobuf object.
25 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/order/README.md:
--------------------------------------------------------------------------------
1 | # order Substreams modules
2 | 
3 | This package was initialized via `substreams init`, using the `sol-minimal` template.
4 | 
5 | ## Usage
6 | 
7 | ```bash
8 | substreams build
9 | substreams auth
10 | substreams gui # Get streaming!
11 | ```
12 | 
13 | Optionally, you can publish your Substreams to the [Substreams Registry](https://substreams.dev).
14 | 
15 | ```bash
16 | substreams registry login # Login to substreams.dev
17 | substreams registry publish # Publish your Substreams to substreams.dev
18 | ```
19 | 
20 | ## Modules
21 | 
22 | ### `map_my_data`
23 | 
24 | This module will do a simple computation of the number of **transactions**
25 | and the number of **instructions** in each block.
26 | 
--------------------------------------------------------------------------------
/db_changes/bundler/writer/common.go:
--------------------------------------------------------------------------------
1 | package writer
2 | 
3 | import (
4 |     "fmt"
5 | 
6 |     "github.com/streamingfast/bstream"
7 |     "go.uber.org/zap"
8 | )
9 | 
10 | type FileType string
11 | 
12 | const (
13 |     FileTypeJSONL FileType = "jsonl"
14 |     FileTypeCSV   FileType = "csv"
15 | )
16 | 
17 | type baseWriter struct {
18 |     fileType FileType
19 |     zlogger  *zap.Logger
20 | }
21 | 
22 | func newBaseWriter(fileType FileType, zlogger *zap.Logger) baseWriter {
23 |     return baseWriter{
24 |         fileType: fileType,
25 |         zlogger:  zlogger,
26 |     }
27 | 
28 | }
29 | 
30 | func (b baseWriter) filename(blockRange *bstream.Range) string {
31 |     return fmt.Sprintf("%010d-%010d", blockRange.StartBlock(), *blockRange.EndBlock()-1)
32 | }
33 | 
34 | func (b baseWriter) Type() FileType {
35 |     return b.fileType
36 | }
37 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/full-block/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "full_block"
3 | version = "0.0.1"
4 | edition = "2021"
5 | 
6 | [lib]
7 | name = "substreams"
8 | crate-type = ["cdylib"]
9 | 
10 | [dependencies]
11 | ethabi = "17"
12 | hex-literal = "0.3.4"
13 | num-bigint = "0.4"
14 | num-traits = "0.2.15"
15 | prost = "0.13.3"
16 | prost-types = "0.13.3"
17 | substreams = "0.6.0"
18 | substreams-ethereum = "0.10.2"
19 | 
20 | # Required so that ethabi > ethereum-types build correctly under wasm32-unknown-unknown
21 | [target.wasm32-unknown-unknown.dependencies]
22 | getrandom = { version = "0.2", features = ["custom"] }
23 | 
24 | [build-dependencies]
25 | anyhow = "1"
26 | substreams-ethereum = "0.10.0"
27 | regex = "1.8"
28 | 
29 | [profile.release]
30 | lto = true
31 | opt-level = 's'
32 | strip = "debuginfo"
33 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/order/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "order"
3 | version = "0.0.1"
4 | edition = "2021"
5 | 
6 | [lib]
7 | name = "substreams"
8 | crate-type = ["cdylib"]
9 | 
10 | [dependencies]
11 | hex-literal = "0.3.4"
12 | num-bigint = "0.4"
13 | num-traits = "0.2.15"
14 | prost = "0.13.3"
15 | prost-types = "0.13.3"
16 | substreams = "0.6.0"
17 | substreams-solana = "0.14.1"
18 | substreams-solana-program-instructions = "0.2.0"
19 | log = "0.4.27"
20 | 
21 | # Required so that ethabi > ethereum-types build correctly under wasm32-unknown-unknown
22 | [target.wasm32-unknown-unknown.dependencies]
23 | getrandom = { version = "0.2", features = ["custom"] }
24 | 
25 | [build-dependencies]
26 | anyhow = "1"
27 | regex = "1.8"
28 | 
29 | [profile.release]
30 | lto = true
31 | opt-level = 's'
32 | strip = "debuginfo"
33 | 
--------------------------------------------------------------------------------
/devel/clickhouse-server/config.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 |     debug
4 |     /var/log/clickhouse-server/clickhouse-server.log
5 |     /var/log/clickhouse-server/clickhouse-server.err.log
6 |     1000M
7 |     3
8 | 
9 |     ch_postgres
10 |     0.0.0.0
11 |     8123
12 |     9000
13 |     9005
14 | 
15 | 
16 |     users.xml
17 | 
18 | 
19 |     /var/lib/clickhouse/access/
20 | 
21 | 
22 | 
23 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/full-block/substreams.yaml:
--------------------------------------------------------------------------------
1 | specVersion: v0.1.0
2 | package:
3 |   name: full_block
4 |   version: v0.1.0
5 | 
6 | protobuf:
7 |   files:
8 |     - full-block.proto
9 |   descriptorSets:
10 |     - module: buf.build/streamingfast/substreams-sink-sql
11 |   importPaths:
12 |     - ./proto
13 |   excludePaths:
14 |     - sf/substreams/rpc
15 |     - sf/substreams/v1
16 |     - sf/substreams/index
17 |     - sf/substreams/index/v1
18 |     - google
19 | 
20 | binaries:
21 |   default:
22 |     type: wasm/rust-v1
23 |     file: ./target/wasm32-unknown-unknown/release/substreams.wasm
24 | 
25 | modules:
26 |   - name: full_block
27 |     kind: map
28 |     initialBlock: 20000000
29 |     inputs:
30 |       - source: sf.ethereum.type.v2.Block
31 |     output:
32 |       type: proto:sf.ethereum.type.v2.Block
33 | 
34 | network: mainnet
35 | 
--------------------------------------------------------------------------------
/db_changes/db/user.go:
--------------------------------------------------------------------------------
1 | package db
2 | 
3 | import (
4 |     "context"
5 |     "fmt"
6 | 
7 |     "go.uber.org/zap"
8 | )
9 | 
10 | func (l *Loader) CreateUser(ctx context.Context, username string, password string, database string, readOnly bool) (err error) {
11 |     tx, err := l.BeginTx(ctx, nil)
12 |     if err != nil {
13 |         return fmt.Errorf("failed to begin db transaction: %w", err)
14 |     }
15 |     defer func() {
16 |         if err != nil {
17 |             if err := tx.Rollback(); err != nil {
18 |                 l.logger.Warn("failed to rollback transaction", zap.Error(err))
19 |             }
20 |         }
21 |     }()
22 | 
23 |     err = l.dialect.CreateUser(tx, ctx, l, username, password, database, readOnly)
24 |     if err != nil {
25 |         return fmt.Errorf("create user: %w", err)
26 |     }
27 | 
28 |     if err := tx.Commit(); err != nil {
29 |         return fmt.Errorf("failed to commit db transaction: %w", err)
30 |     }
31 |     l.reset()
32 | 
33 |     return nil
34 | }
35 | 
--------------------------------------------------------------------------------
/db_changes/sinker/metrics.go:
--------------------------------------------------------------------------------
1 | package sinker
2 | 
3 | import (
4 |     "github.com/streamingfast/dmetrics"
5 |     "github.com/streamingfast/substreams-sink-sql/db_changes/db"
6 | )
7 | 
8 | func RegisterMetrics() {
9 |     metrics.Register()
10 |     db.RegisterMetrics()
11 | }
12 | 
13 | var metrics = dmetrics.NewSet()
14 | 
15 | var FlushCount = metrics.NewCounter("substreams_sink_postgres_store_flush_count", "The amount of flush that happened so far")
16 | var FlushedRowsCount = metrics.NewCounter("substreams_sink_postgres_flushed_rows_count", "The number of flushed rows so far")
17 | var FlushDuration = metrics.NewCounter("substreams_sink_postgres_store_flush_duration", "The amount of time spent flushing cache to db (in nanoseconds)")
18 | var FlushedHeadBlockNumber = metrics.NewHeadBlockNumber("substreams_sink_postgres")
19 | var FlushedHeadBlockTimeDrift = metrics.NewHeadTimeDrift("substreams_sink_postgres")
20 | 
--------------------------------------------------------------------------------
/services/runner.go:
--------------------------------------------------------------------------------
1 | package services
2 | 
3 | import (
4 |     "time"
5 | 
6 |     pbsql "github.com/streamingfast/substreams-sink-sql/pb/sf/substreams/sink/sql/services/v1"
7 |     "go.uber.org/zap"
8 | )
9 | 
10 | func Run(service *pbsql.Service, logger *zap.Logger) error {
11 |     if service.HasuraFrontend != nil {
12 |         panic("Hasura front end not supported yet")
13 |     }
14 |     if service.PostgraphileFrontend != nil {
15 |         panic("Postgraphile front end not supported yet")
16 |     }
17 |     if service.RestFrontend != nil {
18 |         panic("Rest front end not supported yet")
19 |     }
20 | 
21 |     if service.DbtConfig != nil && service.DbtConfig.Enabled {
22 |         go func() {
23 |             for {
24 |                 err := runDBT(service.DbtConfig, logger)
25 |                 if err != nil {
26 |                     logger.Error("running dbt", zap.Error(err))
27 |                     time.Sleep(30 * time.Second)
28 |                 }
29 |             }
30 |         }()
31 |     }
32 | 
33 |     return nil
34 | }
35 | 
--------------------------------------------------------------------------------
/devel/substreams-sink-sql:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )"
4 | 
5 | active_pid=
6 | 
7 | main() {
8 |   set -e
9 | 
10 |   version="unknown"
11 |   if [[ -f .version ]]; then
12 |     version=`cat .version`
13 |   fi
14 | 
15 |   commit=`git rev-list -1 HEAD`
16 |   dirty=
17 |   if [[ ! -z "$(git status --untracked-files=no --porcelain)" ]]; then
18 |     dirty="dirty"
19 |   fi
20 | 
21 |   pushd "$ROOT" &> /dev/null
22 |     go install -ldflags "-X main.Version=$version" ./cmd/substreams-sink-sql
23 |   popd &> /dev/null
24 | 
25 |   if [[ $KILL_AFTER != "" ]]; then
26 |     ${GOPATH:-$HOME/go}/bin/substreams-sink-sql "$@" &
27 |     active_pid=$!
28 | 
29 |     sleep $KILL_AFTER
30 |     kill -s TERM $active_pid &> /dev/null || true
31 |   else
32 |     exec ${GOPATH:-$HOME/go}/bin/substreams-sink-sql "$@"
33 |   fi
34 | }
35 | 
36 | main "$@"
37 | 
38 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/order/src/pb/sf.substreams.rs:
--------------------------------------------------------------------------------
1 | // @generated
2 | // This file is @generated by prost-build.
3 | #[allow(clippy::derive_partial_eq_without_eq)]
4 | #[derive(Clone, Copy, PartialEq, ::prost::Message)]
5 | pub struct FieldOptions {
6 |     /// this option informs the `substreams pack` command that it should treat the corresponding manifest value as a path to a file, putting its content as bytes in this field.
7 |     /// must be applied to a `bytes` or `string` field
8 |     #[prost(bool, tag="1")]
9 |     pub load_from_file: bool,
10 |     /// this option informs the `substreams pack` command that it should treat the corresponding manifest value as a path to a folder, zipping its content and putting the zip content as bytes in this field.
11 |     /// must be applied to a `bytes` field
12 |     #[prost(bool, tag="2")]
13 |     pub zip_from_folder: bool,
14 | }
15 | // @@protoc_insertion_point(module)
16 | 
--------------------------------------------------------------------------------
/db_changes/bundler/writer/types.go:
--------------------------------------------------------------------------------
1 | package writer
2 | 
3 | import (
4 |     "context"
5 |     "fmt"
6 |     "io"
7 | 
8 |     "github.com/streamingfast/dstore"
9 | )
10 | 
11 | type dataFile struct {
12 |     reader         io.Reader
13 |     outputFilename string
14 | }
15 | 
16 | func (d *dataFile) Upload(ctx context.Context, store dstore.Store) (string, error) {
17 |     if err := store.WriteObject(ctx, d.outputFilename, d.reader); err != nil {
18 |         return "", fmt.Errorf("write object: %w", err)
19 |     }
20 |     return store.ObjectPath(d.outputFilename), nil
21 | }
22 | 
23 | type localFile struct {
24 |     localFilePath  string
25 |     outputFilename string
26 | }
27 | 
28 | func (l *localFile) Upload(ctx context.Context, store dstore.Store) (string, error) {
29 |     if err := store.PushLocalFile(ctx, l.localFilePath, l.outputFilename); err != nil {
30 |         return "", fmt.Errorf("pushing object: %w", err)
31 |     }
32 |     return store.ObjectPath(l.outputFilename), nil
33 | }
34 | 
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Build and Test
2 | 
3 | on:
4 |   push:
5 |     branches: [ "develop", "main" ]
6 |   pull_request:
7 |     branches: [ "develop", "main" ]
8 | 
9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-latest
12 | 
13 |     steps:
14 |       - uses: actions/checkout@v4
15 | 
16 |       - name: Set up Go
17 |         uses: actions/setup-go@v5
18 |         with:
19 |           go-version-file: 'go.mod'
20 | 
21 |       - name: Cache Go modules
22 |         uses: actions/cache@v4
23 |         with:
24 |           path: |
25 |             ~/.cache/go-build
26 |             ~/go/pkg/mod
27 |           key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
28 |           restore-keys: |
29 |             ${{ runner.os }}-go-
30 | 
31 |       - name: Download dependencies
32 |         run: go mod download
33 | 
34 |       - name: Verify dependencies
35 |         run: go mod verify
36 | 
37 |       - name: Build
38 |         run: go build -v ./...
39 | 
40 |       - name: Run tests
41 |         run: go test -v $(go list ./... | grep -v /tests/integration)
42 | 
--------------------------------------------------------------------------------
/proto/utils.go:
--------------------------------------------------------------------------------
1 | package proto
2 | 
3 | import (
4 |     "fmt"
5 | 
6 |     schema "github.com/streamingfast/substreams-sink-sql/pb/sf/substreams/sink/sql/schema/v1"
7 |     "google.golang.org/protobuf/proto"
8 |     "google.golang.org/protobuf/reflect/protoreflect"
9 | )
10 | 
11 | func TableInfo(d protoreflect.MessageDescriptor) *schema.Table {
12 |     msgOptions := d.Options()
13 | 
14 |     if proto.HasExtension(msgOptions, schema.E_Table) {
15 |         ext := proto.GetExtension(msgOptions, schema.E_Table)
16 |         table, ok := ext.(*schema.Table)
17 |         if ok {
18 |             if table.Name == "" {
19 |                 panic(fmt.Sprintf("table name is required for message %q", string(d.Name())))
20 |             }
21 |             return table
22 |         }
23 |     }
24 |     return nil
25 | }
26 | 
27 | func FieldInfo(d protoreflect.FieldDescriptor) *schema.Column {
28 |     options := d.Options()
29 | 
30 |     if proto.HasExtension(options, schema.E_Field) {
31 |         ext := proto.GetExtension(options, schema.E_Field)
32 |         f, ok := ext.(*schema.Column)
33 |         if ok {
34 |             return f
35 |         }
36 |     }
37 |     return nil
38 | }
39 | 
--------------------------------------------------------------------------------
/devel/eth-block-meta/start.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
4 | 
5 | main() {
6 |   cd "$ROOT" &> /dev/null
7 | 
8 |   while getopts "hbc" opt; do
9 |     case $opt in
10 |       h) usage && exit 0;;
11 |       b) bootstrap=true;;
12 |       c) clean=true;;
13 |       \?) usage_error "Invalid option: -$OPTARG";;
14 |     esac
15 |   done
16 |   shift $((OPTIND-1))
17 | 
18 |   set -e
19 | 
20 |   sink="../substreams-sink-sql"
21 |   pg_password=${PGPASSWORD:-"insecure-change-me-in-prod"}
22 |   pg_dsn="psql://dev-node:${pg_password}@127.0.0.1:5432/dev-node?sslmode=disable"
23 | 
24 |   if [[ "$clean" == "true" ]]; then
25 |     echo "Cleaning up existing tables"
26 |     PGPASSWORD="${pg_password}" psql -h localhost -U dev-node -d dev-node -c '\i clean.sql'
27 |   fi
28 | 
29 |   if [[ "$clean" == "true" || "$bootstrap" == "true" ]]; then
30 |     echo "Creating tables"
31 |     $sink setup "$pg_dsn" ./substreams.dev.yaml
32 |   fi
33 | 
34 |   $sink run \
35 |     "$pg_dsn" \
36 |     ./substreams.dev.yaml \
37 |     "$@"
38 | }
39 | 
40 | main "$@"
41 | 
42 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/order/src/pb/sf.substreams.sink.sql.schema.v1.rs:
--------------------------------------------------------------------------------
1 | // @generated
2 | // This file is @generated by prost-build.
3 | #[allow(clippy::derive_partial_eq_without_eq)]
4 | #[derive(Clone, PartialEq, ::prost::Message)]
5 | pub struct Table {
6 |     #[prost(string, tag="1")]
7 |     pub name: ::prost::alloc::string::String,
8 |     #[prost(string, optional, tag="2")]
9 |     pub child_of: ::core::option::Option<::prost::alloc::string::String>,
10 |     #[prost(string, tag="81")]
11 |     pub many_to_one_relation_field_name: ::prost::alloc::string::String,
12 | }
13 | #[allow(clippy::derive_partial_eq_without_eq)]
14 | #[derive(Clone, PartialEq, ::prost::Message)]
15 | pub struct Column {
16 |     #[prost(string, optional, tag="1")]
17 |     pub name: ::core::option::Option<::prost::alloc::string::String>,
18 |     #[prost(string, optional, tag="2")]
19 |     pub foreign_key: ::core::option::Option<::prost::alloc::string::String>,
20 |     #[prost(bool, tag="3")]
21 |     pub unique: bool,
22 |     #[prost(bool, tag="4")]
23 |     pub primary_key: bool,
24 | }
25 | // @@protoc_insertion_point(module)
26 | 
--------------------------------------------------------------------------------
/db_changes/bundler/encoder.go:
--------------------------------------------------------------------------------
1 | package bundler
2 | 
3 | import (
4 |     "bytes"
5 |     "encoding/csv"
6 |     "encoding/json"
7 |     "fmt"
8 |     "sort"
9 | 
10 |     "github.com/golang/protobuf/proto"
11 | )
12 | 
13 | type Encoder func(proto.Message) ([]byte, error)
14 | 
15 | func JSONLEncode(message proto.Message) ([]byte, error) {
16 |     buf := []byte{}
17 |     data, err := json.Marshal(message)
18 |     if err != nil {
19 |         return nil, fmt.Errorf("json marshal: %w", err)
20 |     }
21 |     buf = append(buf, data...)
22 |     buf = append(buf, byte('\n'))
23 |     return buf, nil
24 | }
25 | 
26 | func CSVEncode(message map[string]string) ([]byte, error) {
27 |     keys := make([]string, 0, len(message))
28 |     for k := range message {
29 |         keys = append(keys, k)
30 |     }
31 |     sort.Strings(keys)
32 | 
33 |     row := make([]string, 0, len(keys))
34 |     for _, key := range keys {
35 |         row = append(row, message[key])
36 |     }
37 | 
38 |     var buf bytes.Buffer
39 |     writer := csv.NewWriter(&buf)
40 |     if err := writer.Write(row); err != nil {
41 |         return nil, err
42 |     }
43 |     writer.Flush()
44 |     if err := writer.Error(); err != nil {
45 |         return nil, err
46 |     }
47 | 
48 |     return buf.Bytes(), nil
49 | }
50 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/order/src/pb/mod.rs:
--------------------------------------------------------------------------------
1 | // @generated
2 | // @@protoc_insertion_point(attribute:schema)
3 | pub mod schema {
4 |     include!("schema.rs");
5 |     // @@protoc_insertion_point(schema)
6 | }
7 | pub mod sf {
8 |     pub mod solana {
9 |         pub mod r#type {
10 |             // @@protoc_insertion_point(attribute:sf.solana.type.v1)
11 |             pub mod v1 {
12 |                 include!("sf.solana.type.v1.rs");
13 |                 // @@protoc_insertion_point(sf.solana.type.v1)
14 |             }
15 |         }
16 |     }
17 |     // @@protoc_insertion_point(attribute:sf.substreams)
18 |     pub mod substreams {
19 |         include!("sf.substreams.rs");
20 |         // @@protoc_insertion_point(sf.substreams)
21 |         pub mod solana {
22 |             // @@protoc_insertion_point(attribute:sf.substreams.solana.v1)
23 |             pub mod v1 {
24 |                 include!("sf.substreams.solana.v1.rs");
25 |                 // @@protoc_insertion_point(sf.substreams.solana.v1)
26 |             }
27 |         }
28 |     }
29 | }
30 | pub mod test {
31 |     // @@protoc_insertion_point(attribute:test.relations)
32 |     pub mod relations {
33 |         include!("test.relations.rs");
34 |         // @@protoc_insertion_point(test.relations)
35 |     }
36 | }
37 | 
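The `Table` and `Column` messages above are the Rust side of the proto options that `proto/utils.go` reads via `schema.E_Table` and `schema.E_Field`. As an illustration only (the extension names and import path are assumed from the generated code, not confirmed by this dump), a test proto such as `test/relations/relations.proto` would attach them roughly like this:

```proto
syntax = "proto3";

package test.relations;

// Hypothetical import path for the schema options shown above.
import "sf/substreams/sink/sql/schema/v1/schema.proto";

message Order {
  // Maps this message to a SQL table named "orders".
  option (sf.substreams.sink.sql.schema.v1.table) = { name: "orders" };

  // Marks "id" as the table's primary key.
  string id = 1 [(sf.substreams.sink.sql.schema.v1.field) = { primary_key: true }];
}
```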
--------------------------------------------------------------------------------
/db_changes/db/ops_test.go:
--------------------------------------------------------------------------------
1 | package db
2 | 
3 | import (
4 |     "testing"
5 | 
6 |     "github.com/stretchr/testify/assert"
7 |     "github.com/stretchr/testify/require"
8 | )
9 | 
10 | func TestGetPrimaryKey(t *testing.T) {
11 |     tests := []struct {
12 |         name        string
13 |         in          []*ColumnInfo
14 |         expectOut   map[string]string
15 |         expectError bool
16 |     }{
17 |         {
18 |             name:        "no primkey error",
19 |             expectError: true,
20 |         },
21 |         {
22 |             name: "more than one primkey error",
23 |             in: []*ColumnInfo{
24 |                 {
25 |                     name: "one",
26 |                 },
27 |                 {
28 |                     name: "two",
29 |                 },
30 |             },
31 |             expectError: true,
32 |         },
33 |         {
34 |             name: "single primkey ok",
35 |             in: []*ColumnInfo{
36 |                 {
37 |                     name: "id",
38 |                 },
39 |             },
40 |             expectOut: map[string]string{
41 |                 "id": "testval",
42 |             },
43 |         },
44 |     }
45 |     for _, test := range tests {
46 |         t.Run(test.name, func(t *testing.T) {
47 |             l := &Loader{
48 |                 tables: map[string]*TableInfo{
49 |                     "test": {
50 |                         primaryColumns: test.in,
51 |                     },
52 |                 },
53 |             }
54 |             out, err := l.GetPrimaryKey("test", "testval")
55 |             if test.expectError {
56 |                 assert.Error(t, err)
57 |             } else {
58 |                 require.NoError(t, err)
59 |                 assert.Equal(t, test.expectOut, out)
60 |             }
61 | 
62 |         })
63 |     }
64 | 
65 | }
66 | 
--------------------------------------------------------------------------------
/devel/up.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | set -e
4 | 
5 | ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )"
6 | 
7 | clean=
8 | 
9 | main() {
10 |   pushd "$ROOT" &> /dev/null
11 | 
12 |   while getopts "hc" opt; do
13 |     case $opt in
14 |       h) usage && exit 0;;
15 |       c) clean=true;;
16 |       \?) usage_error "Invalid option: -$OPTARG";;
17 |     esac
18 |   done
19 |   shift $((OPTIND-1))
20 | 
21 |   if [[ -d "./devel/data" && $clean == true ]]; then
22 |     echo "Cleaning data directory"
23 |     rm -rf ./devel/data 1> /dev/null
24 |   fi
25 | 
26 |   prepare
27 | 
28 |   # Pass execution to docker compose
29 |   exec docker compose up
30 | }
31 | 
32 | prepare() {
33 |   if [[ ! -d "./devel/data/postgres" ]]; then
34 |     mkdir -p ./devel/data/postgres 1> /dev/null
35 |   fi
36 | }
37 | 
38 | usage_error() {
39 |   message="$1"
40 |   exit_code="$2"
41 | 
42 |   echo "ERROR: $message"
43 |   echo ""
44 |   usage
45 |   exit ${exit_code:-1}
46 | }
47 | 
48 | usage() {
49 |   echo "usage: up [-c]"
50 |   echo ""
51 |   echo "Setup required files layout and launch 'docker compose up'"
52 |   echo "spinning up all required development dependencies."
53 |   echo ""
54 |   echo "Options"
55 |   echo "    -c          Clean 'data' directory before launching dependencies"
56 |   echo "    -h          Display help about this script"
57 | }
58 | 
59 | main "$@"
60 | 
61 | 
--------------------------------------------------------------------------------
/devel/tutorial/start.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
4 | 
5 | main() {
6 |   cd "$ROOT" &> /dev/null
7 | 
8 |   while getopts "hbc" opt; do
9 |     case $opt in
10 |       h) usage && exit 0;;
11 |       b) bootstrap=true;;
12 |       c) clean=true;;
13 |       \?) usage_error "Invalid option: -$OPTARG";;
14 |     esac
15 |   done
16 |   shift $((OPTIND-1))
17 | 
18 |   set -e
19 | 
20 |   sink="../substreams-sink-sql"
21 |   pg_password=${PGPASSWORD:-"insecure-change-me-in-prod"}
22 |   pg_dsn="psql://dev-node:${pg_password}@127.0.0.1:5432/substreams_example?sslmode=disable"
23 | 
24 |   # Use the published substreams-template package for Database Changes example
25 |   substreams_package="substreams-template@v0.3.1"
26 | 
27 |   if [[ "$clean" == "true" ]]; then
28 |     echo "Cleaning up existing tables"
29 |     PGPASSWORD=${pg_password} psql -h localhost -U dev-node -d dev-node -c 'drop database substreams_example;'
30 |   fi
31 | 
32 |   if [[ "$clean" == "true" || "$bootstrap" == "true" ]]; then
33 |     echo "Creating tables"
34 |     set -e
35 |     PGPASSWORD=${pg_password} psql -h localhost -U dev-node -d dev-node -c 'create database substreams_example;'
36 |     set +e
37 |     $sink setup "$pg_dsn" "$substreams_package"
38 |   fi
39 | 
40 |   $sink run \
41 |     "$pg_dsn" \
42 |     "$substreams_package" \
43 |     "$@"
44 | }
45 | 
46 | main "$@"
47 | 
--------------------------------------------------------------------------------
/proto/sf/substreams/sink/sql/v1/deprecated.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto3";
2 | 
3 | package sf.substreams.sink.sql.v1;
4 | 
5 | option go_package = "github.com/streamingfast/substreams-sink-sql/pb;pbsql";
6 | 
7 | import "sf/substreams/options.proto";
8 | 
9 | message Service {
10 |   // Containing both create table statements and index creation statements.
11 |   string schema = 1 [ (sf.substreams.options).load_from_file = true ];
12 |   optional DBTConfig dbt_config = 2;
13 |   HasuraFrontend hasura_frontend = 4;
14 |   PostgraphileFrontend postgraphile_frontend = 5;
15 | 
16 |   enum Engine {
17 |     unset = 0;
18 |     postgres = 1;
19 |     clickhouse = 2;
20 |   }
21 | 
22 |   Engine engine = 7;
23 | 
24 |   RESTFrontend rest_frontend = 8;
25 | 
26 | }
27 | 
28 | // https://www.getdbt.com/product/what-is-dbt
29 | message DBTConfig {
30 |   bytes files = 1 [ (sf.substreams.options).zip_from_folder = true ];
31 |   int32 run_interval_seconds = 2;
32 |   bool enabled = 3;
33 | }
34 | 
35 | // https://hasura.io/docs/latest/index/
36 | message HasuraFrontend {
37 |   bool enabled = 1;
38 | }
39 | 
40 | // https://www.graphile.org/postgraphile/
41 | message PostgraphileFrontend {
42 |   bool enabled = 1;
43 | }
44 | 
45 | // https://github.com/sosedoff/pgweb
46 | message PGWebFrontend {
47 |   bool enabled = 1;
48 | }
49 | 
50 | // https://github.com/semiotic-ai/sql-wrapper
51 | message RESTFrontend {
52 |   bool enabled = 1;
53 | }
54 | 
--------------------------------------------------------------------------------
/proto/sf/substreams/sink/sql/services/v1/services.proto:
--------------------------------------------------------------------------------
1 | syntax = "proto3";
2 | 
3 | package sf.substreams.sink.sql.service.v1;
4 | 
5 | option go_package = "github.com/streamingfast/substreams-sink-sql/pb;pbsql";
6 | 
7 | import "sf/substreams/options.proto";
8 | 
9 | message Service {
10 |   // Containing both create table statements and index creation statements.
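  // (At packaging time, `substreams pack` replaces the manifest value, a file
  // path, with that file's contents, per the load_from_file option below; see
  // the sf.substreams.FieldOptions doc comments earlier in this dump.)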
11 |   string schema = 1 [ (sf.substreams.options).load_from_file = true ];
12 |   optional DBTConfig dbt_config = 2;
13 |   HasuraFrontend hasura_frontend = 4;
14 |   PostgraphileFrontend postgraphile_frontend = 5;
15 | 
16 |   enum Engine {
17 |     unset = 0;
18 |     postgres = 1;
19 |     clickhouse = 2;
20 |   }
21 | 
22 |   Engine engine = 7;
23 | 
24 |   RESTFrontend rest_frontend = 8;
25 | 
26 | }
27 | 
28 | // https://www.getdbt.com/product/what-is-dbt
29 | message DBTConfig {
30 |   bytes files = 1 [ (sf.substreams.options).zip_from_folder = true ];
31 |   int32 run_interval_seconds = 2;
32 |   bool enabled = 3;
33 | }
34 | 
35 | // https://hasura.io/docs/latest/index/
36 | message HasuraFrontend {
37 |   bool enabled = 1;
38 | }
39 | 
40 | // https://www.graphile.org/postgraphile/
41 | message PostgraphileFrontend {
42 |   bool enabled = 1;
43 | }
44 | 
45 | // https://github.com/sosedoff/pgweb
46 | message PGWebFrontend {
47 |   bool enabled = 1;
48 | }
49 | 
50 | // https://github.com/semiotic-ai/sql-wrapper
51 | message RESTFrontend {
52 |   bool enabled = 1;
53 | }
54 | 
--------------------------------------------------------------------------------
/devel/clickhouse-server/users.xml:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 |     10000000000
6 |     0
7 |     in_order
8 |     1
9 | 
10 | 
11 | 
12 | 
13 |     1
14 |     default
15 |     default
16 | 
17 |     ::/0
18 | 
19 |     default
20 |     1
21 |     1
22 |     1
23 |     1
24 | 
25 | 
26 | 
27 | 
28 | 
29 |     3600
30 |     0
31 |     0
32 |     0
33 |     0
34 |     0
35 | 
36 | 
37 | 
38 | 
39 | 
--------------------------------------------------------------------------------
/shared.go:
--------------------------------------------------------------------------------
1 | package sinksql
2 | 
3 | import (
4 |     "fmt"
5 |     "strings"
6 | 
7 |     pbsql "github.com/streamingfast/substreams-sink-sql/pb/sf/substreams/sink/sql/services/v1"
8 |     pbsubstreams "github.com/streamingfast/substreams/pb/sf/substreams/v1"
9 |     "google.golang.org/protobuf/proto"
10 | )
11 | 
12 | var (
13 |     supportedDeployableUnits              []string
14 |     deprecated_supportedDeployableService = "sf.substreams.sink.sql.v1.Service"
15 |     supportedDeployableService            = "sf.substreams.sink.sql.service.v1.Service"
16 | )
17 | 
18 | func init() {
19 |     supportedDeployableUnits = []string{
20 |         deprecated_supportedDeployableService,
21 |     }
22 | }
23 | 
24 | const typeUrlPrefix = "type.googleapis.com/"
25 | 
26 | func ExtractSinkService(pkg *pbsubstreams.Package) (*pbsql.Service, error) {
27 |     if pkg.SinkConfig == nil {
28 |         return nil, fmt.Errorf("no sink config found in spkg")
29 |     }
30 | 
31 |     configPackageID := strings.TrimPrefix(pkg.SinkConfig.TypeUrl, typeUrlPrefix)
32 | 
33 |     switch configPackageID {
34 |     case deprecated_supportedDeployableService, supportedDeployableService:
35 |         service := &pbsql.Service{}
36 | 
37 |         if err := proto.Unmarshal(pkg.SinkConfig.Value, service); err != nil {
38 |             return nil, fmt.Errorf("failed to proto unmarshal: %w", err)
39 |         }
40 |         return service, nil
41 |     }
42 | 
43 |     return nil, fmt.Errorf("invalid config type %q, supported configs are %q", pkg.SinkConfig.TypeUrl, strings.Join(supportedDeployableUnits, ", "))
44 | }
45 | 
--------------------------------------------------------------------------------
/db_changes/db/dialect.go:
--------------------------------------------------------------------------------
1 | package db
2 | 
3 | import (
4 |     "context"
5 |     "database/sql"
6 |     "fmt"
7 | 
8 |     sink "github.com/streamingfast/substreams-sink"
9 | )
10 | 
11 | type UnknownDriverError struct {
12 |     Driver string
13 | }
14 | 
15 | // Error returns a formatted string description.
16 | func (e UnknownDriverError) Error() string {
17 |     return fmt.Sprintf("unknown database driver: %s", e.Driver)
18 | }
19 | 
20 | type Dialect interface {
21 |     GetCreateCursorQuery(schema string, withPostgraphile bool) string
22 |     GetCreateHistoryQuery(schema string, withPostgraphile bool) string
23 |     ExecuteSetupScript(ctx context.Context, l *Loader, schemaSql string) error
24 |     DriverSupportRowsAffected() bool
25 |     GetUpdateCursorQuery(table, moduleHash string, cursor *sink.Cursor, block_num uint64, block_id string) string
26 |     GetAllCursorsQuery(table string) string
27 |     ParseDatetimeNormalization(value string) string
28 |     Flush(tx Tx, ctx context.Context, l *Loader, outputModuleHash string, lastFinalBlock uint64) (int, error)
29 |     Revert(tx Tx, ctx context.Context, l *Loader, lastValidFinalBlock uint64) error
30 |     OnlyInserts() bool
31 |     AllowPkDuplicates() bool
32 |     CreateUser(tx Tx, ctx context.Context, l *Loader, username string, password string, database string, readOnly bool) error
33 |     GetTableColumns(db *sql.DB, schemaName, tableName string) ([]*sql.ColumnType, error)
34 |     GetPrimaryKey(db *sql.DB, schemaName, tableName string) ([]string, error)
35 |     GetTablesInSchema(db *sql.DB, schemaName string) ([][2]string, error)
36 | }
37 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/order/substreams.yaml:
--------------------------------------------------------------------------------
1 | specVersion: v0.1.0
2 | package:
3 |   name: order
4 |   version: v0.1.0
5 | 
6 | imports:
7 |   solana: https://spkg.io/streamingfast/solana-common-v0.3.0.spkg
8 | 
9 | protobuf:
10 |   files:
11 |     - test/relations/relations.proto
12 |   descriptorSets:
13 |     - module: buf.build/streamingfast/substreams-sink-sql
14 |   importPaths:
15 |     - ./proto
16 |   excludePaths:
17 |     - sf/substreams/rpc
18 |     - sf/substreams/v1
19 |     - sf/substreams/sink
20 |     - sf/substreams/index
21 |     - sf/substreams/index/v1
22 |     - instructions.proto # sol.instructions.v1 from the v0.2.0 spkg
23 |     - transactions.proto # sol.transactions.v1 from the v0.2.0 spkg
24 |     - google
25 | 
26 | binaries:
27 |   default:
28 |     type: wasm/rust-v1
29 |     file: ./target/wasm32-unknown-unknown/release/substreams.wasm
30 | 
31 | modules:
32 |   - name: map_output
33 |     kind: map
34 |     inputs:
35 |       - map: solana:blocks_without_votes
36 |     output:
37 |       type: proto:test.relations.Output
38 |     # For performance, you should use a blockFilter whenever possible, like this:
39 |     #
40 |     # blockFilter:
41 |     #   module: solana:program_ids_without_votes
42 |     #   query:
43 |     #     string: program:4vMsoUT2BWatFweudnQM1xedRLfJgJ7hswhcpz4xgBTy
44 |     #
45 |     # see https://substreams.dev/streamingfast/solana-common/latest for details
46 | 
47 | network: solana-mainnet-beta
48 | 
49 | sink:
50 |   module: map_output
51 |   type: sf.substreams.sink.sql.v1.Service
52 |   # config:
53 |   #   dbt_config:
54 |   #     files: ./dbt
55 |   #     run_interval_seconds: 300
56 |   #     enabled: true
57 | 
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 |   postgres:
3 |     container_name: postgres-ssp2
4 |     image: postgres:17
5 |     ports:
6 |       - "5432:5432"
7 |     command: ["postgres", "-cshared_preload_libraries=pg_stat_statements"]
8 |     #command: ["postgres", "-cshared_preload_libraries=pg_stat_statements", "-clog_statement=all"]
9 |     environment:
10 |       POSTGRES_USER: dev-node
11 |       POSTGRES_PASSWORD: insecure-change-me-in-prod
12 |       POSTGRES_DB: dev-node
13 |       POSTGRES_INITDB_ARGS: "-E UTF8 --locale=C"
14 |       POSTGRES_HOST_AUTH_METHOD: md5
15 |     volumes:
16 |       - ./devel/data/postgres:/var/lib/postgresql/data
17 |     healthcheck:
18 |       test: ["CMD", "nc", "-z", "localhost", "5432"]
19 |       interval: 30s
20 |       timeout: 10s
21 |       retries: 15
22 |   pgweb:
23 |     container_name: pgweb-ssp2
24 |     image: sosedoff/pgweb:0.16.1
25 |     restart: on-failure
26 |     ports:
27 |       - "8081:8081"
28 |     command: ["pgweb", "--bind=0.0.0.0", "--listen=8081", "--binary-codec=hex"]
29 |     links:
30 |       - postgres:postgres
31 |     environment:
32 |       - PGWEB_DATABASE_URL=postgres://dev-node:insecure-change-me-in-prod@postgres:5432/dev-node?sslmode=disable
33 |     depends_on:
34 |       - postgres
35 |   database:
36 |     container_name: clickhouse-ssp2
37 |     image: clickhouse/clickhouse-server:23.9
38 |     user: "101:101"
39 |     hostname: clickhouse
40 |     volumes:
41 |       - ${PWD}/devel/clickhouse-server/config.xml:/etc/clickhouse-server/config.d/config.xml
42 |       - ${PWD}/devel/clickhouse-server/users.xml:/etc/clickhouse-server/users.d/users.xml
43 |     ports:
44 |       - "8123:8123"
45 |       - "9000:9000"
46 |       - "9005:9005"
47 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/full-block/src/pb/mod.rs:
--------------------------------------------------------------------------------
1 | // @generated
2 | // @@protoc_insertion_point(attribute:schema)
3 | pub mod schema {
4 |     include!("schema.rs");
5 |     // @@protoc_insertion_point(schema)
6 | }
7 | pub mod sf {
8 |     pub mod ethereum {
9 |         pub mod r#type {
10 |             // @@protoc_insertion_point(attribute:sf.ethereum.type.v2)
11 |             pub mod v2 {
12 |                 include!("sf.ethereum.type.v2.rs");
13 |                 // @@protoc_insertion_point(sf.ethereum.type.v2)
14 |             }
15 |         }
16 |     }
17 |     // @@protoc_insertion_point(attribute:sf.substreams)
18 |     pub mod substreams {
19 |         include!("sf.substreams.rs");
20 |         // @@protoc_insertion_point(sf.substreams)
21 |         pub mod sink {
22 |             pub mod service {
23 |                 // @@protoc_insertion_point(attribute:sf.substreams.sink.service.v1)
24 |                 pub mod v1 {
25 |                     include!("sf.substreams.sink.service.v1.rs");
26 |                     // @@protoc_insertion_point(sf.substreams.sink.service.v1)
27 |                 }
28 |             }
29 |             pub mod sql {
30 |                 pub mod service {
31 |                     // @@protoc_insertion_point(attribute:sf.substreams.sink.sql.service.v1)
32 |                     pub mod v1 {
33 |                         include!("sf.substreams.sink.sql.service.v1.rs");
34 |                         // @@protoc_insertion_point(sf.substreams.sink.sql.service.v1)
35 |                     }
36 |                 }
37 |                 // @@protoc_insertion_point(attribute:sf.substreams.sink.sql.v1)
38 |                 pub mod v1 {
39 |                     include!("sf.substreams.sink.sql.v1.rs");
40 |                     // @@protoc_insertion_point(sf.substreams.sink.sql.v1)
41 |                 }
42 |             }
43 |         }
44 |     }
45 |     // @@protoc_insertion_point(attribute:sf.substreams.v1)
46 |     pub mod v1 {
47 |         include!("sf.substreams.v1.rs");
48 |         // @@protoc_insertion_point(sf.substreams.v1)
49 |     }
50 | }
51 | 
--------------------------------------------------------------------------------
/db_changes/db/types.go:
--------------------------------------------------------------------------------
1 | package db
2 | 
3 | import (
4 |     "fmt"
5 |     "reflect"
6 | )
7 | 
8 | //go:generate go-enum -f=$GOFILE --marshal --names -nocase
9 | 
10 | // ENUM(
11 | //
12 | //	Ignore
13 | //	Warn
14 | //	Error
15 | //
16 | // )
17 | type OnModuleHashMismatch uint
18 | 
19 | type TableInfo struct {
20 |     schema         string
21 |     schemaEscaped  string
22 |     name           string
23 |     nameEscaped    string
24 |     columnsByName  map[string]*ColumnInfo
25 |     primaryColumns []*ColumnInfo
26 | 
27 |     // Identifier is equivalent to 'escape().escape()' but pre-computed
28 |     // for usage when computing queries.
29 |     identifier string
30 | }
31 | 
32 | func NewTableInfo(schema, name string, pkList []string, columnsByName map[string]*ColumnInfo) (*TableInfo, error) {
33 |     schemaEscaped := EscapeIdentifier(schema)
34 |     nameEscaped := EscapeIdentifier(name)
35 |     primaryColumns := make([]*ColumnInfo, len(pkList))
36 | 
37 |     for i, primaryKeyColumnName := range pkList {
38 |         primaryColumn, found := columnsByName[primaryKeyColumnName]
39 |         if !found {
40 |             return nil, fmt.Errorf("primary key column %q not found", primaryKeyColumnName)
41 |         }
42 |         primaryColumns[i] = primaryColumn
43 | 
44 |     }
45 |     if len(primaryColumns) == 0 {
46 |         return nil, fmt.Errorf("sql sink requires a primary key in every table, none was found in table %s.%s", schema, name)
47 |     }
48 | 
49 |     return &TableInfo{
50 |         schema:         schema,
51 |         schemaEscaped:  schemaEscaped,
52 |         name:           name,
53 |         nameEscaped:    nameEscaped,
54 |         identifier:     schemaEscaped + "." + nameEscaped,
55 |         primaryColumns: primaryColumns,
56 |         columnsByName:  columnsByName,
57 |     }, nil
58 | }
59 | 
60 | type ColumnInfo struct {
61 |     name             string
62 |     escapedName      string
63 |     databaseTypeName string
64 |     scanType         reflect.Type
65 | }
66 | 
67 | func NewColumnInfo(name string, databaseTypeName string, scanType any) *ColumnInfo {
68 |     return &ColumnInfo{
69 |         name:             name,
70 |         escapedName:      EscapeIdentifier(name),
71 |         databaseTypeName: databaseTypeName,
72 |         scanType:         reflect.TypeOf(scanType),
73 |     }
74 | }
75 | 
--------------------------------------------------------------------------------
/db_changes/sinker/factory.go:
--------------------------------------------------------------------------------
1 | package sinker
2 | 
3 | import (
4 |     "context"
5 |     "errors"
6 |     "fmt"
7 |     "time"
8 | 
9 |     "github.com/streamingfast/logging"
10 |     sink "github.com/streamingfast/substreams-sink"
11 |     "github.com/streamingfast/substreams-sink-sql/db_changes/db"
12 |     "go.uber.org/zap"
13 | )
14 | 
15 | type SinkerFactoryFunc func(ctx context.Context, dsnString string, logger *zap.Logger, tracer logging.Tracer) (*SQLSinker, error)
16 | 
17 | type SinkerFactoryOptions struct {
18 |     CursorTableName         string
19 |     HistoryTableName        string
20 |     ClickhouseCluster       string
21 |     BatchBlockFlushInterval int
22 |     BatchRowFlushInterval   int
23 |     LiveBlockFlushInterval  int
24 |     OnModuleHashMismatch    string
25 |     HandleReorgs            bool
26 |     FlushRetryCount         int
27 |     FlushRetryDelay         time.Duration
28 | }
29 | 
30 | func SinkerFactory(
31 |     baseSink *sink.Sinker,
32 |     options SinkerFactoryOptions,
33 | ) SinkerFactoryFunc {
34 |     return func(ctx context.Context, dsnString string, logger *zap.Logger, tracer logging.Tracer) (*SQLSinker, error) {
35 |         dsn, err := db.ParseDSN(dsnString)
36 |         if err != nil {
37 |             return nil, fmt.Errorf("parsing dsn: %w", err)
38 |         }
39 | 
40 |         dbLoader, err := db.NewLoader(
41 |             dsn,
42 |             options.CursorTableName,
43 |             options.HistoryTableName,
44 |             options.ClickhouseCluster,
45 |             options.BatchBlockFlushInterval,
46 |             options.BatchRowFlushInterval,
47 |             options.LiveBlockFlushInterval,
48 |             options.OnModuleHashMismatch,
49 |             &options.HandleReorgs,
50 |             logger,
51 |             tracer,
52 |         )
53 |         if err != nil {
54 |             return nil, fmt.Errorf("creating loader: %w", err)
55 |         }
56 | 
57 |         if err := dbLoader.LoadTables(dsn.Schema(), options.CursorTableName, options.HistoryTableName); err != nil {
58 |             var e *db.SystemTableError
59 |             if errors.As(err, &e) {
%w. Did you run setup?", e) 61 | } 62 | return nil, fmt.Errorf("load tables: %w", err) 63 | } 64 | 65 | sinker, err := New(baseSink, dbLoader, logger, tracer, options.FlushRetryCount, options.FlushRetryDelay) 66 | if err != nil { 67 | return nil, fmt.Errorf("unable to setup SQL sinker: %w", err) 68 | } 69 | 70 | return sinker, nil 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /cmd/substreams-sink-sql/setup.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/spf13/cobra" 7 | "github.com/spf13/pflag" 8 | . "github.com/streamingfast/cli" 9 | "github.com/streamingfast/cli/sflags" 10 | sinker2 "github.com/streamingfast/substreams-sink-sql/db_changes/sinker" 11 | "github.com/streamingfast/substreams/manifest" 12 | ) 13 | 14 | var sinkSetupCmd = Command(sinkSetupE, 15 | "setup <dsn> <manifest>", 16 | "Setup the required infrastructure to deploy a Substreams SQL deployable unit", 17 | ExactArgs(2), 18 | Flags(func(flags *pflag.FlagSet) { 19 | AddCommonDatabaseChangesFlags(flags) 20 | AddCommonSinkerFlags(flags) 21 | 22 | flags.Bool("postgraphile", false, "Will append the necessary 'comments' on cursors table to fully support postgraphile") 23 | flags.Bool("system-tables-only", false, "Will only create/update the system tables (cursors, substreams_history) and ignore the schema from the manifest") 24 | flags.Bool("ignore-duplicate-table-errors", false, "[Dev] Use this if you want to ignore duplicate table errors; take caution that this means the 'schema.sql' file will not have run fully!") 25 | }), 26 | ) 27 | 28 | func sinkSetupE(cmd *cobra.Command, args []string) error { 29 | ctx := cmd.Context() 30 | 31 | dsnString := args[0] 32 | manifestPath := args[1] 33 | 34 | reader, err := manifest.NewReader(manifestPath) 35 | if err != nil { 36 | return fmt.Errorf("setup manifest reader: %w", err) 37 | } 38 | pkgBundle, err := reader.Read() 39 | if err != nil { 40 | return fmt.Errorf("read manifest: %w", err) 41 | } 42 | 43 | options := sinker2.SinkerSetupOptions{ 44 | CursorTableName: sflags.MustGetString(cmd, "cursors-table"), 45 | HistoryTableName: sflags.MustGetString(cmd, "history-table"), 46 | ClickhouseCluster: sflags.MustGetString(cmd, "clickhouse-cluster"), 47 | OnModuleHashMismatch: sflags.MustGetString(cmd, onModuleHashMistmatchFlag), 48 | SystemTablesOnly: sflags.MustGetBool(cmd, "system-tables-only"), 49 | IgnoreDuplicateTableErrors: sflags.MustGetBool(cmd, "ignore-duplicate-table-errors"), 50 | Postgraphile: sflags.MustGetBool(cmd, "postgraphile"), 51 | } 52 | 53 | return sinker2.SinkerSetup(ctx, dsnString, pkgBundle.Package, options, zlog, tracer) 54 | } 55 | 56 | -------------------------------------------------------------------------------- /db_changes/db/dialect_clickhouse_test.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "errors" 5 | "reflect" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func Test_convertToType(t *testing.T) { 13 | 14 | tests := []struct { 15 | name string 16 | value string 17 | expect any 18 | expectErr error 19 | valueType reflect.Type 20 | }{ 21 | { 22 | name: "Date", 23 | value: "2021-01-01", 24 | expect: "2021-01-01", 25 | expectErr: nil, 26 | valueType: reflect.TypeOf(time.Time{}), 27 | }, { 28 | name: "Invalid Date", 29 | value: "2021-99-01", 30 | expect: nil, 31 | expectErr: errors.New(`could not convert 
2021-99-01 to date: parsing time "2021-99-01": month out of range`), 32 | valueType: reflect.TypeOf(time.Time{}), 33 | }, 34 | { 35 | name: "ISO 8601 datetime", 36 | value: "2021-01-01T00:00:00Z", 37 | expect: int64(1609459200), 38 | expectErr: nil, 39 | valueType: reflect.TypeOf(time.Time{}), 40 | }, 41 | { 42 | name: "common datetime", 43 | value: "2021-01-01 00:00:00", 44 | expect: int64(1609459200), 45 | expectErr: nil, 46 | valueType: reflect.TypeOf(time.Time{}), 47 | }, 48 | { 49 | name: "String Slice Double Quoted", 50 | value: `["field1", "field2"]`, 51 | expect: []string{"field1", "field2"}, 52 | expectErr: nil, 53 | valueType: reflect.TypeOf([]string{}), 54 | }, { 55 | name: "Int Slice", 56 | value: `[1, 2]`, 57 | expect: []int{1, 2}, 58 | expectErr: nil, 59 | valueType: reflect.TypeOf([]int{}), 60 | }, { 61 | name: "Float Slice", 62 | value: `[1.0, 2.0]`, 63 | expect: []float64{1, 2}, 64 | expectErr: nil, 65 | valueType: reflect.TypeOf([]float64{}), 66 | }, { 67 | name: "Invalid Type Slice Struct", 68 | value: `[""]`, 69 | expect: nil, 70 | expectErr: errors.New(`"Time" is not supported as Clickhouse Array type`), 71 | valueType: reflect.TypeOf([]time.Time{}), 72 | }, 73 | } 74 | for _, test := range tests { 75 | t.Run(test.name, func(t *testing.T) { 76 | res, err := convertToType(test.value, test.valueType) 77 | if test.expectErr != nil { 78 | assert.EqualError(t, err, test.expectErr.Error()) 79 | } else { 80 | assert.NoError(t, err) 81 | assert.Equal(t, test.expect, res) 82 | } 83 | }) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /cmd/substreams-sink-sql/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "net/http" 6 | _ "net/http/pprof" 7 | "time" 8 | 9 | _ "github.com/lib/pq" 10 | "github.com/spf13/cobra" 11 | "github.com/spf13/pflag" 12 | . 
"github.com/streamingfast/cli" 13 | "github.com/streamingfast/cli/sflags" 14 | "github.com/streamingfast/dmetrics" 15 | "github.com/streamingfast/logging/zapx" 16 | "go.uber.org/zap" 17 | ) 18 | 19 | // Version value, injected via go build `ldflags` at build time 20 | var version = "dev" 21 | 22 | func main() { 23 | go func() { 24 | log.Println(http.ListenAndServe(":6060", nil)) 25 | }() 26 | 27 | Run("substreams-sink-sql", "Substreams SQL Sink", 28 | sinkRunCmd, 29 | sinkSetupCmd, 30 | sinkToolsCmd, 31 | generateCsvCmd, 32 | injectCSVCmd, 33 | createUserCmd, 34 | fromProtoCmd, 35 | 36 | ConfigureViper("SINK_SQL"), 37 | ConfigureVersion(version), 38 | 39 | PersistentFlags(func(flags *pflag.FlagSet) { 40 | flags.Duration("delay-before-start", 0, "[Operator] Amount of time to wait before starting any internal processes, can be used to perform to maintenance on the pod before actually letting it starts") 41 | flags.String("metrics-listen-addr", "localhost:9102", "[Operator] If non-empty, the process will listen on this address for Prometheus metrics request(s)") 42 | flags.String("pprof-listen-addr", "localhost:6060", "[Operator] If non-empty, the process will listen on this address for pprof analysis (see https://golang.org/pkg/net/http/pprof/)") 43 | }), 44 | AfterAllHook(func(cmd *cobra.Command) { 45 | cmd.PersistentPreRun = preStart 46 | }), 47 | ) 48 | } 49 | 50 | func preStart(cmd *cobra.Command, _ []string) { 51 | 52 | delay := sflags.MustGetDuration(cmd, "delay-before-start") 53 | if delay > 0 { 54 | zlog.Info("sleeping to respect delay before start setting", zapx.HumanDuration("delay", delay)) 55 | time.Sleep(delay) 56 | } 57 | 58 | if v := sflags.MustGetString(cmd, "metrics-listen-addr"); v != "" { 59 | zlog.Debug("starting prometheus metrics server", zap.String("listen_addr", v)) 60 | go dmetrics.Serve(v) 61 | } 62 | 63 | if v := sflags.MustGetString(cmd, "pprof-listen-addr"); v != "" { 64 | go func() { 65 | zlog.Debug("starting pprof server", zap.String("listen_addr", v)) 66 | 67 | err := http.ListenAndServe(v, nil) 68 | if err != nil { 69 | zlog.Debug("unable to start profiling server", zap.Error(err), zap.String("listen_addr", v)) 70 | } 71 | }() 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /db_proto/sql/dialect.go: -------------------------------------------------------------------------------- 1 | package sql 2 | 3 | import ( 4 | "github.com/streamingfast/substreams-sink-sql/db_proto/sql/schema" 5 | "go.uber.org/zap" 6 | "golang.org/x/exp/maps" 7 | "google.golang.org/protobuf/reflect/protoreflect" 8 | "google.golang.org/protobuf/types/dynamicpb" 9 | ) 10 | 11 | const DialectTableBlock = "_blocks_" 12 | const DialectTableCursor = "_cursors_" 13 | 14 | const DialectFieldBlockNumber = "_block_number_" 15 | const DialectFieldBlockTimestamp = "_block_timestamp_" 16 | const DialectFieldVersion = "_version_" 17 | const DialectFieldDeleted = "_deleted_" 18 | 19 | type Dialect interface { 20 | SchemaHash() string 21 | FullTableName(table *schema.Table) string 22 | GetTable(table string) *schema.Table 23 | GetTables() []*schema.Table 24 | UseVersionField() bool 25 | UseDeletedField() bool 26 | AppendInlineFieldValues(fieldValues []any, fd protoreflect.FieldDescriptor, fv protoreflect.Value, dm *dynamicpb.Message) ([]any, error) 27 | } 28 | 29 | type BaseDialect struct { 30 | CreateTableSql map[string]string 31 | PrimaryKeySql []*Constraint 32 | ForeignKeySql []*Constraint 33 | UniqueConstraintSql []*Constraint 34 | TableRegistry 
map[string]*schema.Table 35 | Logger *zap.Logger 36 | } 37 | 38 | func NewBaseDialect(registry map[string]*schema.Table, logger *zap.Logger) *BaseDialect { 39 | return &BaseDialect{ 40 | CreateTableSql: make(map[string]string), 41 | TableRegistry: registry, 42 | Logger: logger, 43 | } 44 | } 45 | 46 | func (d *BaseDialect) AddCreateTableSql(table string, sql string) { 47 | d.CreateTableSql[table] = sql 48 | } 49 | 50 | func (d *BaseDialect) GetCreateTableSql(table string) string { 51 | return d.CreateTableSql[table] 52 | } 53 | 54 | func (d *BaseDialect) AddPrimaryKeySql(table string, sql string) { 55 | d.PrimaryKeySql = append(d.PrimaryKeySql, &Constraint{Table: table, Sql: sql}) 56 | } 57 | 58 | func (d *BaseDialect) AddForeignKeySql(table string, sql string) { 59 | d.ForeignKeySql = append(d.ForeignKeySql, &Constraint{Table: table, Sql: sql}) 60 | } 61 | 62 | func (d *BaseDialect) AddUniqueConstraintSql(table string, sql string) { 63 | d.UniqueConstraintSql = append(d.UniqueConstraintSql, &Constraint{Table: table, Sql: sql}) 64 | } 65 | 66 | func (d *BaseDialect) GetTable(table string) *schema.Table { 67 | return d.TableRegistry[table] 68 | } 69 | 70 | func (d *BaseDialect) GetTables() []*schema.Table { 71 | return maps.Values(d.TableRegistry) 72 | } 73 | -------------------------------------------------------------------------------- /db_proto/proto/utils.go: -------------------------------------------------------------------------------- 1 | package proto 2 | 3 | import ( 4 | "fmt" 5 | "maps" 6 | "slices" 7 | "strings" 8 | 9 | "github.com/jhump/protoreflect/desc" 10 | v1 "github.com/streamingfast/substreams/pb/sf/substreams/v1" 11 | "google.golang.org/protobuf/types/descriptorpb" 12 | ) 13 | 14 | func FileDescriptorForOutputType(spkg *v1.Package, err error, deps map[string]*desc.FileDescriptor, outputType string) (*desc.FileDescriptor, error) { 15 | for _, p := range spkg.ProtoFiles { 16 | fd, err := desc.CreateFileDescriptor(p, slices.Collect(maps.Values(deps))...) 
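// Note: desc.CreateFileDescriptor requires every transitive dependency of p to
// already be present in deps; ResolveDependencies (below) builds such a map in
// dependency order.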
17 | if err != nil { 18 | return nil, fmt.Errorf("creating file descriptor: %w", err) 19 | } 20 | 21 | for _, md := range fd.GetMessageTypes() { 22 | if md.GetFullyQualifiedName() == outputType { 23 | return fd, nil 24 | } 25 | } 26 | } 27 | 28 | return nil, fmt.Errorf("could not find file descriptor") 29 | } 30 | 31 | func ModuleOutputType(spkg *v1.Package, moduleName string) string { 32 | outputType := "" 33 | for _, m := range spkg.Modules.Modules { 34 | if m.Name == moduleName { 35 | outputType = strings.TrimPrefix(m.Output.Type, "proto:") 36 | break 37 | } 38 | } 39 | return outputType 40 | } 41 | func ResolveDependencies(protoFiles map[string]*descriptorpb.FileDescriptorProto) (map[string]*desc.FileDescriptor, error) { 42 | out := map[string]*desc.FileDescriptor{} 43 | for _, protoFile := range protoFiles { 44 | err := resolveDependencies(protoFile, protoFiles, out) 45 | if err != nil { 46 | return nil, fmt.Errorf("error resolving dependencies: %w", err) 47 | } 48 | } 49 | 50 | return out, nil 51 | } 52 | 53 | func resolveDependencies(protoFile *descriptorpb.FileDescriptorProto, protoFiles map[string]*descriptorpb.FileDescriptorProto, deps map[string]*desc.FileDescriptor) error { 54 | if deps[protoFile.GetName()] != nil { 55 | return nil 56 | } 57 | if len(protoFile.Dependency) != 0 { 58 | for _, dep := range protoFile.Dependency { 59 | depProtoFile, found := protoFiles[dep] 60 | if !found { 61 | return fmt.Errorf("could not find proto file for dependency %q", dep) 62 | } 63 | err := resolveDependencies(depProtoFile, protoFiles, deps) 64 | if err != nil { 65 | return fmt.Errorf("error resolving dependencies: %w", err) 66 | } 67 | } 68 | } 69 | 70 | d, err := desc.CreateFileDescriptor(protoFile, slices.Collect(maps.Values(deps))...) 71 | if err != nil { 72 | return fmt.Errorf("creating file descriptor: %w", err) 73 | } 74 | 75 | deps[protoFile.GetName()] = d 76 | return nil 77 | } 78 | -------------------------------------------------------------------------------- /db_changes/db/dsn_test.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestParseDSN(t *testing.T) { 11 | tests := []struct { 12 | name string 13 | dns string 14 | expectError bool 15 | expectConnString string 16 | expectSchema string 17 | expectPassword string 18 | }{ 19 | { 20 | name: "golden path", 21 | dns: "psql://postgres:postgres@localhost/substreams-dev?enable_incremental_sort=off&sslmode=disable", 22 | expectConnString: "host=localhost port=5432 dbname=substreams-dev enable_incremental_sort=off sslmode=disable user=postgres password=postgres", 23 | expectSchema: "public", 24 | expectPassword: "postgres", 25 | }, 26 | { 27 | name: "with schemaName", 28 | dns: "psql://postgres:postgres@localhost/substreams-dev?enable_incremental_sort=off&sslmode=disable&schemaName=foo", 29 | expectConnString: "host=localhost port=5432 dbname=substreams-dev enable_incremental_sort=off sslmode=disable user=postgres password=postgres", 30 | expectSchema: "foo", 31 | expectPassword: "postgres", 32 | }, 33 | { 34 | name: "with password", 35 | dns: "clickhouse://default:password@host:9000/default", 36 | expectConnString: "clickhouse://default:password@host:9000/default", 37 | expectSchema: "default", 38 | expectPassword: "password", 39 | }, 40 | { 41 | name: "with blank password", 42 | dns: "clickhouse://default:@host:9000/default", 43 | 
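// A blank password is valid and must round-trip unchanged into the connection string.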
expectConnString: "clickhouse://default:@host:9000/default", 44 | expectSchema: "default", 45 | expectPassword: "", 46 | }, 47 | { 48 | name: "clickhouse with schemaName", 49 | dns: "clickhouse://default:password@host:9000/default?schemaName=testdb", 50 | expectConnString: "clickhouse://default:password@host:9000/default", 51 | expectSchema: "testdb", 52 | expectPassword: "password", 53 | }, 54 | } 55 | for _, test := range tests { 56 | t.Run(test.name, func(t *testing.T) { 57 | d, err := ParseDSN(test.dns) 58 | if test.expectError { 59 | require.Error(t, err) 60 | } else { 61 | require.NoError(t, err) 62 | assert.Equal(t, test.expectConnString, d.ConnString()) 63 | assert.Equal(t, test.expectSchema, d.schema) 64 | assert.Equal(t, test.expectPassword, d.Password) 65 | } 66 | }) 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /services/dbt.go: -------------------------------------------------------------------------------- 1 | package services 2 | 3 | import ( 4 | "archive/zip" 5 | "bytes" 6 | "fmt" 7 | "io" 8 | "os" 9 | "os/exec" 10 | "path/filepath" 11 | "time" 12 | 13 | pbsql "github.com/streamingfast/substreams-sink-sql/pb/sf/substreams/sink/sql/services/v1" 14 | "go.uber.org/zap" 15 | ) 16 | 17 | func runDBT(config *pbsql.DBTConfig, logger *zap.Logger) error { 18 | data := config.Files 19 | dbtDir := "/tmp/dbt" 20 | 21 | if err := os.RemoveAll(dbtDir); err != nil { 22 | return fmt.Errorf("removing dbt directory: %w", err) 23 | } 24 | 25 | if err := os.MkdirAll(dbtDir, os.ModePerm); err != nil { 26 | return fmt.Errorf("creating dbt directory: %w", err) 27 | } 28 | 29 | reader, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) 30 | if err != nil { 31 | return fmt.Errorf("reading zip data from config: %w", err) 32 | } 33 | 34 | // Extract each file in the archive 35 | for _, file := range reader.File { 36 | filePath := filepath.Join(dbtDir, file.Name) 37 | 38 | // Ensure the file's directory structure exists 39 | if file.FileInfo().IsDir() { 40 | if err := os.MkdirAll(filePath, os.ModePerm); err != nil { 41 | return fmt.Errorf("creating directory %s: %w", file.FileInfo().Name(), err) 42 | } 43 | continue 44 | } 45 | 46 | // Ensure parent directories exist 47 | if err := os.MkdirAll(filepath.Dir(filePath), os.ModePerm); err != nil { 48 | return fmt.Errorf("creating parent directory %s: %w", filePath, err) 49 | } 50 | 51 | // Open the file inside the zip 52 | srcFile, err := file.Open() 53 | if err != nil { 54 | return fmt.Errorf("opening file %s: %w", file.FileInfo().Name(), err) 55 | } 56 | defer srcFile.Close() 57 | 58 | // Create the destination file 59 | destFile, err := os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, file.Mode()) 60 | if err != nil { 61 | return fmt.Errorf("creating file %s: %w", filePath, err) 62 | } 63 | defer destFile.Close() 64 | 65 | // Copy the file contents 66 | if _, err := io.Copy(destFile, srcFile); err != nil { 67 | return fmt.Errorf("copying file %s: %w", filePath, err) 68 | } 69 | } 70 | 71 | for { 72 | logger.Info("running dbt") 73 | cmd := exec.Command("dbt", "run", "--profiles-dir", "/tmp/dbt", "--project-dir", "/tmp/dbt") 74 | cmd.Env = os.Environ() 75 | output, err := cmd.CombinedOutput() 76 | if err != nil { 77 | logger.Error("running dbt", zap.Error(err), zap.ByteString("output", output)) 78 | return fmt.Errorf("running dbt: %w", err) 79 | } 80 | logger.Info("dbt output") 81 | fmt.Println(string(output)) 82 | 83 | time.Sleep(time.Duration(config.RunIntervalSeconds)) 84 | 
} 85 | } 86 | -------------------------------------------------------------------------------- /cmd/substreams-sink-sql/create_user.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "time" 8 | 9 | "github.com/spf13/cobra" 10 | "github.com/spf13/pflag" 11 | . "github.com/streamingfast/cli" 12 | "github.com/streamingfast/cli/sflags" 13 | db2 "github.com/streamingfast/substreams-sink-sql/db_changes/db" 14 | ) 15 | 16 | var createUserCmd = Command(createUserE, 17 | "create-user <dsn> <username> <database>", 18 | "Create a user in the database", 19 | ExactArgs(3), 20 | Flags(func(flags *pflag.FlagSet) { 21 | AddCommonDatabaseChangesFlags(flags) 22 | 23 | flags.Int("retries", 3, "Number of retries to attempt when a connection error occurs") 24 | flags.Bool("read-only", false, "Create a read-only user") 25 | flags.String("password-env", "", "Name of the environment variable containing the password") 26 | }), 27 | ) 28 | 29 | func createUserE(cmd *cobra.Command, args []string) error { 30 | ctx := cmd.Context() 31 | 32 | dsnString := args[0] 33 | username := args[1] 34 | database := args[2] 35 | 36 | cursorTableName := sflags.MustGetString(cmd, "cursors-table") 37 | historyTableName := sflags.MustGetString(cmd, "history-table") 38 | 39 | readOnly := sflags.MustGetBool(cmd, "read-only") 40 | passwordEnv := sflags.MustGetString(cmd, "password-env") 41 | 42 | if passwordEnv == "" { 43 | return fmt.Errorf("password-env is required") 44 | } 45 | 46 | password := os.Getenv(passwordEnv) 47 | if password == "" { 48 | return fmt.Errorf("non-empty password is required") 49 | } 50 | 51 | dsn, err := db2.ParseDSN(dsnString) 52 | if err != nil { 53 | return fmt.Errorf("parsing dsn: %w", err) 54 | } 55 | 56 | if err := retry(ctx, func(ctx context.Context) error { 57 | handleReorgs := false 58 | dbLoader, err := db2.NewLoader( 59 | dsn, 60 | cursorTableName, 61 | historyTableName, 62 | sflags.MustGetString(cmd, "clickhouse-cluster"), 63 | 0, 0, 0, 64 | db2.OnModuleHashMismatchError.String(), 65 | &handleReorgs, 66 | zlog, tracer, 67 | ) 68 | if err != nil { return fmt.Errorf("creating loader: %w", err) } 69 | err = dbLoader.CreateUser(ctx, username, password, database, readOnly) 70 | if err != nil { 71 | return fmt.Errorf("create user: %w", err) 72 | } 73 | 74 | return nil 75 | }, sflags.MustGetInt(cmd, "retries")); err != nil { 76 | return fmt.Errorf("create user: %w", err) 77 | } 78 | 79 | return nil 80 | } 81 | 82 | func retry(ctx context.Context, f func(ctx context.Context) error, retries int) error { 83 | var err error 84 | 85 | for i := 0; i < retries; i++ { 86 | err = f(ctx) 87 | if err == nil { 88 | return nil 89 | } 90 | time.Sleep(5*time.Duration(i)*time.Second + 1*time.Second) 91 | } 92 | 93 | return err 94 | } 95 | -------------------------------------------------------------------------------- /db_proto/sql/schema/column.go: -------------------------------------------------------------------------------- 1 | package schema 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | v1 "github.com/streamingfast/substreams-sink-sql/pb/sf/substreams/sink/sql/schema/v1" 8 | "google.golang.org/protobuf/reflect/protoreflect" 9 | ) 10 | 11 | type Column struct { 12 | Name string 13 | ForeignKey *ForeignKey 14 | FieldDescriptor protoreflect.FieldDescriptor 15 | IsPrimaryKey bool 16 | IsUnique bool 17 | IsRepeated bool 18 | IsExtension bool 19 | IsMessage bool 20 | IsOptional bool 21 | Nested *Table 22 | Message string 23 | ConvertTo *v1.StringConvertion 24 | } 25 | 26 | func NewColumn(d 
protoreflect.FieldDescriptor, fieldInfo *v1.Column, ordinal int, inlineDepth int) (*Column, error) { 27 | out := &Column{ 28 | Name: string(d.Name()), 29 | FieldDescriptor: d, 30 | IsRepeated: d.IsList(), 31 | IsMessage: d.Kind() == protoreflect.MessageKind, 32 | IsExtension: d.IsExtension(), 33 | IsOptional: d.HasOptionalKeyword(), 34 | } 35 | 36 | if fieldInfo != nil { 37 | if fieldInfo.Inline { 38 | if inlineDepth >= 1 { 39 | return nil, fmt.Errorf("inline nesting level %d is not supported for column %q: only 1 level of inline nesting is allowed", inlineDepth+1, out.Name) 40 | } 41 | ti := &v1.Table{ 42 | Name: out.Name, 43 | } 44 | nested, err := NewTable(d.Message(), ti, ordinal+1, inlineDepth+1) 45 | if err != nil { 46 | return nil, fmt.Errorf("creating nested column %s: %w", out.Name, err) 47 | } 48 | out.Nested = nested 49 | } 50 | 51 | if fieldInfo.Name != nil { 52 | out.Name = *fieldInfo.Name 53 | } 54 | if fieldInfo.ForeignKey != nil { 55 | fk, err := NewForeignKey(*fieldInfo.ForeignKey) 56 | if err != nil { 57 | return nil, fmt.Errorf("error parsing foreign key %s: %w", *fieldInfo.ForeignKey, err) 58 | } 59 | out.ForeignKey = fk 60 | } 61 | out.IsPrimaryKey = fieldInfo.PrimaryKey 62 | out.IsUnique = fieldInfo.Unique 63 | out.ConvertTo = fieldInfo.ConvertTo 64 | } 65 | 66 | if out.IsMessage { 67 | out.Message = string(d.Message().Name()) 68 | } 69 | return out, nil 70 | } 71 | 72 | func (c *Column) QuotedName() string { 73 | return fmt.Sprintf("%q", c.Name) 74 | } 75 | 76 | type ForeignKey struct { 77 | Table string 78 | TableField string 79 | } 80 | 81 | func NewForeignKey(foreignKey string) (*ForeignKey, error) { 82 | parts := strings.Split(foreignKey, " on ") 83 | if len(parts) != 2 { 84 | return nil, fmt.Errorf("invalid foreign key format %q. 
expecting 'table_name on field_name' format", foreignKey) 85 | } 86 | return &ForeignKey{ 87 | Table: strings.TrimSpace(parts[0]), 88 | TableField: strings.TrimSpace(parts[1]), 89 | }, nil 90 | } 91 | -------------------------------------------------------------------------------- /db_changes/bundler/stats.go: -------------------------------------------------------------------------------- 1 | package bundler 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/streamingfast/bstream" 7 | "github.com/streamingfast/dmetrics" 8 | "github.com/streamingfast/logging/zapx" 9 | "go.uber.org/zap" 10 | ) 11 | 12 | type boundaryStats struct { 13 | creationStart time.Time 14 | 15 | boundaryProcessTime time.Duration 16 | procesingDataTime time.Duration 17 | uploadedDuration time.Duration 18 | 19 | totalBoundaryCount uint64 20 | boundary *bstream.Range 21 | 22 | // averages 23 | avgUploadDuration *dmetrics.AvgDurationCounter 24 | avgBoundaryProcessDuration *dmetrics.AvgDurationCounter 25 | avgDataProcessDuration *dmetrics.AvgDurationCounter 26 | } 27 | 28 | func newStats() *boundaryStats { 29 | return &boundaryStats{ 30 | avgUploadDuration: dmetrics.NewAvgDurationCounter(30*time.Second, time.Second, "upload dur"), 31 | avgBoundaryProcessDuration: dmetrics.NewAvgDurationCounter(30*time.Second, time.Second, "boundary process dur"), 32 | avgDataProcessDuration: dmetrics.NewAvgDurationCounter(30*time.Second, time.Second, "data process dur"), 33 | } 34 | } 35 | 36 | func (s *boundaryStats) startBoundary(b *bstream.Range) { 37 | s.creationStart = time.Now() 38 | s.boundary = b 39 | s.totalBoundaryCount++ 40 | s.boundaryProcessTime = 0 41 | s.procesingDataTime = 0 42 | s.uploadedDuration = 0 43 | } 44 | 45 | func (s *boundaryStats) addUploadedDuration(dur time.Duration) { 46 | s.avgUploadDuration.AddDuration(dur) 47 | s.uploadedDuration = dur 48 | } 49 | 50 | func (s *boundaryStats) endBoundary() { 51 | dur := time.Since(s.creationStart) 52 | s.avgBoundaryProcessDuration.AddDuration(dur) 53 | s.boundaryProcessTime = dur 54 | s.avgDataProcessDuration.AddDuration(s.procesingDataTime) 55 | } 56 | 57 | func (s *boundaryStats) addProcessingDataDur(dur time.Duration) { 58 | s.procesingDataTime += dur 59 | } 60 | 61 | func (s *boundaryStats) Log() []zap.Field { 62 | return []zap.Field{ 63 | zap.Uint64("file_count", s.totalBoundaryCount), 64 | zap.Stringer("boundary", s.boundary), 65 | zapx.HumanDuration("boundary_process_duration", s.boundaryProcessTime), 66 | zapx.HumanDuration("upload_duration", s.uploadedDuration), 67 | zapx.HumanDuration("data_process_duration", s.procesingDataTime), 68 | zapx.HumanDuration("avg_upload_duration", s.avgUploadDuration.Average()), 69 | zapx.HumanDuration("total_upload_duration", s.avgUploadDuration.Total()), 70 | zapx.HumanDuration("avg_boundary_process_duration", s.avgBoundaryProcessDuration.Average()), 71 | zapx.HumanDuration("total_boundary_process_duration", s.avgBoundaryProcessDuration.Total()), 72 | zapx.HumanDuration("avg_data_process_duration", s.avgDataProcessDuration.Average()), 73 | zapx.HumanDuration("total_data_process_duration", s.avgDataProcessDuration.Total()), 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /db_changes/db/flush.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/ClickHouse/clickhouse-go/v2" 9 | "github.com/streamingfast/logging/zapx" 10 | sink "github.com/streamingfast/substreams-sink" 
11 | "go.uber.org/zap" 12 | ) 13 | 14 | func (l *Loader) Flush(ctx context.Context, outputModuleHash string, cursor *sink.Cursor, lastFinalBlock uint64) (rowFlushedCount int, err error) { 15 | ctx = clickhouse.Context(context.Background(), clickhouse.WithStdAsync(false)) 16 | 17 | startAt := time.Now() 18 | tx, err := l.BeginTx(ctx, nil) 19 | if err != nil { 20 | return 0, fmt.Errorf("failed to being db transaction: %w", err) 21 | } 22 | defer func() { 23 | if err != nil { 24 | if err := tx.Rollback(); err != nil { 25 | l.logger.Warn("failed to rollback transaction", zap.Error(err)) 26 | } 27 | } 28 | }() 29 | 30 | rowFlushedCount, err = l.dialect.Flush(tx, ctx, l, outputModuleHash, lastFinalBlock) 31 | if err != nil { 32 | return 0, fmt.Errorf("dialect flush: %w", err) 33 | } 34 | 35 | rowFlushedCount += 1 36 | if err := l.UpdateCursor(ctx, tx, outputModuleHash, cursor); err != nil { 37 | return 0, fmt.Errorf("update cursor: %w", err) 38 | } 39 | 40 | if err := tx.Commit(); err != nil { 41 | return 0, fmt.Errorf("failed to commit db transaction: %w", err) 42 | } 43 | l.reset() 44 | 45 | // We add + 1 to the table count because the `cursors` table is an implicit table 46 | l.logger.Debug("flushed table(s) rows to database", zap.Int("table_count", l.entries.Len()+1), zap.Int("row_count", rowFlushedCount), zapx.HumanDuration("took", time.Since(startAt))) 47 | return rowFlushedCount, nil 48 | } 49 | 50 | func (l *Loader) Revert(ctx context.Context, outputModuleHash string, cursor *sink.Cursor, lastValidBlock uint64) error { 51 | tx, err := l.BeginTx(ctx, nil) 52 | if err != nil { 53 | return fmt.Errorf("failed to being db transaction: %w", err) 54 | } 55 | defer func() { 56 | if err != nil { 57 | if err := tx.Rollback(); err != nil { 58 | l.logger.Warn("failed to rollback transaction", zap.Error(err)) 59 | } 60 | } 61 | }() 62 | 63 | if err := l.dialect.Revert(tx, ctx, l, lastValidBlock); err != nil { 64 | return err 65 | } 66 | 67 | if err := l.UpdateCursor(ctx, tx, outputModuleHash, cursor); err != nil { 68 | return fmt.Errorf("update cursor after revert: %w", err) 69 | } 70 | 71 | if err := tx.Commit(); err != nil { 72 | return fmt.Errorf("failed to commit db transaction: %w", err) 73 | } 74 | 75 | l.logger.Debug("reverted changes to database", zap.Uint64("last_valid_block", lastValidBlock)) 76 | return nil 77 | } 78 | 79 | func (l *Loader) reset() { 80 | for entriesPair := l.entries.Oldest(); entriesPair != nil; entriesPair = entriesPair.Next() { 81 | l.entries.Set(entriesPair.Key, NewOrderedMap[string, *Operation]()) 82 | } 83 | l.batchOrdinal = 0 84 | } 85 | -------------------------------------------------------------------------------- /db_changes/sinker/setup.go: -------------------------------------------------------------------------------- 1 | package sinker 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | 8 | "github.com/lib/pq" 9 | "github.com/streamingfast/logging" 10 | sinksql "github.com/streamingfast/substreams-sink-sql" 11 | db2 "github.com/streamingfast/substreams-sink-sql/db_changes/db" 12 | pbsubstreams "github.com/streamingfast/substreams/pb/sf/substreams/v1" 13 | "go.uber.org/zap" 14 | ) 15 | 16 | const ( 17 | deprecated_supportedDeployableService = "type.googleapis.com/sf.substreams.sink.sql.v1.Service" 18 | supportedDeployableService = "type.googleapis.com/sf.substreams.sink.sql.service.v1.Service" 19 | ) 20 | 21 | // SinkerSetupOptions contains configuration for the setup operation 22 | type SinkerSetupOptions struct { 23 | CursorTableName string 24 | 
HistoryTableName string 25 | ClickhouseCluster string 26 | OnModuleHashMismatch string 27 | SystemTablesOnly bool 28 | IgnoreDuplicateTableErrors bool 29 | Postgraphile bool 30 | } 31 | 32 | // SinkerSetup sets up the required infrastructure for a Substreams SQL sink 33 | func SinkerSetup( 34 | ctx context.Context, 35 | dsnString string, 36 | pkg *pbsubstreams.Package, 37 | options SinkerSetupOptions, 38 | logger *zap.Logger, 39 | tracer logging.Tracer, 40 | ) error { 41 | sinkConfig, err := sinksql.ExtractSinkService(pkg) 42 | if err != nil { 43 | return fmt.Errorf("extract sink config: %w", err) 44 | } 45 | 46 | dsn, err := db2.ParseDSN(dsnString) 47 | if err != nil { 48 | return fmt.Errorf("parse dsn: %w", err) 49 | } 50 | 51 | handleReorgs := false 52 | dbLoader, err := db2.NewLoader( 53 | dsn, 54 | options.CursorTableName, 55 | options.HistoryTableName, 56 | options.ClickhouseCluster, 57 | 0, 0, 0, 58 | options.OnModuleHashMismatch, 59 | &handleReorgs, 60 | logger, tracer, 61 | ) 62 | if err != nil { 63 | return fmt.Errorf("creating loader: %w", err) 64 | } 65 | defer dbLoader.Close() 66 | 67 | userSQLSchema := sinkConfig.Schema 68 | if options.SystemTablesOnly { 69 | userSQLSchema = "" 70 | } 71 | 72 | err = dbLoader.Setup(ctx, dsn.Schema(), userSQLSchema, options.Postgraphile) 73 | if err != nil { 74 | if isDuplicateTableError(err) && options.IgnoreDuplicateTableErrors { 75 | logger.Info("received duplicate table error, script did not execute successfully") 76 | } else { 77 | return fmt.Errorf("setup: %w", err) 78 | } 79 | } 80 | logger.Info("setup completed successfully") 81 | return nil 82 | } 83 | 84 | // isDuplicateTableError checks if the error is a PostgreSQL duplicate table error 85 | func isDuplicateTableError(err error) bool { 86 | var sqlError *pq.Error 87 | if !errors.As(err, &sqlError) { 88 | return false 89 | } 90 | 91 | // List at https://www.postgresql.org/docs/14/errcodes-appendix.html#ERRCODES-TABLE 92 | switch sqlError.Code { 93 | // Error code named `duplicate_table` 94 | case "42P07": 95 | return true 96 | } 97 | 98 | return false 99 | } 100 | -------------------------------------------------------------------------------- /db_proto/test/substreams/full-block/src/pb/sf.substreams.sink.sql.v1.rs: -------------------------------------------------------------------------------- 1 | // @generated 2 | // This file is @generated by prost-build. 3 | #[allow(clippy::derive_partial_eq_without_eq)] 4 | #[derive(Clone, PartialEq, ::prost::Message)] 5 | pub struct Service { 6 | #[prost(string, tag="1")] 7 | pub schema: ::prost::alloc::string::String, 8 | #[prost(message, optional, tag="2")] 9 | pub dbt_config: ::core::option::Option, 10 | #[prost(message, optional, tag="4")] 11 | pub hasura_frontend: ::core::option::Option, 12 | #[prost(message, optional, tag="5")] 13 | pub postgraphile_frontend: ::core::option::Option, 14 | #[prost(enumeration="service::Engine", tag="7")] 15 | pub engine: i32, 16 | #[prost(message, optional, tag="8")] 17 | pub rest_frontend: ::core::option::Option, 18 | } 19 | /// Nested message and enum types in `Service`. 20 | pub mod service { 21 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] 22 | #[repr(i32)] 23 | pub enum Engine { 24 | Unset = 0, 25 | Postgres = 1, 26 | Clickhouse = 2, 27 | } 28 | impl Engine { 29 | /// String value of the enum field names used in the ProtoBuf definition. 
30 | /// 31 | /// The values are not transformed in any way and thus are considered stable 32 | /// (if the ProtoBuf definition does not change) and safe for programmatic use. 33 | pub fn as_str_name(&self) -> &'static str { 34 | match self { 35 | Engine::Unset => "unset", 36 | Engine::Postgres => "postgres", 37 | Engine::Clickhouse => "clickhouse", 38 | } 39 | } 40 | /// Creates an enum from field names used in the ProtoBuf definition. 41 | pub fn from_str_name(value: &str) -> ::core::option::Option { 42 | match value { 43 | "unset" => Some(Self::Unset), 44 | "postgres" => Some(Self::Postgres), 45 | "clickhouse" => Some(Self::Clickhouse), 46 | _ => None, 47 | } 48 | } 49 | } 50 | } 51 | #[allow(clippy::derive_partial_eq_without_eq)] 52 | #[derive(Clone, PartialEq, ::prost::Message)] 53 | pub struct DbtConfig { 54 | #[prost(bytes="vec", tag="1")] 55 | pub files: ::prost::alloc::vec::Vec, 56 | #[prost(int32, tag="2")] 57 | pub run_interval_seconds: i32, 58 | #[prost(bool, tag="3")] 59 | pub enabled: bool, 60 | } 61 | #[allow(clippy::derive_partial_eq_without_eq)] 62 | #[derive(Clone, Copy, PartialEq, ::prost::Message)] 63 | pub struct HasuraFrontend { 64 | #[prost(bool, tag="1")] 65 | pub enabled: bool, 66 | } 67 | #[allow(clippy::derive_partial_eq_without_eq)] 68 | #[derive(Clone, Copy, PartialEq, ::prost::Message)] 69 | pub struct PostgraphileFrontend { 70 | #[prost(bool, tag="1")] 71 | pub enabled: bool, 72 | } 73 | #[allow(clippy::derive_partial_eq_without_eq)] 74 | #[derive(Clone, Copy, PartialEq, ::prost::Message)] 75 | pub struct PgWebFrontend { 76 | #[prost(bool, tag="1")] 77 | pub enabled: bool, 78 | } 79 | #[allow(clippy::derive_partial_eq_without_eq)] 80 | #[derive(Clone, Copy, PartialEq, ::prost::Message)] 81 | pub struct RestFrontend { 82 | #[prost(bool, tag="1")] 83 | pub enabled: bool, 84 | } 85 | // @@protoc_insertion_point(module) 86 | -------------------------------------------------------------------------------- /db_proto/test/substreams/full-block/src/pb/sf.substreams.sink.sql.service.v1.rs: -------------------------------------------------------------------------------- 1 | // @generated 2 | // This file is @generated by prost-build. 3 | #[allow(clippy::derive_partial_eq_without_eq)] 4 | #[derive(Clone, PartialEq, ::prost::Message)] 5 | pub struct Service { 6 | #[prost(string, tag="1")] 7 | pub schema: ::prost::alloc::string::String, 8 | #[prost(message, optional, tag="2")] 9 | pub dbt_config: ::core::option::Option, 10 | #[prost(message, optional, tag="4")] 11 | pub hasura_frontend: ::core::option::Option, 12 | #[prost(message, optional, tag="5")] 13 | pub postgraphile_frontend: ::core::option::Option, 14 | #[prost(enumeration="service::Engine", tag="7")] 15 | pub engine: i32, 16 | #[prost(message, optional, tag="8")] 17 | pub rest_frontend: ::core::option::Option, 18 | } 19 | /// Nested message and enum types in `Service`. 20 | pub mod service { 21 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] 22 | #[repr(i32)] 23 | pub enum Engine { 24 | Unset = 0, 25 | Postgres = 1, 26 | Clickhouse = 2, 27 | } 28 | impl Engine { 29 | /// String value of the enum field names used in the ProtoBuf definition. 30 | /// 31 | /// The values are not transformed in any way and thus are considered stable 32 | /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
33 | pub fn as_str_name(&self) -> &'static str { 34 | match self { 35 | Engine::Unset => "unset", 36 | Engine::Postgres => "postgres", 37 | Engine::Clickhouse => "clickhouse", 38 | } 39 | } 40 | /// Creates an enum from field names used in the ProtoBuf definition. 41 | pub fn from_str_name(value: &str) -> ::core::option::Option { 42 | match value { 43 | "unset" => Some(Self::Unset), 44 | "postgres" => Some(Self::Postgres), 45 | "clickhouse" => Some(Self::Clickhouse), 46 | _ => None, 47 | } 48 | } 49 | } 50 | } 51 | #[allow(clippy::derive_partial_eq_without_eq)] 52 | #[derive(Clone, PartialEq, ::prost::Message)] 53 | pub struct DbtConfig { 54 | #[prost(bytes="vec", tag="1")] 55 | pub files: ::prost::alloc::vec::Vec, 56 | #[prost(int32, tag="2")] 57 | pub run_interval_seconds: i32, 58 | #[prost(bool, tag="3")] 59 | pub enabled: bool, 60 | } 61 | #[allow(clippy::derive_partial_eq_without_eq)] 62 | #[derive(Clone, Copy, PartialEq, ::prost::Message)] 63 | pub struct HasuraFrontend { 64 | #[prost(bool, tag="1")] 65 | pub enabled: bool, 66 | } 67 | #[allow(clippy::derive_partial_eq_without_eq)] 68 | #[derive(Clone, Copy, PartialEq, ::prost::Message)] 69 | pub struct PostgraphileFrontend { 70 | #[prost(bool, tag="1")] 71 | pub enabled: bool, 72 | } 73 | #[allow(clippy::derive_partial_eq_without_eq)] 74 | #[derive(Clone, Copy, PartialEq, ::prost::Message)] 75 | pub struct PgWebFrontend { 76 | #[prost(bool, tag="1")] 77 | pub enabled: bool, 78 | } 79 | #[allow(clippy::derive_partial_eq_without_eq)] 80 | #[derive(Clone, Copy, PartialEq, ::prost::Message)] 81 | pub struct RestFrontend { 82 | #[prost(bool, tag="1")] 83 | pub enabled: bool, 84 | } 85 | // @@protoc_insertion_point(module) 86 | -------------------------------------------------------------------------------- /db_changes/bundler/bundler_test.go: -------------------------------------------------------------------------------- 1 | package bundler 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/streamingfast/bstream" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestBoundary_newBoundary(t *testing.T) { 11 | tests := []struct { 12 | name string 13 | bundlerSize uint64 14 | blockNum uint64 15 | stopBlock uint64 16 | expect *bstream.Range 17 | }{ 18 | {"start of boundary w/ blockCount 10", 10, 0, 1000, bstream.NewRangeExcludingEnd(0, 10)}, 19 | {"middle of boundary w/ blockCount 10", 10, 7, 1000, bstream.NewRangeExcludingEnd(0, 10)}, 20 | {"last block of boundary w/ blockCount 10", 10, 9, 1000, bstream.NewRangeExcludingEnd(0, 10)}, 21 | {"end block of boundary w/ blockCount 10", 10, 10, 1000, bstream.NewRangeExcludingEnd(10, 20)}, 22 | {"start of boundary w/ blockCount 100", 100, 0, 1000, bstream.NewRangeExcludingEnd(0, 100)}, 23 | {"middle of boundary w/ blockCount 100", 100, 73, 1000, bstream.NewRangeExcludingEnd(0, 100)}, 24 | {"last block of boundary w/ blockCount 100", 100, 99, 1000, bstream.NewRangeExcludingEnd(0, 100)}, 25 | {"end block of boundary w/ blockCount 100", 100, 100, 1000, bstream.NewRangeExcludingEnd(100, 200)}, 26 | 27 | {"start of boundary w/ stopBlock equal", 10, 0, 10, bstream.NewRangeExcludingEnd(0, 10)}, 28 | {"start of boundary w/ stopBlock within", 10, 0, 5, bstream.NewRangeExcludingEnd(0, 5)}, 29 | {"middle of boundary w/ stopBlock within", 10, 14, 15, bstream.NewRangeExcludingEnd(10, 15)}, 30 | } 31 | for _, test := range tests { 32 | t.Run(test.name, func(t *testing.T) { 33 | b := &Bundler{ 34 | blockCount: test.bundlerSize, 35 | stopBlock: test.stopBlock, 36 | } 37 | assert.Equal(t, test.expect, 
b.newBoundary(test.blockNum)) 38 | }) 39 | } 40 | } 41 | 42 | func TestBoundary_computeEndBlock(t *testing.T) { 43 | tests := []struct { 44 | name string 45 | start uint64 46 | size uint64 47 | expect uint64 48 | }{ 49 | {"on boundary", 100, 100, 200}, 50 | {"off boundary", 123, 100, 200}, 51 | } 52 | for _, test := range tests { 53 | t.Run(test.name, func(t *testing.T) { 54 | assert.Equal(t, test.expect, computeEndBlock(test.start, test.size)) 55 | }) 56 | } 57 | } 58 | 59 | func TestBundler_boundariesToSkip(t *testing.T) { 60 | tests := []struct { 61 | name string 62 | lastActiveBoundary *bstream.Range 63 | bundlerSize uint64 64 | blockNum uint64 65 | expect []*bstream.Range 66 | }{ 67 | {"before boundary", bstream.NewRangeExcludingEnd(0, 100), 100, 98, nil}, 68 | {"on boundary", bstream.NewRangeExcludingEnd(0, 100), 100, 100, nil}, 69 | {"above boundary", bstream.NewRangeExcludingEnd(0, 100), 100, 107, nil}, 70 | {"above boundary", bstream.NewRangeExcludingEnd(0, 100), 100, 199, nil}, 71 | {"above boundary", bstream.NewRangeExcludingEnd(2, 100), 100, 200, []*bstream.Range{ 72 | bstream.NewRangeExcludingEnd(100, 200), 73 | }}, 74 | {"above boundary", bstream.NewRangeExcludingEnd(4, 100), 100, 763, []*bstream.Range{ 75 | bstream.NewRangeExcludingEnd(100, 200), 76 | bstream.NewRangeExcludingEnd(200, 300), 77 | bstream.NewRangeExcludingEnd(300, 400), 78 | bstream.NewRangeExcludingEnd(400, 500), 79 | bstream.NewRangeExcludingEnd(500, 600), 80 | bstream.NewRangeExcludingEnd(600, 700), 81 | }}, 82 | } 83 | for _, test := range tests { 84 | t.Run(test.name, func(t *testing.T) { 85 | assert.Equal(t, test.expect, boundariesToSkip(test.lastActiveBoundary, test.blockNum, test.bundlerSize)) 86 | }) 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Build docker image 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v*" 7 | branches: 8 | - "*" 9 | 10 | env: 11 | REGISTRY: ghcr.io 12 | IMAGE_NAME: ${{ github.repository }} 13 | 14 | jobs: 15 | build-and-push-image: 16 | runs-on: ubuntu-22.04 17 | if: "${{ !startsWith(github.event.head_commit.message, 'GitBook: [#') }}" 18 | 19 | outputs: 20 | image: ${{ steps.meta.outputs.image }} 21 | tags: ${{ steps.meta.outputs.tags }} 22 | 23 | permissions: 24 | contents: read 25 | packages: write 26 | 27 | steps: 28 | - name: Checkout repository 29 | uses: actions/checkout@v4 30 | 31 | - uses: actions/cache@v4 32 | with: 33 | path: | 34 | ~/.cache/go-build 35 | ~/go/pkg/mod 36 | key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} 37 | restore-keys: | 38 | ${{ runner.os }}-go- 39 | 40 | - name: Set up QEMU 41 | uses: docker/setup-qemu-action@v3 42 | 43 | - name: Set up Docker Buildx 44 | uses: docker/setup-buildx-action@v3 45 | 46 | - name: Log in to the Container registry 47 | uses: docker/login-action@v3 48 | with: 49 | registry: ${{ env.REGISTRY }} 50 | username: ${{ github.actor }} 51 | password: ${{ secrets.GITHUB_TOKEN }} 52 | 53 | - name: Generate docker tags/labels from github build context 54 | id: meta 55 | uses: docker/metadata-action@v5 56 | with: 57 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 58 | tags: | 59 | type=ref,event=tag 60 | type=sha,prefix= 61 | type=raw,enable=${{ github.ref == 'refs/heads/develop' }},value=develop 62 | type=edge,branch=develop 63 | flavor: | 64 | latest=${{ startsWith(github.ref, 'refs/tags/') }} 65 | 66 | - name: Extract version 67 | id: extract-version 
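# Produces a human-readable VERSION string (tag name, or 'edge' for non-tag
# builds) that is passed to the Docker build below through the VERSION build-arg.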
68 | run: | 69 | version=edge 70 | commit_date="$(git show -s --format=%cI)" 71 | if [[ "${GITHUB_REF}" == refs/tags/* ]]; then 72 | version=${GITHUB_REF#refs/tags/} 73 | fi 74 | 75 | echo "VERSION=$version (Commit ${GITHUB_SHA::7}, Commit Date $commit_date)" >> "$GITHUB_OUTPUT" 76 | 77 | - name: Build and push Docker image 78 | uses: docker/build-push-action@v6 79 | with: 80 | file: ./Dockerfile 81 | platforms: linux/amd64,linux/arm64 82 | push: true 83 | tags: ${{ steps.meta.outputs.tags }} 84 | labels: ${{ steps.meta.outputs.labels }} 85 | build-args: | 86 | VERSION=${{ steps.extract-version.outputs.VERSION }} 87 | 88 | slack-notifications: 89 | if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }} 90 | needs: [build-and-push-image] 91 | runs-on: ubuntu-22.04 92 | steps: 93 | - name: Slack notification 94 | env: 95 | SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} 96 | uses: Ilshidur/action-slack@2.0.2 97 | with: 98 | args: | 99 | :done: *${{ github.repository }}* Success building docker images from ${{ github.ref_type }} _${{ github.ref_name }}_ (${{ github.actor }}) :sparkling_heart: ```${{ join(needs.build-and-push-image.outputs.tags, ' ') }} 100 | ${{ needs.build-and-push-image.outputs.image }}``` 101 | -------------------------------------------------------------------------------- /db_changes/db/types_enum.go: -------------------------------------------------------------------------------- 1 | // Code generated by go-enum DO NOT EDIT. 2 | // Version: 3 | // Revision: 4 | // Build Date: 5 | // Built By: 6 | 7 | package db 8 | 9 | import ( 10 | "fmt" 11 | "strings" 12 | ) 13 | 14 | const ( 15 | // OnModuleHashMismatchIgnore is a OnModuleHashMismatch of type Ignore. 16 | OnModuleHashMismatchIgnore OnModuleHashMismatch = iota 17 | // OnModuleHashMismatchWarn is a OnModuleHashMismatch of type Warn. 18 | OnModuleHashMismatchWarn 19 | // OnModuleHashMismatchError is a OnModuleHashMismatch of type Error. 20 | OnModuleHashMismatchError 21 | ) 22 | 23 | var ErrInvalidOnModuleHashMismatch = fmt.Errorf("not a valid OnModuleHashMismatch, try [%s]", strings.Join(_OnModuleHashMismatchNames, ", ")) 24 | 25 | const _OnModuleHashMismatchName = "IgnoreWarnError" 26 | 27 | var _OnModuleHashMismatchNames = []string{ 28 | _OnModuleHashMismatchName[0:6], 29 | _OnModuleHashMismatchName[6:10], 30 | _OnModuleHashMismatchName[10:15], 31 | } 32 | 33 | // OnModuleHashMismatchNames returns a list of possible string values of OnModuleHashMismatch. 34 | func OnModuleHashMismatchNames() []string { 35 | tmp := make([]string, len(_OnModuleHashMismatchNames)) 36 | copy(tmp, _OnModuleHashMismatchNames) 37 | return tmp 38 | } 39 | 40 | var _OnModuleHashMismatchMap = map[OnModuleHashMismatch]string{ 41 | OnModuleHashMismatchIgnore: _OnModuleHashMismatchName[0:6], 42 | OnModuleHashMismatchWarn: _OnModuleHashMismatchName[6:10], 43 | OnModuleHashMismatchError: _OnModuleHashMismatchName[10:15], 44 | } 45 | 46 | // String implements the Stringer interface. 
47 | func (x OnModuleHashMismatch) String() string { 48 | if str, ok := _OnModuleHashMismatchMap[x]; ok { 49 | return str 50 | } 51 | return fmt.Sprintf("OnModuleHashMismatch(%d)", x) 52 | } 53 | 54 | // IsValid provides a quick way to determine if the typed value is 55 | // part of the allowed enumerated values 56 | func (x OnModuleHashMismatch) IsValid() bool { 57 | _, ok := _OnModuleHashMismatchMap[x] 58 | return ok 59 | } 60 | 61 | var _OnModuleHashMismatchValue = map[string]OnModuleHashMismatch{ 62 | _OnModuleHashMismatchName[0:6]: OnModuleHashMismatchIgnore, 63 | strings.ToLower(_OnModuleHashMismatchName[0:6]): OnModuleHashMismatchIgnore, 64 | _OnModuleHashMismatchName[6:10]: OnModuleHashMismatchWarn, 65 | strings.ToLower(_OnModuleHashMismatchName[6:10]): OnModuleHashMismatchWarn, 66 | _OnModuleHashMismatchName[10:15]: OnModuleHashMismatchError, 67 | strings.ToLower(_OnModuleHashMismatchName[10:15]): OnModuleHashMismatchError, 68 | } 69 | 70 | // ParseOnModuleHashMismatch attempts to convert a string to a OnModuleHashMismatch. 71 | func ParseOnModuleHashMismatch(name string) (OnModuleHashMismatch, error) { 72 | if x, ok := _OnModuleHashMismatchValue[name]; ok { 73 | return x, nil 74 | } 75 | // Case insensitive parse, do a separate lookup to prevent unnecessary cost of lowercasing a string if we don't need to. 76 | if x, ok := _OnModuleHashMismatchValue[strings.ToLower(name)]; ok { 77 | return x, nil 78 | } 79 | return OnModuleHashMismatch(0), fmt.Errorf("%s is %w", name, ErrInvalidOnModuleHashMismatch) 80 | } 81 | 82 | // MarshalText implements the text marshaller method. 83 | func (x OnModuleHashMismatch) MarshalText() ([]byte, error) { 84 | return []byte(x.String()), nil 85 | } 86 | 87 | // UnmarshalText implements the text unmarshaller method. 88 | func (x *OnModuleHashMismatch) UnmarshalText(text []byte) error { 89 | name := string(text) 90 | tmp, err := ParseOnModuleHashMismatch(name) 91 | if err != nil { 92 | return err 93 | } 94 | *x = tmp 95 | return nil 96 | } 97 | -------------------------------------------------------------------------------- /cmd/substreams-sink-sql/common_flags.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "time" 6 | 7 | "github.com/spf13/pflag" 8 | "github.com/streamingfast/bstream" 9 | "github.com/streamingfast/cli" 10 | "github.com/streamingfast/shutter" 11 | sink "github.com/streamingfast/substreams-sink" 12 | pbsubstreams "github.com/streamingfast/substreams/pb/sf/substreams/v1" 13 | "go.uber.org/zap" 14 | ) 15 | 16 | var ( 17 | onModuleHashMistmatchFlag = "on-module-hash-mistmatch" 18 | ) 19 | 20 | var supportedOutputTypes = "sf.substreams.sink.database.v1.DatabaseChanges,sf.substreams.database.v1.DatabaseChanges" 21 | 22 | // AddCommonSinkerFlags adds the flags common to all command that needs to create a sinker, 23 | // namely the `run` and `generate-csv` commands. 24 | func AddCommonSinkerFlags(flags *pflag.FlagSet) { 25 | flags.String(onModuleHashMistmatchFlag, "error", cli.FlagDescription(` 26 | What to do when the module hash in the manifest does not match the one in the database, can be 'error', 'warn' or 'ignore' 27 | 28 | - If 'error' is used (default), it will exit with an error explaining the problem and how to fix it. 29 | - If 'warn' is used, it does the same as 'ignore' but it will log a warning message when it happens. 30 | - If 'ignore' is set, we pick the cursor at the highest block number and use it as the starting point. 
Subsequent 31 | updates to the cursor will overwrite the module hash in the database. 32 | `)) 33 | } 34 | 35 | func AddCommonDatabaseChangesFlags(flags *pflag.FlagSet) { 36 | flags.String("cursors-table", "cursors", "[Operator] Name of the table to use for storing cursors") 37 | flags.String("history-table", "substreams_history", "[Operator] Name of the table to use for storing block history, used to handle reorgs") 38 | flags.String("clickhouse-cluster", "", "[Operator] If non-empty, an 'ON CLUSTER <cluster>' clause will be applied when setting up tables in Clickhouse. It will also replace the table engine with its replicated counterpart (MergeTree will be replaced with ReplicatedMergeTree for example).") 39 | flags.String("bytes-encoding", "raw", "[Schema] Encoding for protobuf bytes fields: raw, hex, 0xhex, base64, base58. Non-raw encodings store data as string type in database.") 40 | } 41 | 42 | func readBlockRangeArgument(in string) (blockRange *bstream.Range, err error) { 43 | return sink.ReadBlockRange(&pbsubstreams.Module{ 44 | Name: "dummy", 45 | InitialBlock: 0, 46 | }, in) 47 | } 48 | 49 | type cliApplication struct { 50 | appCtx context.Context 51 | shutter *shutter.Shutter 52 | } 53 | 54 | func (a *cliApplication) WaitForTermination(logger *zap.Logger, unreadyPeriodAfterSignal, gracefulShutdownDelay time.Duration) error { 55 | // On any exit path, we synchronize the logger one last time 56 | defer func() { 57 | logger.Sync() 58 | }() 59 | 60 | signalHandler, isSignaled, _ := cli.SetupSignalHandler(unreadyPeriodAfterSignal, logger) 61 | select { 62 | case <-signalHandler: 63 | go a.shutter.Shutdown(nil) 64 | break 65 | case <-a.shutter.Terminating(): 66 | logger.Info("run terminating", zap.Bool("from_signal", isSignaled.Load()), zap.Bool("with_error", a.shutter.Err() != nil)) 67 | break 68 | } 69 | 70 | logger.Info("waiting for run termination") 71 | select { 72 | case <-a.shutter.Terminated(): 73 | case <-time.After(gracefulShutdownDelay): 74 | logger.Warn("application did not terminate within graceful period of " + gracefulShutdownDelay.String() + ", forcing termination") 75 | } 76 | 77 | if err := a.shutter.Err(); err != nil { 78 | return err 79 | } 80 | 81 | logger.Info("run terminated gracefully") 82 | return nil 83 | } 84 | -------------------------------------------------------------------------------- /db_proto/stats/stats.go: -------------------------------------------------------------------------------- 1 | package stats 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/streamingfast/logging/zapx" 7 | "go.uber.org/zap" 8 | ) 9 | 10 | type Average struct { 11 | Duration []time.Duration 12 | windowSize int 13 | title string 14 | lastX int 15 | } 16 | 17 | func NewAverage(title string, windowSize int, lastX int) *Average { 18 | return &Average{ 19 | title: title, 20 | windowSize: windowSize, 21 | lastX: lastX, 22 | } 23 | } 24 | func (a *Average) Add(d time.Duration) { 25 | a.Duration = append(a.Duration, d) 26 | if len(a.Duration) > a.windowSize { 27 | a.Duration = a.Duration[1:] 28 | } 29 | } 30 | 31 | func (a *Average) Average() time.Duration { 32 | if len(a.Duration) == 0 { 33 | return 0 34 | } 35 | var total time.Duration 36 | for _, d := range a.Duration { 37 | total += d 38 | } 39 | return time.Duration(total / time.Duration(len(a.Duration))) 40 | } 41 | 42 | func (a *Average) LastItemsAverage(count int) time.Duration { 43 | if len(a.Duration) == 0 { 44 | return 0 45 | } 46 | if count <= 0 || count > len(a.Duration) { 47 | count = len(a.Duration) 48 | } 49 | var total 
int64 50 | for _, d := range a.Duration[len(a.Duration)-count:] { 51 | total += d.Nanoseconds() 52 | } 53 | return time.Duration(total / int64(count)) 54 | } 55 | 56 | func (a *Average) Log(logger *zap.Logger) { 57 | logger.Info(a.title, 58 | zapx.HumanDuration("average", a.Average()), 59 | zapx.HumanDuration("last X average", a.LastItemsAverage(a.lastX)), 60 | ) 61 | } 62 | 63 | type Stats struct { 64 | logger *zap.Logger 65 | BlockCount int 66 | WaitDurationBetweenBlocks *Average 67 | BlockProcessingDuration *Average 68 | UnmarshallingDuration *Average 69 | BlockInsertDuration *Average 70 | EntitiesInsertDuration *Average 71 | FlushDuration *Average 72 | LastBlockProcessAt time.Time 73 | TotalProcessingDuration time.Duration 74 | TotalDurationBetween time.Duration 75 | } 76 | 77 | func NewStats(logger *zap.Logger) *Stats { 78 | s := &Stats{ 79 | logger: logger, 80 | WaitDurationBetweenBlocks: NewAverage(" Wait Duration Between Blocks", 250_000, 1000), 81 | BlockProcessingDuration: NewAverage(" Block Processing Duration", 250_000, 1000), 82 | UnmarshallingDuration: NewAverage(" Unmarshalling Duration", 250_000, 1000), 83 | BlockInsertDuration: NewAverage(" Block Insert Duration", 250_000, 1000), 84 | EntitiesInsertDuration: NewAverage(" Entities Insert Duration", 250_000, 1000), 85 | FlushDuration: NewAverage(" Flush duration", 1000, 10), 86 | } 87 | 88 | go func() { 89 | for { 90 | time.Sleep(30 * time.Second) 91 | s.Log() 92 | } 93 | }() 94 | 95 | return s 96 | } 97 | 98 | func (s *Stats) Log() { 99 | s.logger.Info("-----------------------------------") 100 | 101 | if s.BlockCount == 0 { 102 | s.logger.Info("Stats: no blocks processed yet") 103 | } else { 104 | s.logger.Info("Stats", 105 | zap.Int("block_count", s.BlockCount), 106 | zapx.HumanDuration("Processing Time", s.TotalProcessingDuration), 107 | zapx.HumanDuration("Total Wait Duration", s.TotalDurationBetween), 108 | zapx.HumanDuration("Total Duration", s.TotalDurationBetween+s.TotalProcessingDuration), 109 | zap.Time("Last Block Process At", s.LastBlockProcessAt), 110 | ) 111 | 112 | s.WaitDurationBetweenBlocks.Log(s.logger) 113 | s.BlockProcessingDuration.Log(s.logger) 114 | s.UnmarshallingDuration.Log(s.logger) 115 | s.BlockInsertDuration.Log(s.logger) 116 | s.EntitiesInsertDuration.Log(s.logger) 117 | s.FlushDuration.Log(s.logger) 118 | } 119 | 120 | s.logger.Info("-----------------------------------") 121 | } 122 | -------------------------------------------------------------------------------- /bytes/encoding.go: -------------------------------------------------------------------------------- 1 | package bytes 2 | 3 | import ( 4 | "encoding/base64" 5 | "encoding/hex" 6 | "fmt" 7 | "strings" 8 | 9 | "github.com/btcsuite/btcutil/base58" 10 | ) 11 | 12 | // Encoding represents the different encoding types for protobuf bytes fields 13 | type Encoding int 14 | 15 | const ( 16 | // EncodingRaw keeps bytes as raw binary data (default) 17 | EncodingRaw Encoding = iota 18 | // EncodingHex encodes bytes as hexadecimal string 19 | EncodingHex 20 | // EncodingHexWith0x encodes bytes as hexadecimal string with 0x prefix 21 | EncodingHexWith0x 22 | // EncodingBase64 encodes bytes as base64 string 23 | EncodingBase64 24 | // EncodingBase58 encodes bytes as base58 string 25 | EncodingBase58 26 | ) 27 | 28 | // String returns the string representation of the encoding 29 | func (e Encoding) String() string { 30 | switch e { 31 | case EncodingRaw: 32 | return "raw" 33 | case EncodingHex: 34 | return "hex" 35 | case EncodingHexWith0x: 36 | 
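// The "0xhex" name reflects the 0x-prefixed hex form commonly used by EVM tooling.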
return "0xhex" 37 | case EncodingBase64: 38 | return "base64" 39 | case EncodingBase58: 40 | return "base58" 41 | default: 42 | return "unknown" 43 | } 44 | } 45 | 46 | // ParseEncoding parses a string into an Encoding type 47 | func ParseEncoding(s string) (Encoding, error) { 48 | switch strings.ToLower(s) { 49 | case "raw": 50 | return EncodingRaw, nil 51 | case "hex": 52 | return EncodingHex, nil 53 | case "0xhex": 54 | return EncodingHexWith0x, nil 55 | case "base64": 56 | return EncodingBase64, nil 57 | case "base58": 58 | return EncodingBase58, nil 59 | default: 60 | return EncodingRaw, fmt.Errorf("invalid encoding: %s", s) 61 | } 62 | } 63 | 64 | // IsStringType returns true if the encoding converts bytes to string database type 65 | func (e Encoding) IsStringType() bool { 66 | return e != EncodingRaw 67 | } 68 | 69 | // EncodeBytes encodes the given bytes using the specified encoding 70 | func (e Encoding) EncodeBytes(data []byte) (interface{}, error) { 71 | switch e { 72 | case EncodingRaw: 73 | return data, nil 74 | case EncodingHex: 75 | return hex.EncodeToString(data), nil 76 | case EncodingHexWith0x: 77 | return "0x" + hex.EncodeToString(data), nil 78 | case EncodingBase64: 79 | return base64.StdEncoding.EncodeToString(data), nil 80 | case EncodingBase58: 81 | return base58.Encode(data), nil 82 | default: 83 | return nil, fmt.Errorf("unsupported encoding: %s", e) 84 | } 85 | } 86 | 87 | // DecodeBytes decodes the given string back to bytes using the specified encoding 88 | func (e Encoding) DecodeBytes(encoded interface{}) ([]byte, error) { 89 | switch e { 90 | case EncodingRaw: 91 | if data, ok := encoded.([]byte); ok { 92 | return data, nil 93 | } 94 | return nil, fmt.Errorf("expected []byte for raw encoding, got %T", encoded) 95 | case EncodingHex: 96 | if str, ok := encoded.(string); ok { 97 | return hex.DecodeString(str) 98 | } 99 | return nil, fmt.Errorf("expected string for hex encoding, got %T", encoded) 100 | case EncodingHexWith0x: 101 | if str, ok := encoded.(string); ok { 102 | if strings.HasPrefix(str, "0x") || strings.HasPrefix(str, "0X") { 103 | return hex.DecodeString(str[2:]) 104 | } 105 | return hex.DecodeString(str) 106 | } 107 | return nil, fmt.Errorf("expected string for 0xhex encoding, got %T", encoded) 108 | case EncodingBase64: 109 | if str, ok := encoded.(string); ok { 110 | return base64.StdEncoding.DecodeString(str) 111 | } 112 | return nil, fmt.Errorf("expected string for base64 encoding, got %T", encoded) 113 | case EncodingBase58: 114 | if str, ok := encoded.(string); ok { 115 | return base58.Decode(str), nil 116 | } 117 | return nil, fmt.Errorf("expected string for base58 encoding, got %T", encoded) 118 | default: 119 | return nil, fmt.Errorf("unsupported encoding: %s", e) 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /db_changes/sinker/stats.go: -------------------------------------------------------------------------------- 1 | package sinker 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/streamingfast/bstream" 7 | "github.com/streamingfast/dmetrics" 8 | "github.com/streamingfast/shutter" 9 | sink "github.com/streamingfast/substreams-sink" 10 | "go.uber.org/zap" 11 | ) 12 | 13 | type Stats struct { 14 | *shutter.Shutter 15 | 16 | dbFlushRate *dmetrics.AvgRatePromCounter 17 | dbFlushAvgDuration *dmetrics.AvgDurationCounter 18 | flushedRows *dmetrics.ValueFromMetric 19 | dbFlushedRowsRate *dmetrics.AvgRatePromCounter 20 | handleBlockDuration *dmetrics.AvgDurationCounter 21 | 
handleUndoDuration *dmetrics.AvgDurationCounter
22 |     hasUndoSegments     bool
23 |     lastBlock           bstream.BlockRef
24 |     logger              *zap.Logger
25 | }
26 | 
27 | func NewStats(logger *zap.Logger) *Stats {
28 |     return &Stats{
29 |         Shutter: shutter.New(),
30 | 
31 |         dbFlushRate:         dmetrics.MustNewAvgRateFromPromCounter(FlushCount, 1*time.Second, 30*time.Second, "flush"),
32 |         dbFlushAvgDuration:  dmetrics.NewAvgDurationCounter(30*time.Second, dmetrics.InferUnit, "per flush"),
33 |         flushedRows:         dmetrics.NewValueFromMetric(FlushedRowsCount, "rows"),
34 |         dbFlushedRowsRate:   dmetrics.MustNewAvgRateFromPromCounter(FlushedRowsCount, 1*time.Second, 30*time.Second, "flushed rows"),
35 |         handleBlockDuration: dmetrics.NewAvgDurationCounter(30*time.Second, dmetrics.InferUnit, "per block"),
36 |         handleUndoDuration:  dmetrics.NewAvgDurationCounter(30*time.Second, dmetrics.InferUnit, "per undo"),
37 |         logger:              logger,
38 | 
39 |         lastBlock: unsetBlockRef{},
40 |     }
41 | }
42 | 
43 | func (s *Stats) RecordBlock(block bstream.BlockRef) {
44 |     s.lastBlock = block
45 | }
46 | 
47 | func (s *Stats) AverageFlushDuration() time.Duration {
48 |     return s.dbFlushAvgDuration.Average()
49 | }
50 | 
51 | func (s *Stats) RecordFlushDuration(duration time.Duration) {
52 |     s.dbFlushAvgDuration.AddDuration(duration)
53 | }
54 | 
55 | func (s *Stats) RecordHandleBlockDuration(duration time.Duration) {
56 |     s.handleBlockDuration.AddDuration(duration)
57 | }
58 | 
59 | func (s *Stats) RecordHandleUndoDuration(duration time.Duration) {
60 |     s.handleUndoDuration.AddDuration(duration)
61 |     s.hasUndoSegments = true
62 | }
63 | 
64 | func (s *Stats) Start(each time.Duration, cursor *sink.Cursor) {
65 |     if !cursor.IsBlank() {
66 |         s.lastBlock = cursor.Block()
67 |     }
68 | 
69 |     if s.IsTerminating() || s.IsTerminated() {
70 |         panic("already shutdown, refusing to start again")
71 |     }
72 | 
73 |     go func() {
74 |         ticker := time.NewTicker(each)
75 |         defer ticker.Stop()
76 | 
77 |         for {
78 |             select {
79 |             case <-ticker.C:
80 |                 s.LogNow()
81 |             case <-s.Terminating():
82 |                 return
83 |             }
84 |         }
85 |     }()
86 | }
87 | 
88 | func (s *Stats) LogNow() {
89 |     // The order of the logging fields is important as it affects the final rendering; we carefully
90 |     // ordered them so the development logs look nicer.
91 |     fields := []zap.Field{
92 |         zap.Stringer("db_flush_rate", s.dbFlushRate),
93 |         zap.Stringer("db_flush_duration_rate", s.dbFlushAvgDuration),
94 |         zap.Stringer("db_flushed_rows_rate", s.dbFlushedRowsRate),
95 |         zap.Stringer("handle_block_duration", s.handleBlockDuration),
96 |     }
97 | 
98 |     // Only log undo metrics if we've had any undo operations (typically in live mode)
99 |     if s.hasUndoSegments {
100 |         fields = append(fields, zap.Stringer("handle_undo_duration", s.handleUndoDuration))
101 |     }
102 | 
103 |     fields = append(fields, zap.Stringer("last_block", s.lastBlock))
104 | 
105 |     s.logger.Info("postgres sink stats", fields...)
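    // Usage sketch for this Stats type (assumed wiring, not prescribed by this file): the
    // sinker creates the stats object, starts the periodic logger and records measurements
    // as blocks flow through:
    //
    //	stats := NewStats(logger)
    //	stats.Start(15*time.Second, cursor) // the 15s interval is an arbitrary example
    //	defer stats.Close()
    //	stats.RecordBlock(block)
    //	stats.RecordFlushDuration(elapsed)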
106 | }
107 | 
108 | func (s *Stats) Close() {
109 |     s.Shutdown(nil)
110 | }
111 | 
112 | type unsetBlockRef struct{}
113 | 
114 | func (unsetBlockRef) ID() string     { return "" }
115 | func (unsetBlockRef) Num() uint64    { return 0 }
116 | func (unsetBlockRef) String() string { return "" }
117 | 
--------------------------------------------------------------------------------
/db_changes/bundler/writer/buffered_test.go:
--------------------------------------------------------------------------------
1 | package writer
2 | 
3 | import (
4 |     "context"
5 |     "io/fs"
6 |     "path/filepath"
7 |     "strings"
8 |     "testing"
9 | 
10 |     "github.com/streamingfast/bstream"
11 |     "github.com/streamingfast/dstore"
12 |     "github.com/stretchr/testify/assert"
13 |     "github.com/stretchr/testify/require"
14 | )
15 | 
16 | func TestNewBufferedIO(t *testing.T) {
17 |     listFiles := func(root string) (out []string) {
18 |         filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
19 |             if err == nil {
20 |                 if !d.IsDir() {
21 |                     out = append(out, strings.Replace(path, root, "", 1))
22 |                 }
23 |             }
24 | 
25 |             return err
26 |         })
27 |         return
28 |     }
29 | 
30 |     newSimplerWriter := func(t *testing.T, writer *BufferedIO) *simplerWriter {
31 |         return &simplerWriter{
32 |             writer: writer,
33 |             t:      t,
34 |         }
35 |     }
36 | 
37 |     tests := []struct {
38 |         name       string
39 |         bufferSize uint64
40 |         checks     func(t *testing.T, writer *BufferedIO, workingDir string, output *dstore.MockStore)
41 |     }{
42 |         {
43 |             "all in memory no write",
44 |             16,
45 |             func(t *testing.T, writer *BufferedIO, workingDir string, output *dstore.MockStore) {
46 |                 simpler := newSimplerWriter(t, writer)
47 | 
48 |                 require.NoError(t, writer.StartBoundary(bstream.NewInclusiveRange(0, 10)))
49 |                 require.NoError(t, simpler.Write([]byte("{first}")))
50 |                 require.NoError(t, simpler.Write([]byte("{second}")))
51 | 
52 |                 uploadable, err := writer.CloseBoundary(context.Background())
53 |                 require.NoError(t, err)
54 |                 writtenFiles := listFiles(workingDir)
55 | 
56 |                 _, err = uploadable.Upload(context.Background(), output)
57 |                 require.NoError(t, err)
58 | 
59 |                 assert.Len(t, writtenFiles, 0)
60 |                 assert.Equal(t, map[string][]byte{
61 |                     "0000000000-0000000009": []byte(`{first}{second}`),
62 |                 }, output.Files)
63 |             },
64 |         },
65 | 
66 |         {
67 |             "write to file",
68 |             4,
69 |             func(t *testing.T, writer *BufferedIO, workingDir string, output *dstore.MockStore) {
70 |                 simpler := newSimplerWriter(t, writer)
71 | 
72 |                 require.NoError(t, writer.StartBoundary(bstream.NewInclusiveRange(0, 10)), "start boundary")
73 |                 require.NoError(t, simpler.Write([]byte("{first}")), "write first content")
74 |                 require.NoError(t, simpler.Write([]byte("{second}")), "write second content")
75 | 
76 |                 //require.NoError(t, writer.CloseBoundary(context.Background()), "closing boundary")
77 |                 uploadable, err := writer.CloseBoundary(context.Background())
78 |                 require.NoError(t, err)
79 | 
80 |                 writtenFiles := listFiles(workingDir)
81 | 
82 |                 _, err = uploadable.Upload(context.Background(), output)
83 |                 require.NoError(t, err, "upload file")
84 | 
85 |                 assert.ElementsMatch(t, []string{"/0000000000-0000000010.tmp.jsonl"}, writtenFiles)
86 |                 assert.Equal(t, map[string][]byte{
87 |                     "0000000000-0000000009": []byte(`{first}{second}`),
88 |                 }, output.Files)
89 |             },
90 |         },
91 |     }
92 |     for _, tt := range tests {
93 |         t.Run(tt.name, func(t *testing.T) {
94 |             workingDir := t.TempDir()
95 |             outputStore := dstore.NewMockStore(nil)
96 | 
97 |             writer := NewBufferedIO(tt.bufferSize, workingDir, FileTypeJSONL, zlog)
98 | 
99 |             tt.checks(t,
writer, workingDir, outputStore)
100 |         })
101 |     }
102 | }
103 | 
104 | type simplerWriter struct {
105 |     writer *BufferedIO
106 |     t      *testing.T
107 | }
108 | 
109 | func (w *simplerWriter) Write(buf []byte) (err error) {
110 |     w.t.Helper()
111 | 
112 |     n, err := w.writer.Write(buf)
113 |     if n < 0 {
114 |         w.t.Fatal("writer returned a negative written byte count, which is invalid according to the 'io.Writer' spec")
115 |     }
116 | 
117 |     if n > len(buf) {
118 |         w.t.Fatal("writer returned more written bytes than our actual buffer length, which is invalid according to the 'io.Writer' spec")
119 |     }
120 | 
121 |     if n < len(buf) && err == nil {
122 |         w.t.Fatal("writer returned fewer written bytes than our actual buffer length but err is nil, which is invalid according to the 'io.Writer' spec")
123 |     }
124 | 
125 |     return err
126 | }
127 | 
--------------------------------------------------------------------------------
/db_proto/sql/schema/table.go:
--------------------------------------------------------------------------------
1 | package schema
2 | 
3 | import (
4 |     "fmt"
5 |     "strings"
6 | 
7 |     pbSchema "github.com/streamingfast/substreams-sink-sql/pb/sf/substreams/sink/sql/schema/v1"
8 |     "github.com/streamingfast/substreams-sink-sql/proto"
9 |     "google.golang.org/protobuf/reflect/protoreflect"
10 | )
11 | 
12 | type PrimaryKey struct {
13 |     Name            string
14 |     FieldDescriptor protoreflect.FieldDescriptor
15 |     Index           int
16 | }
17 | 
18 | type ChildOf struct {
19 |     ParentTable      string
20 |     ParentTableField string
21 | }
22 | 
23 | func NewChildOf(childOf string) (*ChildOf, error) {
24 |     parts := strings.Split(childOf, " on ")
25 |     if len(parts) != 2 {
26 |         return nil, fmt.Errorf("invalid child of format %q. expecting 'table_name on field_name' format", childOf)
27 |     }
28 | 
29 |     return &ChildOf{
30 |         ParentTable:      strings.TrimSpace(parts[0]),
31 |         ParentTableField: strings.TrimSpace(parts[1]),
32 |     }, nil
33 | }
34 | 
35 | type Table struct {
36 |     Name        string
37 |     PrimaryKey  *PrimaryKey
38 |     ChildOf     *ChildOf
39 |     Columns     []*Column
40 |     Ordinal     int
41 |     InlineDepth int
42 |     PbTableInfo *pbSchema.Table
43 | }
44 | 
45 | func NewTable(descriptor protoreflect.MessageDescriptor, tableInfo *pbSchema.Table, ordinal int, inlineDepth int) (*Table, error) {
46 |     table := &Table{
47 |         Name:        string(descriptor.Name()),
48 |         Ordinal:     ordinal,
49 |         InlineDepth: inlineDepth,
50 |         PbTableInfo: tableInfo,
51 |     }
52 |     table.Name = tableInfo.Name
53 | 
54 |     typeName := string(descriptor.Name())
55 |     isTimestamp := typeName == ".google.protobuf.Timestamp" || typeName == "Timestamp"
56 |     if isTimestamp {
57 |         return nil, nil
58 |     }
59 | 
60 |     if tableInfo.ChildOf != nil {
61 |         co, err := NewChildOf(*tableInfo.ChildOf)
62 |         if err != nil {
63 |             return nil, fmt.Errorf("error parsing child of: %w", err)
64 |         }
65 |         table.ChildOf = co
66 |     }
67 | 
68 |     err := table.processColumns(descriptor)
69 |     if err != nil {
70 |         return nil, fmt.Errorf("error processing fields for table %q: %w", string(descriptor.Name()), err)
71 |     }
72 | 
73 |     if len(table.Columns) == 0 {
74 |         return nil, nil
75 |     }
76 | 
77 |     return table, nil
78 | }
79 | 
80 | func (t *Table) processColumns(descriptor protoreflect.MessageDescriptor) error {
81 |     fields := descriptor.Fields()
82 |     for idx := 0; idx < fields.Len(); idx++ {
83 |         fieldDescriptor := fields.Get(idx)
84 |         fieldInfo := proto.FieldInfo(fieldDescriptor)
85 | 
86 |         if fieldDescriptor.ContainingOneof() != nil && !fieldDescriptor.HasOptionalKeyword() {
87 |             continue
88 |         }
89 | 
90 |         if fieldDescriptor.IsList() {
91 |             if fieldDescriptor.Kind() ==
protoreflect.MessageKind {
92 |                 // Check if this is an inline nested field - if so, process it as a column
93 |                 if fieldInfo != nil && fieldInfo.Inline {
94 |                     // Allow inline repeated message fields to be processed as nested columns
95 |                 } else {
96 |                     // This will be handled by table relations
97 |                     continue
98 |                 }
99 |             }
100 |             // Allow repeated scalar fields to be processed as array columns
101 |         }
102 | 
103 |         if fieldDescriptor.Kind() == protoreflect.MessageKind {
104 |             typeName := string(fieldDescriptor.Message().Name())
105 |             isTimestamp := typeName == ".google.protobuf.Timestamp" || typeName == "Timestamp"
106 | 
107 |             isInline := fieldInfo != nil && fieldInfo.Inline
108 |             if !isTimestamp && !isInline {
109 |                 continue
110 |             }
111 |         }
112 |         column, err := NewColumn(fieldDescriptor, fieldInfo, t.Ordinal, t.InlineDepth)
113 |         if err != nil {
114 |             return fmt.Errorf("error processing column %q: %w", string(fieldDescriptor.Name()), err)
115 |         }
116 | 
117 |         if column.IsPrimaryKey {
118 |             if t.PrimaryKey != nil {
119 |                 return fmt.Errorf("multiple fields marked as primary key are not supported")
120 |             }
121 | 
122 |             t.PrimaryKey = &PrimaryKey{
123 |                 Name:            column.Name,
124 |                 FieldDescriptor: fieldDescriptor,
125 |                 Index:           idx,
126 |             }
127 |         }
128 |         t.Columns = append(t.Columns, column)
129 |     }
130 | 
131 |     return nil
132 | }
133 | 
--------------------------------------------------------------------------------
/db_proto/sql/schema/schema.go:
--------------------------------------------------------------------------------
1 | package schema
2 | 
3 | import (
4 |     "fmt"
5 | 
6 |     schema "github.com/streamingfast/substreams-sink-sql/pb/sf/substreams/sink/sql/schema/v1"
7 |     "github.com/streamingfast/substreams-sink-sql/proto"
8 |     "go.uber.org/zap"
9 |     "google.golang.org/protobuf/reflect/protoreflect"
10 | )
11 | 
12 | type Schema struct {
13 |     Name                  string
14 |     TableRegistry         map[string]*Table
15 |     logger                *zap.Logger
16 |     rootMessageDescriptor protoreflect.MessageDescriptor
17 |     withProtoOption       bool
18 | }
19 | 
20 | func NewSchema(name string, rootMessageDescriptor protoreflect.MessageDescriptor, withProtoOption bool, logger *zap.Logger) (*Schema, error) {
21 |     logger.Info("creating schema", zap.String("name", name), zap.String("root_message_descriptor", string(rootMessageDescriptor.Name())), zap.Bool("with_proto_option", withProtoOption))
22 |     s := &Schema{
23 |         Name:                  name,
24 |         TableRegistry:         make(map[string]*Table),
25 |         logger:                logger,
26 |         rootMessageDescriptor: rootMessageDescriptor,
27 |         withProtoOption:       withProtoOption,
28 |     }
29 | 
30 |     err := s.init(rootMessageDescriptor)
31 |     if err != nil {
32 |         return nil, fmt.Errorf("initializing schema: %w", err)
33 |     }
34 |     return s, nil
35 | }
36 | 
37 | func (s *Schema) ChangeName(name string) error {
38 |     s.Name = name
39 |     s.TableRegistry = make(map[string]*Table)
40 |     err := s.init(s.rootMessageDescriptor)
41 |     if err != nil {
42 |         return fmt.Errorf("changing schema name: %w", err)
43 |     }
44 | 
45 |     return nil
46 | }
47 | 
48 | func (s *Schema) init(rootMessageDescriptor protoreflect.MessageDescriptor) error {
49 |     s.logger.Info("initializing schema", zap.String("name", s.Name), zap.String("root_message_descriptor", string(rootMessageDescriptor.Name())))
50 |     err := s.walkMessageDescriptor(rootMessageDescriptor, 0, func(md protoreflect.MessageDescriptor, ordinal int) error {
51 |         s.logger.Debug("creating table message descriptor", zap.String("message_descriptor_name", string(md.Name())), zap.Int("ordinal", ordinal))
52 |         tableInfo := proto.TableInfo(md)
53 | 
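        // When the message carries no explicit schema.table option, proto.TableInfo returns nil:
        // in proto-option mode such messages are simply skipped, otherwise a default table named
        // after the message is synthesized below.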
if tableInfo == nil {
54 |             if s.withProtoOption {
55 |                 return nil
56 |             }
57 |             tableInfo = &schema.Table{
58 |                 Name:    string(md.Name()),
59 |                 ChildOf: nil,
60 |             }
61 |         }
62 |         if _, found := s.TableRegistry[tableInfo.Name]; found {
63 |             return nil
64 |         }
65 |         table, err := NewTable(md, tableInfo, ordinal, 0)
66 |         if err != nil {
67 |             return fmt.Errorf("creating table message descriptor: %w", err)
68 |         }
69 |         if table != nil {
70 |             s.logger.Debug("created table message descriptor", zap.String("message_descriptor_name", string(md.Name())), zap.Int("ordinal", ordinal), zap.String("table_name", table.Name))
71 |             s.TableRegistry[tableInfo.Name] = table
72 |         }
73 |         return nil
74 |     })
75 | 
76 |     if err != nil {
77 |         return fmt.Errorf("walking and creating table message descriptors registry: %q: %w", string(rootMessageDescriptor.Name()), err)
78 |     }
79 | 
80 |     return nil
81 | }
82 | 
83 | func (s *Schema) walkMessageDescriptor(md protoreflect.MessageDescriptor, ordinal int, task func(md protoreflect.MessageDescriptor, ordinal int) error) error {
84 |     s.logger.Debug("walking message descriptor", zap.String("message_descriptor_name", string(md.Name())), zap.Int("ordinal", ordinal))
85 |     fields := md.Fields()
86 |     for i := 0; i < fields.Len(); i++ {
87 |         field := fields.Get(i)
88 |         s.logger.Debug("walking field", zap.String("field_name", string(field.Name())), zap.String("field_type", field.Kind().String()))
89 |         if field.Kind() == protoreflect.MessageKind {
90 |             err := s.walkMessageDescriptor(field.Message(), ordinal+1, task)
91 |             if err != nil {
92 |                 return fmt.Errorf("walking field %q message descriptor: %w", string(field.Name()), err)
93 |             }
94 |         }
95 |     }
96 | 
97 |     err := task(md, ordinal)
98 |     if err != nil {
99 |         return fmt.Errorf("running task on message descriptor %q: %w", string(md.Name()), err)
100 |     }
101 | 
102 |     return nil
103 | }
104 | 
105 | func (s *Schema) String() string {
106 |     return s.Name
107 | }
--------------------------------------------------------------------------------
/tests/integration/substreams_grpc_mock_test.go:
--------------------------------------------------------------------------------
1 | package tests
2 | 
3 | import (
4 |     "fmt"
5 |     "iter"
6 |     "sync"
7 | 
8 |     pbsubstreamsrpc "github.com/streamingfast/substreams/pb/sf/substreams/rpc/v2"
9 |     "google.golang.org/grpc/codes"
10 |     "google.golang.org/grpc/status"
11 | )
12 | 
13 | // MessageBucket represents one stream's worth of results from the message sequence.
14 | // The semantics of each (response, error) pair yielded by its iterator are:
15 | // - Response: nil, Error: nil => EOF of stream
16 | // - Response: non-nil, Error: nil => Send message to stream
17 | // - Response: <any>, Error: non-nil => Close stream with error
18 | type MessageBucket struct {
19 |     Responses        []*pbsubstreamsrpc.Response
20 |     EndOfStreamError error
21 | }
22 | 
23 | func (b *MessageBucket) iterator() iter.Seq2[*pbsubstreamsrpc.Response, error] {
24 |     return func(yield func(*pbsubstreamsrpc.Response, error) bool) {
25 |         for _, response := range b.Responses {
26 |             if !yield(response, nil) {
27 |                 return
28 |             }
29 |         }
30 | 
31 |         yield(nil, b.EndOfStreamError)
32 |     }
33 | }
34 | 
35 | // FakeStreamServer implements pbsubstreamsrpc.StreamServer for testing.
36 | // It supports buckets of message sequences, where each call to Blocks processes the next bucket.
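// A sketch of how test code feeds newFakeStreamServer below (response1/response2/response3
// are hypothetical *pbsubstreamsrpc.Response values):
//
//	server := newFakeStreamServer([]any{
//		response1, response2, nil,      // bucket 1: two messages, then clean end of stream
//		response3, io.ErrUnexpectedEOF, // bucket 2: one message, then stream closed with error
//	})
//
// Each call to Blocks consumes exactly one bucket.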
37 | type FakeStreamServer struct {
38 |     pbsubstreamsrpc.UnimplementedStreamServer
39 |     buckets       []MessageBucket
40 |     currentBucket int
41 |     mu            sync.Mutex
42 | }
43 | 
44 | func newFakeStreamServer(pattern []any) *FakeStreamServer {
45 |     buckets := []MessageBucket{}
46 |     currentBucket := []*pbsubstreamsrpc.Response{}
47 | 
48 |     rollBucket := func(endOfStreamError error) {
49 |         if len(currentBucket) > 0 {
50 |             buckets = append(buckets, MessageBucket{
51 |                 Responses:        currentBucket,
52 |                 EndOfStreamError: endOfStreamError,
53 |             })
54 |             currentBucket = []*pbsubstreamsrpc.Response{}
55 |         }
56 |     }
57 | 
58 |     for _, item := range pattern {
59 |         switch v := item.(type) {
60 |         case *pbsubstreamsrpc.Response:
61 |             currentBucket = append(currentBucket, v)
62 |         case error:
63 |             // If we encounter an error, it indicates the end of the current bucket
64 |             rollBucket(v)
65 |         default:
66 |             // If the item is nil, it's an end-of-stream signal and marks a new bucket
67 |             if v == nil {
68 |                 rollBucket(nil)
69 |                 continue
70 |             }
71 |         }
72 |     }
73 | 
74 |     // Roll the last bucket if it has items
75 |     rollBucket(nil)
76 | 
77 |     return &FakeStreamServer{
78 |         buckets:       buckets,
79 |         currentBucket: 0,
80 |     }
81 | }
82 | 
83 | // Blocks implements the Stream RPC method.
84 | // It uses the iterator to get messages and handles them according to the iterator semantics.
85 | func (s *FakeStreamServer) Blocks(req *pbsubstreamsrpc.Request, stream pbsubstreamsrpc.Stream_BlocksServer) error {
86 |     bucket := s.nextBucket()
87 |     if bucket == nil {
88 |         // We use Unauthenticated because it's a fatal error in the sinker which will stop processing
89 |         return status.Error(codes.Unauthenticated, "test mock data exhausted: no more message buckets available")
90 |     }
91 | 
92 |     // First send SessionInit message
93 |     sessionInit := &pbsubstreamsrpc.Response{
94 |         Message: &pbsubstreamsrpc.Response_Session{
95 |             Session: &pbsubstreamsrpc.SessionInit{
96 |                 TraceId:            "test-trace-id",
97 |                 ResolvedStartBlock: uint64(req.StartBlockNum),
98 |                 LinearHandoffBlock: uint64(req.StartBlockNum),
99 |                 MaxParallelWorkers: 1,
100 |             },
101 |         },
102 |     }
103 | 
104 |     if err := stream.Send(sessionInit); err != nil {
105 |         return fmt.Errorf("failed to send session init: %w", err)
106 |     }
107 | 
108 |     for response, err := range bucket.iterator() {
109 |         if err != nil {
110 |             return err
111 |         }
112 | 
113 |         if response == nil {
114 |             // This indicates the end of the stream, we can return nil to indicate EOF
115 |             return nil
116 |         }
117 | 
118 |         if err := stream.Send(response); err != nil {
119 |             return fmt.Errorf("failed to send message: %w", err)
120 |         }
121 |     }
122 | 
123 |     return nil
124 | }
125 | 
126 | func (s *FakeStreamServer) nextBucket() *MessageBucket {
127 |     s.mu.Lock()
128 |     defer s.mu.Unlock()
129 | 
130 |     if s.currentBucket >= len(s.buckets) {
131 |         return nil
132 |     }
133 | 
134 |     bucket := &s.buckets[s.currentBucket]
135 |     s.currentBucket++
136 | 
137 |     return bucket
138 | }
139 | 
--------------------------------------------------------------------------------
/db_changes/db/testing.go:
--------------------------------------------------------------------------------
1 | package db
2 | 
3 | import (
4 |     "context"
5 |     "database/sql"
6 |     "fmt"
7 |     "strings"
8 |     "testing"
9 | 
10 |     "maps"
11 | 
12 |     "github.com/streamingfast/logging"
13 |     "github.com/stretchr/testify/require"
14 |     "go.uber.org/zap"
15 | )
16 | 
17 | const testCursorTableName = "cursors"
18 | const testHistoryTableName = "substreams_history"
19 | 
20 | func NewTestLoader(
21 |     t *testing.T,
22 |     dsnRaw string,
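    // testTx, when non-nil, is installed on the loader so tests can capture the SQL
    // that would be executed (see the TestTx recorder further down this file).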
23 | testTx *TestTx, 24 | tables map[string]*TableInfo, 25 | zlog *zap.Logger, 26 | tracer logging.Tracer, 27 | ) *Loader { 28 | dsn, err := ParseDSN(dsnRaw) 29 | require.NoError(t, err) 30 | 31 | loader, err := NewLoader( 32 | dsn, 33 | testCursorTableName, 34 | testHistoryTableName, 35 | "", 36 | 0, 0, 0, 37 | OnModuleHashMismatchIgnore.String(), 38 | nil, 39 | zlog, tracer, 40 | ) 41 | require.NoError(t, err) 42 | 43 | if testTx != nil { 44 | loader.testTx = testTx 45 | } 46 | loader.tables = tables 47 | loader.cursorTable = tables[testCursorTableName] 48 | return loader 49 | 50 | } 51 | 52 | func TestSinglePrimaryKeyTables(schema string) map[string]*TableInfo { 53 | return TestTables(schema, map[string]*TableInfo{ 54 | "xfer": mustNewTableInfo(schema, "xfer", []string{"id"}, map[string]*ColumnInfo{ 55 | "id": NewColumnInfo("id", "text", ""), 56 | "from": NewColumnInfo("from", "text", ""), 57 | "to": NewColumnInfo("to", "text", ""), 58 | }), 59 | }) 60 | } 61 | 62 | func TestTables(schema string, customTable map[string]*TableInfo) map[string]*TableInfo { 63 | out := map[string]*TableInfo{} 64 | 65 | addCursorsTable(schema, out) 66 | maps.Copy(out, customTable) 67 | 68 | return out 69 | } 70 | 71 | func addCursorsTable(schema string, into map[string]*TableInfo) { 72 | into[testCursorTableName] = mustNewTableInfo(schema, testCursorTableName, []string{"id"}, map[string]*ColumnInfo{ 73 | "block_num": NewColumnInfo("block_num", "bigint", ""), 74 | "block_id": NewColumnInfo("block_id", "text", ""), 75 | "cursor": NewColumnInfo("cursor", "text", ""), 76 | "id": NewColumnInfo("id", "text", ""), 77 | }) 78 | } 79 | 80 | func GenerateCreateTableSQL(tables map[string]*TableInfo) string { 81 | var sqlStatements []string 82 | for _, tableInfo := range tables { 83 | if tableInfo.name == testCursorTableName { 84 | continue 85 | } 86 | 87 | var columns []string 88 | for _, colInfo := range tableInfo.columnsByName { 89 | columns = append(columns, fmt.Sprintf("%s %s", colInfo.escapedName, colInfo.databaseTypeName)) 90 | } 91 | var pkColumns []string 92 | for _, pkCol := range tableInfo.primaryColumns { 93 | pkColumns = append(pkColumns, pkCol.escapedName) 94 | } 95 | pk := fmt.Sprintf("PRIMARY KEY (%s)", strings.Join(pkColumns, ", ")) 96 | columns = append(columns, pk) 97 | createStmt := fmt.Sprintf( 98 | "CREATE TABLE %s (%s);", 99 | tableInfo.identifier, 100 | strings.Join(columns, ", "), 101 | ) 102 | sqlStatements = append(sqlStatements, createStmt) 103 | } 104 | return strings.Join(sqlStatements, "\n") 105 | } 106 | 107 | func mustNewTableInfo(schema, name string, pkList []string, columnsByName map[string]*ColumnInfo) *TableInfo { 108 | ti, err := NewTableInfo(schema, name, pkList, columnsByName) 109 | if err != nil { 110 | panic(err) 111 | } 112 | return ti 113 | } 114 | 115 | type TestTx struct { 116 | queries []string 117 | next []*sql.Rows 118 | } 119 | 120 | func (t *TestTx) Rollback() error { 121 | t.queries = append(t.queries, "ROLLBACK") 122 | return nil 123 | } 124 | 125 | func (t *TestTx) Commit() error { 126 | t.queries = append(t.queries, "COMMIT") 127 | return nil 128 | } 129 | 130 | func (t *TestTx) ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error) { 131 | t.queries = append(t.queries, query) 132 | return &testResult{}, nil 133 | } 134 | 135 | func (t *TestTx) Results() []string { 136 | return t.queries 137 | } 138 | 139 | func (t *TestTx) QueryContext(ctx context.Context, query string, args ...any) (out *sql.Rows, err error) { 140 | t.queries = 
append(t.queries, query) 141 | return nil, nil 142 | } 143 | 144 | type testResult struct{} 145 | 146 | func (t *testResult) LastInsertId() (int64, error) { 147 | return 0, nil 148 | } 149 | 150 | func (t *testResult) RowsAffected() (int64, error) { 151 | return 1, nil 152 | } 153 | -------------------------------------------------------------------------------- /db_changes/db/operations.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "reflect" 7 | "regexp" 8 | "strings" 9 | "time" 10 | ) 11 | 12 | type TypeGetter func(tableName string, columnName string) (reflect.Type, error) 13 | 14 | type Queryable interface { 15 | query(d Dialect) (string, error) 16 | } 17 | 18 | type OperationType string 19 | 20 | const ( 21 | OperationTypeInsert OperationType = "INSERT" 22 | OperationTypeUpsert OperationType = "UPSERT" 23 | OperationTypeUpdate OperationType = "UPDATE" 24 | OperationTypeDelete OperationType = "DELETE" 25 | ) 26 | 27 | type Operation struct { 28 | table *TableInfo 29 | opType OperationType 30 | primaryKey map[string]string 31 | data map[string]string 32 | ordinal uint64 33 | reversibleBlockNum *uint64 // nil if that block is known to be irreversible 34 | } 35 | 36 | func (o *Operation) String() string { 37 | return fmt.Sprintf("%s/%s (%s)", o.table.identifier, createRowUniqueID(o.primaryKey), strings.ToLower(string(o.opType))) 38 | } 39 | 40 | func (l *Loader) newInsertOperation(table *TableInfo, primaryKey map[string]string, data map[string]string, ordinal uint64, reversibleBlockNum *uint64) *Operation { 41 | return &Operation{ 42 | table: table, 43 | opType: OperationTypeInsert, 44 | primaryKey: primaryKey, 45 | data: data, 46 | ordinal: ordinal, 47 | reversibleBlockNum: reversibleBlockNum, 48 | } 49 | } 50 | 51 | func (l *Loader) newUpsertOperation(table *TableInfo, primaryKey map[string]string, data map[string]string, ordinal uint64, reversibleBlockNum *uint64) *Operation { 52 | return &Operation{ 53 | table: table, 54 | opType: OperationTypeUpsert, 55 | primaryKey: primaryKey, 56 | data: data, 57 | ordinal: ordinal, 58 | reversibleBlockNum: reversibleBlockNum, 59 | } 60 | } 61 | 62 | func (l *Loader) newUpdateOperation(table *TableInfo, primaryKey map[string]string, data map[string]string, ordinal uint64, reversibleBlockNum *uint64) *Operation { 63 | return &Operation{ 64 | table: table, 65 | opType: OperationTypeUpdate, 66 | primaryKey: primaryKey, 67 | data: data, 68 | ordinal: ordinal, 69 | reversibleBlockNum: reversibleBlockNum, 70 | } 71 | } 72 | 73 | func (l *Loader) newDeleteOperation(table *TableInfo, primaryKey map[string]string, ordinal uint64, reversibleBlockNum *uint64) *Operation { 74 | return &Operation{ 75 | table: table, 76 | opType: OperationTypeDelete, 77 | primaryKey: primaryKey, 78 | ordinal: ordinal, 79 | reversibleBlockNum: reversibleBlockNum, 80 | } 81 | } 82 | 83 | func (o *Operation) mergeData(newData map[string]string) error { 84 | if o.opType == OperationTypeDelete { 85 | return fmt.Errorf("unable to merge data for a delete operation") 86 | } 87 | 88 | for k, v := range newData { 89 | o.data[k] = v 90 | } 91 | return nil 92 | } 93 | 94 | // mergeOperation merges another operation into this one, keeping the lowest ordinal 95 | func (o *Operation) mergeOperation(otherData map[string]string) error { 96 | if o.opType == OperationTypeDelete { 97 | return fmt.Errorf("unable to merge operation for a delete operation") 98 | } 99 | 100 | return 
o.mergeData(otherData)
101 | }
102 | 
103 | var integerRegex = regexp.MustCompile(`^\d+$`)
104 | var dateRegex = regexp.MustCompile(`^\d{4}-\d{2}-\d{2}$`)
105 | var reflectTypeTime = reflect.TypeOf(time.Time{})
106 | 
107 | func EscapeIdentifier(valueToEscape string) string {
108 |     if strings.Contains(valueToEscape, `"`) {
109 |         valueToEscape = strings.ReplaceAll(valueToEscape, `"`, `""`)
110 |     }
111 | 
112 |     return `"` + valueToEscape + `"`
113 | }
114 | 
115 | func escapeStringValue(valueToEscape string) string {
116 |     if strings.Contains(valueToEscape, `'`) {
117 |         valueToEscape = strings.ReplaceAll(valueToEscape, `'`, `''`)
118 |     }
119 | 
120 |     return `'` + valueToEscape + `'`
121 | }
122 | 
123 | // to store in a history table
124 | func primaryKeyToJSON(primaryKey map[string]string) string {
125 |     m, err := json.Marshal(primaryKey)
126 |     if err != nil {
127 |         panic(err) // should never happen with map[string]string
128 |     }
129 |     return string(m)
130 | }
131 | 
132 | // to store in a history table
133 | func jsonToPrimaryKey(in string) (map[string]string, error) {
134 |     out := make(map[string]string)
135 |     err := json.Unmarshal([]byte(in), &out)
136 |     if err != nil {
137 |         return nil, err
138 |     }
139 |     return out, nil
140 | }
141 | 
--------------------------------------------------------------------------------
/db_proto/sql/postgres/accumulator_inserter.go:
--------------------------------------------------------------------------------
1 | package postgres
2 | 
3 | import (
4 |     "database/sql"
5 |     "fmt"
6 |     "strings"
7 | 
8 |     sql2 "github.com/streamingfast/substreams-sink-sql/db_proto/sql"
9 |     "github.com/streamingfast/substreams-sink-sql/db_proto/sql/schema"
10 |     "go.uber.org/zap"
11 | )
12 | 
13 | type accumulator struct {
14 |     query     string
15 |     rowValues [][]string
16 | }
17 | 
18 | type AccumulatorInserter struct {
19 |     accumulators map[string]*accumulator
20 |     cursorStmt   *sql.Stmt
21 |     logger       *zap.Logger
22 | }
23 | 
24 | func NewAccumulatorInserter(logger *zap.Logger) (*AccumulatorInserter, error) {
25 |     logger = logger.Named("postgres inserter")
26 | 
27 |     return &AccumulatorInserter{
28 |         logger: logger,
29 |     }, nil
30 | }
31 | 
32 | func (i *AccumulatorInserter) init(database *Database) error {
33 |     tables := database.dialect.GetTables()
34 |     accumulators := map[string]*accumulator{}
35 | 
36 |     for _, table := range tables {
37 |         query, err := createInsertFromDescriptorAcc(table, database.dialect)
38 |         if err != nil {
39 |             return fmt.Errorf("creating insert from descriptor for table %q: %w", table.Name, err)
40 |         }
41 |         accumulators[table.Name] = &accumulator{
42 |             query: query,
43 |         }
44 |     }
45 |     accumulators["_blocks_"] = &accumulator{
46 |         query: fmt.Sprintf("INSERT INTO %s (number, hash, timestamp) VALUES ", tableName(database.schema.Name, "_blocks_")),
47 |     }
48 | 
49 |     cursorQuery := fmt.Sprintf("INSERT INTO %s (name, cursor) VALUES ($1, $2) ON CONFLICT (name) DO UPDATE SET cursor = $2", tableName(database.schema.Name, "_cursor_"))
50 |     cs, err := database.db.Prepare(cursorQuery)
51 |     if err != nil {
52 |         return fmt.Errorf("preparing statement %q: %w", cursorQuery, err)
53 |     }
54 | 
55 |     i.accumulators = accumulators
56 |     i.cursorStmt = cs
57 | 
58 |     return nil
59 | }
60 | 
61 | func createInsertFromDescriptorAcc(table *schema.Table, dialect sql2.Dialect) (string, error) {
62 |     tableName := dialect.FullTableName(table)
63 |     fields := table.Columns
64 | 
65 |     var fieldNames []string
66 |     fieldNames = append(fieldNames, sql2.DialectFieldBlockNumber)
67 |     fieldNames = append(fieldNames,
sql2.DialectFieldBlockTimestamp) 68 | 69 | if pk := table.PrimaryKey; pk != nil { 70 | fieldNames = append(fieldNames, pk.Name) 71 | } 72 | 73 | if table.ChildOf != nil { 74 | fieldNames = append(fieldNames, table.ChildOf.ParentTableField) 75 | } 76 | 77 | for _, field := range fields { 78 | if table.PrimaryKey != nil && field.Name == table.PrimaryKey.Name { 79 | continue 80 | } 81 | 82 | if field.IsExtension { //not a direct child 83 | continue 84 | } 85 | if field.IsRepeated { 86 | // Check if it's a repeated message (which should be skipped) or repeated scalar (which should be processed) 87 | if field.IsMessage { 88 | continue 89 | } 90 | // Allow repeated scalar fields to be processed as arrays 91 | } 92 | fieldNames = append(fieldNames, field.QuotedName()) 93 | } 94 | 95 | return fmt.Sprintf("INSERT INTO %s (%s) VALUES ", 96 | tableName, 97 | strings.Join(fieldNames, ", "), 98 | ), nil 99 | 100 | } 101 | 102 | func (i *AccumulatorInserter) insert(table string, values []any, database *Database) error { 103 | var v []string 104 | if table == "_cursor_" { 105 | stmt := database.wrapInsertStatement(i.cursorStmt) 106 | _, err := stmt.Exec(values...) 107 | if err != nil { 108 | return fmt.Errorf("executing insert: %w", err) 109 | } 110 | return nil 111 | } 112 | for _, value := range values { 113 | v = append(v, ValueToString(value, database.dialect.bytesEncoding)) 114 | } 115 | accumulator := i.accumulators[table] 116 | if accumulator == nil { 117 | return fmt.Errorf("accumulator not found for table %q", table) 118 | } 119 | accumulator.rowValues = append(accumulator.rowValues, v) 120 | 121 | return nil 122 | } 123 | 124 | func (i *AccumulatorInserter) flush(database *Database) error { 125 | for _, acc := range i.accumulators { 126 | if len(acc.rowValues) == 0 { 127 | continue 128 | } 129 | var b strings.Builder 130 | b.WriteString(acc.query) 131 | for _, values := range acc.rowValues { 132 | b.WriteString("(") 133 | b.WriteString(strings.Join(values, ",")) 134 | b.WriteString("),") 135 | } 136 | insert := strings.Trim(b.String(), ",") 137 | 138 | _, err := database.tx.Exec(insert) 139 | if err != nil { 140 | shortInsert := insert 141 | if len(insert) > 256 { 142 | shortInsert = insert[:256] + "..." 
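            // The statement being reported has the multi-row shape accumulated in flush: the
            // per-table prefix "INSERT INTO <table> (<columns>) VALUES " followed by one
            // parenthesized value tuple per buffered row, e.g. (v1,v2),(v1,v2),...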
143 | } 144 | fmt.Println("insert query:", insert) 145 | return fmt.Errorf("executing insert %s: %w", shortInsert, err) 146 | } 147 | acc.rowValues = acc.rowValues[:0] 148 | } 149 | 150 | return nil 151 | } 152 | -------------------------------------------------------------------------------- /db_proto/test/substreams/order/proto/test/relations/relations.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | import "google/protobuf/descriptor.proto"; 3 | import "google/protobuf/timestamp.proto"; 4 | 5 | import "sf/substreams/sink/sql/schema/v1/schema.proto"; 6 | 7 | package test.relations; 8 | option go_package = "test/relations"; 9 | 10 | 11 | message Output { 12 | repeated Entity entities = 1; 13 | } 14 | 15 | 16 | message Entity { 17 | 18 | oneof entity { 19 | TypesTest types_test =1; 20 | 21 | Customer customer = 10; 22 | Order order = 11; 23 | Item item = 12; 24 | } 25 | } 26 | 27 | message TypesTest { 28 | option (schema.table) = { 29 | name: "types_tests" 30 | clickhouse_table_options: { 31 | order_by_fields: [{name: "id"}] 32 | } 33 | }; 34 | 35 | 36 | uint64 id =1 [(schema.field) = { primary_key: true }]; 37 | // Field for each protobuf native type 38 | double double_field = 2; 39 | float float_field = 3; 40 | int32 int32_field = 4; 41 | int64 int64_field = 5; 42 | uint32 uint32_field = 6; 43 | uint64 uint64_field = 7; 44 | sint32 sint32_field = 8; 45 | sint64 sint64_field = 9; 46 | fixed32 fixed32_field = 10; 47 | fixed64 fixed64_field = 11; 48 | sfixed32 sfixed32_field = 12; 49 | sfixed64 sfixed64_field = 13; 50 | bool bool_field = 14; 51 | string string_field = 15; 52 | bytes bytes_field = 16; 53 | 54 | optional string optional_string_set = 17; 55 | optional string optional_string_not_set = 18; 56 | optional int32 optional_int32_field_set = 19; 57 | optional int32 optional_int32_field_not_set = 20; 58 | 59 | 60 | google.protobuf.Timestamp timestamp_field = 30; 61 | 62 | // Array fields for all supported proto types 63 | repeated int32 repeated_int32_field = 50; 64 | repeated int64 repeated_int64_field = 51; 65 | repeated uint32 repeated_uint32_field = 52; 66 | repeated uint64 repeated_uint64_field = 53; 67 | repeated sint32 repeated_sint32_field = 54; 68 | repeated sint64 repeated_sint64_field = 55; 69 | repeated fixed32 repeated_fixed32_field = 56; 70 | repeated fixed64 repeated_fixed64_field = 57; 71 | repeated sfixed32 repeated_sfixed32_field = 58; 72 | repeated sfixed64 repeated_sfixed64_field = 59; 73 | repeated double repeated_double_field = 60; 74 | repeated float repeated_float_field = 61; 75 | repeated bool repeated_bool_field = 62; 76 | repeated string repeated_string_field = 63; 77 | 78 | string str_2_int128 = 100 [(schema.field) = { convertTo: { int128{} }}]; 79 | string str_2_uint128 = 101 [(schema.field) = { convertTo: { uint128 {} }}]; 80 | 81 | string str_2_int256 = 102 [(schema.field) = { convertTo: { int256{} }}]; 82 | string str_2_uint256 = 103 [(schema.field) = { convertTo: { uint256{}} }]; 83 | 84 | string str_2_decimal128 = 104 [(schema.field) = { convertTo: { decimal128{ scale: 4 }}}]; 85 | string str_2_decimal256 = 105 [(schema.field) = { convertTo: { decimal256{ scale: 4 }}}]; 86 | 87 | // Optional uint256 field to test empty string handling 88 | optional string optional_str_2_uint256 = 106 [(schema.field) = { convertTo: { uint256{}} }]; 89 | 90 | NestedLevel1 level1 = 200 [(schema.field) = {inline: true}]; 91 | repeated NestedLevel1 list_of_level1 = 201 [(schema.field) = {inline: true}]; 92 | 
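    // A sketch of what the convertTo options above are expected to produce: the string
    // payloads are parsed into wide numeric columns in the target database (presumably
    // Int128/UInt128/Int256/UInt256 and Decimal with the given scale in ClickHouse); the
    // authoritative mapping lives in the sink's dialect code, not in this proto file.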
93 | }
94 | 
95 | message Customer {
96 |   option (schema.table) = {
97 |     name: "customers"
98 |     clickhouse_table_options: {
99 |       order_by_fields: [{name: "customer_id"}]
100 |     }
101 |   };
102 | 
103 | 
104 |   string customer_id = 1 [(schema.field) = { primary_key: true }];
105 |   string name = 2;
106 | }
107 | 
108 | message Order {
109 |   option (schema.table) = {
110 |     name: "orders"
111 |     clickhouse_table_options: {
112 |       order_by_fields: [{name: "order_id"}, {name: "customer_ref_id"}]
113 |       index_fields: [{
114 |         field_name: "order_id"
115 |         name: "order_id_idx"
116 |         type: bloom_filter
117 |         granularity: 4
118 |       }]
119 |     }
120 |   };
121 | 
122 | 
123 |   string order_id = 1 [(schema.field) = { primary_key: true}];
124 |   string customer_ref_id = 2 [(schema.field) = { foreign_key: "customers on customer_id"}];
125 |   repeated OrderItem items = 3;
126 |   OrderExtension extension = 4;
127 | }
128 | 
129 | message NestedLevel1 {
130 |   string name = 1;
131 |   string desc = 2;
132 | }
133 | 
134 | message OrderExtension {
135 |   option (schema.table) = {
136 |     name: "order_extensions",
137 |     child_of: "orders on order_id"
138 |     clickhouse_table_options: {
139 |       order_by_fields: [{name: "order_id"}],
140 |       partition_fields: [{name: "_block_timestamp_", function: toYYYYDD}]
141 |     }
142 |   };
143 |   string description = 1;
144 | }
145 | 
146 | message OrderItem {
147 |   option (schema.table) = {
148 |     name: "order_items",
149 |     child_of: "orders on order_id"
150 |     clickhouse_table_options: {
151 |       order_by_fields: [{name: "order_id"}, {name: "item_id"}]
152 |     }
153 |   };
154 | 
155 |   // can also leverage orders._id using "order on order_id" if orders do not have an external unique identifier
156 |   string item_id = 2 [(schema.field) = { foreign_key: "items on item_id"}];
157 |   int64 quantity = 11;
158 | }
159 | 
160 | message Item {
161 |   option (schema.table) = {
162 |     name: "items"
163 |     clickhouse_table_options: {
164 |       order_by_fields: [{name: "item_id"}]
165 |     }
166 |   };
167 | 
168 | 
169 |   string item_id = 1 [(schema.field) = { unique: true }];
170 | 
171 |   string name = 10;
172 |   double price = 11;
173 | }
174 | 
--------------------------------------------------------------------------------
/cmd/substreams-sink-sql/run.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 |     "fmt"
5 |     "time"
6 | 
7 |     "github.com/spf13/cobra"
8 |     "github.com/spf13/pflag"
9 |     . "github.com/streamingfast/cli"
10 |     "github.com/streamingfast/cli/sflags"
11 |     sink "github.com/streamingfast/substreams-sink"
12 |     sinker2 "github.com/streamingfast/substreams-sink-sql/db_changes/sinker"
13 |     "github.com/streamingfast/substreams/manifest"
14 | )
15 | 
16 | type ignoreUndoBufferSize struct{}
17 | 
18 | func (i ignoreUndoBufferSize) IsIgnored(in string) bool {
19 |     return in == "undo-buffer-size"
20 | }
21 | 
22 | var sinkRunCmd = Command(sinkRunE,
23 |     "run <dsn> <manifest> [<start>:<stop>]",
24 |     "Runs SQL sink process",
25 |     RangeArgs(2, 3),
26 |     Flags(func(flags *pflag.FlagSet) {
27 |         sink.AddFlagsToSet(flags, ignoreUndoBufferSize{})
28 |         AddCommonSinkerFlags(flags)
29 |         AddCommonDatabaseChangesFlags(flags)
30 | 
31 |         flags.Int("undo-buffer-size", 0, "If non-zero, handling of reorgs in the database is disabled. Instead, a buffer is introduced to only process blocks once they have been confirmed by that many blocks, introducing a latency but slightly reducing the load on the database when close to head.
Set to 0 to enable reorg handling in the database (required for some databases like Postgres).") 32 | flags.Int("batch-block-flush-interval", 1_000, "When in catch up mode, flush every N blocks or after batch-row-flush-interval, whichever comes first. Set to 0 to disable and only use batch-row-flush-interval. Ineffective if the sink is now in the live portion of the chain where only 'live-block-flush-interval' applies.") 33 | flags.Int("batch-row-flush-interval", 100_000, "When in catch up mode, flush every N rows or after batch-block-flush-interval, whichever comes first. Set to 0 to disable and only use batch-block-flush-interval. Ineffective if the sink is now in the live portion of the chain where only 'live-block-flush-interval' applies.") 34 | flags.Int("live-block-flush-interval", 1, "When processing in live mode, flush every N blocks.") 35 | flags.Int("flush-interval", 0, "(deprecated) please use --batch-block-flush-interval instead") 36 | flags.Int("flush-retry-count", 3, "Number of retry attempts for flush operations") 37 | flags.Duration("flush-retry-delay", 1*time.Second, "Base delay for incremental retry backoff on flush failures") 38 | flags.StringP("endpoint", "e", "", "Specify the substreams endpoint, ex: `mainnet.eth.streamingfast.io:443`") 39 | }), 40 | Example("substreams-sink-sql run 'postgres://localhost:5432/posgres?sslmode=disable' uniswap-v3@v0.2.10"), 41 | OnCommandErrorLogAndExit(zlog), 42 | ) 43 | 44 | func sinkRunE(cmd *cobra.Command, args []string) error { 45 | app := NewApplication(cmd.Context()) 46 | 47 | sink.RegisterMetrics() 48 | sinker2.RegisterMetrics() 49 | 50 | dsnString := args[0] 51 | manifestPath := args[1] 52 | blockRange := "" 53 | if len(args) > 2 { 54 | blockRange = args[2] 55 | } 56 | 57 | endpoint := sflags.MustGetString(cmd, "endpoint") 58 | if endpoint == "" { 59 | network := sflags.MustGetString(cmd, "network") 60 | if network == "" { 61 | reader, err := manifest.NewReader(manifestPath) 62 | if err != nil { 63 | return fmt.Errorf("setup manifest reader: %w", err) 64 | } 65 | pkgBundle, err := reader.Read() 66 | if err != nil { 67 | return fmt.Errorf("read manifest: %w", err) 68 | } 69 | network = pkgBundle.Package.Network 70 | } 71 | var err error 72 | endpoint, err = manifest.ExtractNetworkEndpoint(network, sflags.MustGetString(cmd, "endpoint"), zlog) 73 | if err != nil { 74 | return err 75 | } 76 | } 77 | 78 | sink, err := sink.NewFromViper( 79 | cmd, 80 | supportedOutputTypes, 81 | endpoint, 82 | manifestPath, 83 | sink.InferOutputModuleFromPackage, 84 | blockRange, 85 | zlog, 86 | tracer, 87 | ) 88 | if err != nil { 89 | return fmt.Errorf("new base sinker: %w", err) 90 | } 91 | 92 | batchBlockFlushInterval := sflags.MustGetInt(cmd, "batch-block-flush-interval") 93 | if sflags.MustGetInt(cmd, "flush-interval") != 0 { 94 | batchBlockFlushInterval = sflags.MustGetInt(cmd, "flush-interval") 95 | } 96 | batchRowFlushInterval := sflags.MustGetInt(cmd, "batch-row-flush-interval") 97 | liveBlockFlushInterval := sflags.MustGetInt(cmd, "live-block-flush-interval") 98 | flushRetryCount := sflags.MustGetInt(cmd, "flush-retry-count") 99 | flushRetryDelay := sflags.MustGetDuration(cmd, "flush-retry-delay") 100 | 101 | cursorTableName := sflags.MustGetString(cmd, "cursors-table") 102 | historyTableName := sflags.MustGetString(cmd, "history-table") 103 | handleReorgs := sflags.MustGetInt(cmd, "undo-buffer-size") == 0 104 | 105 | sinkerFactory := sinker2.SinkerFactory(sink, sinker2.SinkerFactoryOptions{ 106 | CursorTableName: cursorTableName, 107 | 
HistoryTableName: historyTableName, 108 | ClickhouseCluster: sflags.MustGetString(cmd, "clickhouse-cluster"), 109 | BatchBlockFlushInterval: batchBlockFlushInterval, 110 | BatchRowFlushInterval: batchRowFlushInterval, 111 | LiveBlockFlushInterval: liveBlockFlushInterval, 112 | OnModuleHashMismatch: sflags.MustGetString(cmd, onModuleHashMistmatchFlag), 113 | HandleReorgs: handleReorgs, 114 | FlushRetryCount: flushRetryCount, 115 | FlushRetryDelay: flushRetryDelay, 116 | }) 117 | 118 | sqlSinker, err := sinkerFactory(app.Context(), dsnString, zlog, tracer) 119 | if err != nil { 120 | return fmt.Errorf("unable to setup sql sinker: %w", err) 121 | } 122 | 123 | app.SuperviseAndStart(sqlSinker) 124 | 125 | return app.WaitForTermination(zlog, 0*time.Second, 30*time.Second) 126 | } 127 | -------------------------------------------------------------------------------- /db_changes/db/dsn.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "fmt" 5 | "iter" 6 | "net/url" 7 | "os" 8 | "strconv" 9 | "strings" 10 | 11 | "github.com/drone/envsubst" 12 | ) 13 | 14 | type DSN struct { 15 | driver string 16 | original string 17 | scheme string 18 | 19 | Host string 20 | Port int64 21 | Username string 22 | Password string 23 | Database string 24 | Options DSNOptions 25 | 26 | // schema is the extracted schema from the DSN schemaName option (if present) 27 | schema string 28 | } 29 | 30 | var driverMap = map[string]string{ 31 | "psql": "postgres", 32 | "postgres": "postgres", 33 | "clickhouse": "clickhouse", 34 | "parquet": "parquet", 35 | } 36 | 37 | func ParseDSN(dsn string) (*DSN, error) { 38 | expanded, err := envsubst.Eval(dsn, os.Getenv) 39 | if err != nil { 40 | return nil, fmt.Errorf("variables expansion failed: %w", err) 41 | } 42 | 43 | dsnURL, err := url.Parse(expanded) 44 | if err != nil { 45 | return nil, fmt.Errorf("invalid url: %w", err) 46 | } 47 | 48 | driver, ok := driverMap[dsnURL.Scheme] 49 | if !ok { 50 | keys := make([]string, len(driverMap)) 51 | i := 0 52 | for k := range driverMap { 53 | keys[i] = k 54 | i++ 55 | } 56 | 57 | return nil, fmt.Errorf("invalid scheme %s, allowed schemes: [%s]", dsnURL.Scheme, strings.Join(keys, ",")) 58 | } 59 | 60 | host := dsnURL.Hostname() 61 | 62 | port := int64(5432) 63 | if strings.Contains(dsnURL.Host, ":") { 64 | port, _ = strconv.ParseInt(dsnURL.Port(), 10, 32) 65 | } 66 | 67 | username := dsnURL.User.Username() 68 | password, _ := dsnURL.User.Password() 69 | database := dsnURL.EscapedPath() 70 | if database != "parquet" { 71 | database = strings.TrimPrefix(database, "/") 72 | } 73 | 74 | d := &DSN{ 75 | original: dsn, 76 | driver: driver, 77 | scheme: dsnURL.Scheme, 78 | Host: host, 79 | Port: port, 80 | Username: username, 81 | Password: password, 82 | Database: database, 83 | Options: DSNOptions(dsnURL.Query()), 84 | } 85 | 86 | schemaName := d.Options.RemoveOr("schemaName", "") 87 | 88 | if driver == "clickhouse" { 89 | // For ClickHouse, store the target database name in schema, but keep 90 | // connecting to the original database to allow CREATE DATABASE commands 91 | if schemaName != "" { 92 | d.schema = schemaName 93 | } else { 94 | d.schema = database 95 | } 96 | } else { 97 | if schemaName == "" { 98 | schemaName = "public" 99 | } 100 | 101 | // For other databases (PostgreSQL), schemaName is separate from database 102 | d.schema = schemaName 103 | } 104 | 105 | return d, nil 106 | } 107 | 108 | func (c *DSN) Driver() string { 109 | return c.driver 110 | } 111 | 112 | 
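// ConnString (below) renders the parsed DSN in the form the driver expects. As a traced
// example, "psql://user:pass@localhost:5432/db?sslmode=disable" parses to the PostgreSQL
// branch and yields "host=localhost port=5432 dbname=db sslmode=disable user=user password=pass",
// while the ClickHouse branch re-assembles a URL-style string from the same parts.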
func (c *DSN) ConnString() string { 113 | if c.driver == "clickhouse" { 114 | scheme := c.driver 115 | host := c.Host 116 | 117 | baseURL := fmt.Sprintf("%s://%s:%s@%s:%d/%s", scheme, c.Username, c.Password, host, c.Port, c.Database) 118 | if len(c.Options) > 0 { 119 | baseURL += "?" + c.Options.Encode() 120 | } 121 | 122 | return baseURL 123 | } 124 | // PostgreSQL connection string uses space-separated options 125 | options := c.Options.EncodeWithSeparator(" ") 126 | out := fmt.Sprintf("host=%s port=%d dbname=%s %s", c.Host, c.Port, c.Database, options) 127 | if c.Username != "" { 128 | out = out + " user=" + c.Username 129 | } 130 | if c.Password != "" { 131 | out = out + " password=" + c.Password 132 | } 133 | return out 134 | } 135 | 136 | func (c *DSN) Schema() string { 137 | return c.schema 138 | } 139 | 140 | func (c *DSN) Clone() *DSN { 141 | return &DSN{ 142 | driver: c.driver, 143 | original: c.original, 144 | scheme: c.scheme, 145 | Host: c.Host, 146 | Port: c.Port, 147 | Username: c.Username, 148 | Password: c.Password, 149 | Database: c.Database, 150 | Options: c.Options, 151 | schema: c.schema, 152 | } 153 | } 154 | 155 | // DSNOptions is a thin wrapper around url.Values to provide helper methods and 156 | // better names. 157 | type DSNOptions url.Values 158 | 159 | // Iterate over the first value of each key, to be used in for range loops. 160 | func (v DSNOptions) Iter() iter.Seq2[string, string] { 161 | return func(yield func(k string, v string) bool) { 162 | for k, vs := range v { 163 | if len(vs) > 0 { 164 | if !yield(k, vs[0]) { 165 | return 166 | } 167 | } 168 | } 169 | } 170 | } 171 | 172 | // Encode encodes the values into “URL encoded” form ("bar=baz&foo=quux") sorted by key. 173 | func (v DSNOptions) Encode() string { 174 | return (url.Values(v)).Encode() 175 | } 176 | 177 | // EncodeWithSeparator encodes the values into “URL encoded” like form ("bar=baz foo=quux") sorted by key 178 | // where essentially the separator is used instead of '&'. 179 | func (v DSNOptions) EncodeWithSeparator(sep string) string { 180 | return strings.ReplaceAll((url.Values(v)).Encode(), "&", sep) 181 | } 182 | 183 | // Get returns the value associated with the key. 184 | func (v DSNOptions) Get(key string) string { 185 | return (url.Values(v)).Get(key) 186 | } 187 | 188 | // GetOr returns the value associated with the key or defaultValue if not found. 189 | func (v DSNOptions) GetOr(key, defaultValue string) string { 190 | if val := (url.Values(v)).Get(key); val != "" { 191 | return val 192 | } 193 | 194 | return defaultValue 195 | } 196 | 197 | // RemoveOr removes the key from the options and returns its value or defaultValue if not found. 
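// For example, with options parsed from "?schemaName=accounts&sslmode=disable",
// RemoveOr("schemaName", "public") returns "accounts" and deletes the key (so a later
// Encode only renders "sslmode=disable"); a second call would return the "public" default.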
198 | func (v DSNOptions) RemoveOr(key, defaultValue string) string {
199 |     val := (url.Values(v)).Get(key)
200 |     (url.Values(v)).Del(key)
201 |     if val != "" {
202 |         return val
203 |     }
204 |     return defaultValue
205 | }
206 | 
--------------------------------------------------------------------------------
/db_proto/test/substreams/order/src/lib.rs:
--------------------------------------------------------------------------------
1 | mod pb;
2 | 
3 | use std::i64;
4 | use pb::test as model;
5 | 
6 | use substreams_solana::pb::sf::solana::r#type::v1::Block;
7 | use crate::pb::test::relations::{NestedLevel1, OrderExtension};
8 | 
9 | #[substreams::handlers::map]
10 | fn map_output(block: Block) -> model::relations::Output {
11 |     let mut entities = vec![];
12 | 
13 | 
14 |     let long_string = "a".repeat(255);
15 | 
16 |     let byte_vector: Vec<u8> = (1..=255).collect();
17 | 
18 |     if block.slot % 100 != 0 {
19 |         entities.push(model::relations::Entity {
20 |             entity: Some(model::relations::entity::Entity::TypesTest {
21 |                 0: model::relations::TypesTest {
22 |                     id: block.slot,
23 |                     int32_field: i32::MAX,
24 |                     int64_field: i64::MAX,
25 |                     uint32_field: u32::MAX,
26 |                     uint64_field: u64::MAX,
27 |                     sint32_field: i32::MAX,
28 |                     sint64_field: i64::MAX,
29 |                     fixed32_field: u32::MAX,
30 |                     fixed64_field: u64::MAX,
31 |                     sfixed32_field: i32::MAX,
32 |                     float_field: f32::MAX,
33 |                     double_field: f64::MAX,
34 |                     string_field: long_string,
35 |                     bytes_field: byte_vector,
36 |                     timestamp_field: Some(prost_types::Timestamp::default()),
37 |                     bool_field: true,
38 |                     // Add other fields if there are more in your `TypesTest` message definition
39 |                     sfixed64_field: i64::MAX,
40 |                     optional_string_set: Some("string.1".to_string()),
41 |                     optional_string_not_set: None,
42 |                     optional_int32_field_set: Some(99),
43 |                     optional_int32_field_not_set: None,
44 | 
45 |                     repeated_int32_field: vec![0, 1, 2, 3],
46 |                     repeated_int64_field: vec![0, 1, 2, 3],
47 |                     repeated_uint32_field: vec![0, 1, 2, 3],
48 |                     repeated_uint64_field: vec![0, 1, 2, 3],
49 |                     repeated_sint32_field: vec![0, 1, 2, 3],
50 |                     repeated_sint64_field: vec![0, 1, 2, 3],
51 |                     repeated_fixed32_field: vec![0, 1, 2, 3],
52 |                     repeated_fixed64_field: vec![0, 1, 2, 3],
53 |                     repeated_sfixed32_field: vec![0, 1, 2, 3],
54 |                     repeated_sfixed64_field: vec![0, 1, 2, 3],
55 |                     repeated_double_field: vec![0.0, 1.0, 2.0, 3.0],
56 |                     repeated_float_field: vec![0.0, 1.0, 2.0, 3.0],
57 |                     repeated_bool_field: vec![true, false, true, false],
58 |                     repeated_string_field: vec!["A".to_string(), "B".to_string(), "C".to_string(), "D".to_string()],
59 | 
60 |                     str_2_int128: "170141183460469231731687303715884105727".to_string(),
61 |                     str_2_uint128: "340282366920938463463374607431768211455".to_string(),
62 |                     str_2_int256: "57896044618658097711785492504343953926634992332820282019728792003956564819967".to_string(),
63 |                     str_2_uint256: "115792089237316195423570985008687907853269984665640564039457584007913129639935".to_string(),
64 |                     str_2_decimal128: "17014118346046923173168.9988".to_string(),
65 |                     str_2_decimal256: "17014118346046923173168.9988".to_string(),
66 |                     optional_str_2_uint256: None,
67 |                     level1: Some(NestedLevel1 {
68 |                         name: "level1.name".to_string(),
69 |                         desc: "level1.desc".to_string(),
70 |                     }),
71 |                     list_of_level1: vec![
72 |                         NestedLevel1 {
73 |                             name: "name.1".to_string(),
74 |                             desc: "desc,1".to_string(),
75 |                         },
76 |                         NestedLevel1 {
77 |                             name: "name.2".to_string(),
78 |                             desc: "desc,2".to_string(),
79 |                         }],
80 |                 },
81 |             }),
82 |         });
83 |     }
84 | 
85 |     entities.push(model::relations::Entity {
86 |         entity:
Some(model::relations::entity::Entity::Customer { 87 | 0: model::relations::Customer { 88 | name: format!("customer.name.{}", block.slot), 89 | customer_id: format!("customer.id.{}", block.slot), 90 | }, 91 | }), 92 | }); 93 | 94 | entities.push(model::relations::Entity { 95 | entity: Some(model::relations::entity::Entity::Item { 96 | 0: model::relations::Item { 97 | item_id: format!("item.id.{}", block.slot), 98 | name: format!("item.name.{}", block.slot), 99 | price: 99.99, 100 | }, 101 | }), 102 | }); 103 | 104 | entities.push(model::relations::Entity { 105 | entity: Some(model::relations::entity::Entity::Order { 106 | 0: model::relations::Order { 107 | order_id: format!("order.id.{}", block.slot), 108 | customer_ref_id: format!("customer.id.{}", block.slot), 109 | items: vec![ 110 | model::relations::OrderItem { 111 | item_id: format!("item.id.{}", block.slot), 112 | quantity: 10, 113 | }, 114 | // model::relations::OrderItem { item_id: format!("item.id.{}", block.slot+1), quantity: 20 }, 115 | ], 116 | extension: Some(OrderExtension { description: "desc".to_string() }), 117 | }, 118 | }), 119 | }); 120 | 121 | model::relations::Output { entities } 122 | } 123 | -------------------------------------------------------------------------------- /db_changes/db/operations_test.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "strings" 8 | "testing" 9 | 10 | "github.com/bobg/go-generics/v2/slices" 11 | "github.com/stretchr/testify/assert" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestEscapeColumns(t *testing.T) { 16 | ctx := context.Background() 17 | dsnString := os.Getenv("PG_DSN") 18 | if dsnString == "" { 19 | t.Skip(`PG_DSN not set, please specify PG_DSN to run this test, example: PG_DSN="psql://dev-node:insecure-change-me-in-prod@localhost:5432/dev-node?enable_incremental_sort=off&sslmode=disable"`) 20 | } 21 | 22 | dsn, err := ParseDSN(dsnString) 23 | require.NoError(t, err) 24 | 25 | dbLoader, err := NewLoader( 26 | dsn, 27 | testCursorTableName, 28 | testHistoryTableName, 29 | "cluster.name.1", 30 | 0, 0, 0, 31 | OnModuleHashMismatchIgnore.String(), 32 | nil, 33 | zlog, tracer, 34 | ) 35 | require.NoError(t, err) 36 | 37 | tx, err := dbLoader.DB.Begin() 38 | require.NoError(t, err) 39 | 40 | colInputs := []string{ 41 | "regular", 42 | 43 | "from", // reserved keyword 44 | 45 | "withnewline\nafter", 46 | "withtab\tafter", 47 | "withreturn\rafter", 48 | "withbackspace\bafter", 49 | "withformfeed\fafter", 50 | 51 | `withdoubleQuote"aftersdf`, 52 | `withbackslash\after`, 53 | `withsinglequote'after`, 54 | } 55 | 56 | columnDefs := strings.Join(slices.Map(colInputs, func(str string) string { 57 | return fmt.Sprintf("%s text", EscapeIdentifier(str)) 58 | }), ",") 59 | 60 | createStatement := fmt.Sprintf(`create table "test" (%s)`, columnDefs) 61 | _, err = tx.ExecContext(ctx, createStatement) 62 | require.NoError(t, err) 63 | 64 | columns := strings.Join(slices.Map(colInputs, EscapeIdentifier), ",") 65 | values := strings.Join(slices.Map(colInputs, func(str string) string { return `'any'` }), ",") 66 | insertStatement := fmt.Sprintf(`insert into "test" (%s) values (%s)`, columns, values) 67 | 68 | _, err = tx.ExecContext(ctx, insertStatement) 69 | require.NoError(t, err) 70 | 71 | err = tx.Rollback() 72 | require.NoError(t, err) 73 | } 74 | 75 | func TestEscapeValues(t *testing.T) { 76 | 77 | ctx := context.Background() 78 | dsnString := os.Getenv("PG_DSN") 79 | 
if dsnString == "" { 80 | t.Skip(`PG_DSN not set, please specify PG_DSN to run this test, example: PG_DSN="psql://dev-node:insecure-change-me-in-prod@localhost:5432/dev-node?enable_incremental_sort=off&sslmode=disable"`) 81 | } 82 | 83 | dsn, err := ParseDSN(dsnString) 84 | require.NoError(t, err) 85 | 86 | dbLoader, err := NewLoader( 87 | dsn, 88 | testCursorTableName, 89 | testHistoryTableName, 90 | "cluster.name.1", 91 | 0, 0, 0, 92 | OnModuleHashMismatchIgnore.String(), 93 | nil, 94 | zlog, tracer, 95 | ) 96 | require.NoError(t, err) 97 | 98 | tx, err := dbLoader.DB.Begin() 99 | require.NoError(t, err) 100 | 101 | createStatement := `create table "test" ("col" text);` 102 | _, err = tx.ExecContext(ctx, createStatement) 103 | require.NoError(t, err) 104 | 105 | err = tx.Commit() 106 | require.NoError(t, err) 107 | 108 | defer func() { 109 | _, err = dbLoader.DB.ExecContext(ctx, `drop table "test"`) 110 | require.NoError(t, err) 111 | }() 112 | 113 | valueStrings := []string{ 114 | `regularValue`, 115 | 116 | `withApostrophe'`, 117 | 118 | "withNewlineCharNone\nafter", 119 | "withTabCharNone\tafter", 120 | "withCarriageReturnCharNone\rafter", 121 | "withBackspaceCharNone\bafter", 122 | "withFormFeedCharNone\fafter", 123 | 124 | `with\nNewlineLiteral`, 125 | 126 | `with'singleQuote`, 127 | `withDoubleQuote"`, 128 | `withSingle\Backslash`, 129 | 130 | `withExoticCharacterNone中文`, 131 | } 132 | 133 | for _, str := range valueStrings { 134 | t.Run(str, func(tt *testing.T) { 135 | 136 | tx, err := dbLoader.DB.Begin() 137 | require.NoError(t, err) 138 | 139 | insertStatement := fmt.Sprintf(`insert into "test" ("col") values (%s);`, escapeStringValue(str)) 140 | _, err = tx.ExecContext(ctx, insertStatement) 141 | require.NoError(tt, err) 142 | 143 | checkStatement := `select "col" from "test";` 144 | row := tx.QueryRowContext(ctx, checkStatement) 145 | var value string 146 | err = row.Scan(&value) 147 | require.NoError(tt, err) 148 | require.Equal(tt, str, value, "Inserted value is not equal to the expected value") 149 | 150 | err = tx.Rollback() 151 | require.NoError(tt, err) 152 | }) 153 | } 154 | } 155 | 156 | func Test_prepareColValues(t *testing.T) { 157 | type args struct { 158 | table *TableInfo 159 | colValues map[string]string 160 | } 161 | tests := []struct { 162 | name string 163 | args args 164 | wantColumns []string 165 | wantValues []string 166 | assertion require.ErrorAssertionFunc 167 | }{ 168 | { 169 | "bool true", 170 | args{ 171 | newTable(t, "schemaName", "name", "id", NewColumnInfo("col", "bool", true)), 172 | map[string]string{"col": "true"}, 173 | }, 174 | []string{`"col"`}, 175 | []string{`'true'`}, 176 | require.NoError, 177 | }, 178 | } 179 | for _, tt := range tests { 180 | t.Run(tt.name, func(t *testing.T) { 181 | dialect := PostgresDialect{} 182 | 183 | gotColumns, gotValues, err := dialect.prepareColValues(tt.args.table, tt.args.colValues) 184 | tt.assertion(t, err) 185 | assert.Equal(t, tt.wantColumns, gotColumns) 186 | assert.Equal(t, tt.wantValues, gotValues) 187 | }) 188 | } 189 | } 190 | 191 | func newTable(t *testing.T, schema, name, primaryColumn string, columnInfos ...*ColumnInfo) *TableInfo { 192 | columns := make(map[string]*ColumnInfo) 193 | columns[primaryColumn] = NewColumnInfo(primaryColumn, "text", "") 194 | for _, columnInfo := range columnInfos { 195 | columns[columnInfo.name] = columnInfo 196 | } 197 | 198 | table, err := NewTableInfo("public", "data", []string{"id"}, columns) 199 | require.NoError(t, err) 200 | 201 | return table 202 | } 203 | 
-------------------------------------------------------------------------------- /cmd/substreams-sink-sql/generate_csv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/spf13/cobra" 9 | "github.com/spf13/pflag" 10 | . "github.com/streamingfast/cli" 11 | "github.com/streamingfast/cli/sflags" 12 | sink "github.com/streamingfast/substreams-sink" 13 | db2 "github.com/streamingfast/substreams-sink-sql/db_changes/db" 14 | sinker2 "github.com/streamingfast/substreams-sink-sql/db_changes/sinker" 15 | "github.com/streamingfast/substreams/manifest" 16 | ) 17 | 18 | // lastCursorFilename is the name of the file where the last cursor is stored, no extension as it's added by the store 19 | const lastCursorFilename = "last_cursor" 20 | 21 | var generateCsvCmd = Command(generateCsvE, 22 | "generate-csv <dsn> <manifest> <start>:<stop>", 23 | "Generates CSVs for each table so they can be bulk inserted with `inject-csv` (for PostgreSQL only)", 24 | Description(` 25 | This command is the first step of a multi-step process to bulk insert data into a PostgreSQL database. 26 | It creates a folder for each table and generates CSVs for block ranges. These files can then be used with 27 | the 'inject-csv' command to bulk insert data into the database. 28 | 29 | It requires that the database already exist and that the schema already be created. 30 | 31 | The process is as follows: 32 | 33 | - Generate CSVs for each table with this command 34 | - Inject the CSVs into the database with the 'inject-csv' command (contains 'cursors' table, double check you injected it correctly!) 35 | - Start streaming with the 'run' command 36 | `), 37 | ExactArgs(3), 38 | Flags(func(flags *pflag.FlagSet) { 39 | sink.AddFlagsToSet(flags, sink.FlagIgnore("final-blocks-only")) 40 | AddCommonSinkerFlags(flags) 41 | AddCommonDatabaseChangesFlags(flags) 42 | 43 | flags.Uint64("bundle-size", 10000, "Size of output bundle, in blocks") 44 | flags.String("working-dir", "./workdir", "Path to local folder used as working directory") 45 | flags.String("output-dir", "./csv-output", "Path to local folder used as destination for CSV") 46 | flags.StringP("endpoint", "e", "", "Specify the substreams endpoint, ex: `mainnet.eth.streamingfast.io:443`") 47 | flags.Uint64("buffer-max-size", 4*1024*1024, FlagDescription(` 48 | Number of memory bytes to allocate to the buffered writer. If your data set is small enough that everything is held in memory, the local 49 | I/O operation(s) are avoided and the accumulated in-memory content is uploaded directly to the final storage location. 50 | 51 | Ideally, you should set this to about 80%% of the RAM the process has access to. This maximizes the number of elements held in memory, 52 | and reduces 'syscall' and I/O operations writing to the temporary file, since a lot of data is buffered. 53 | 54 | This setting probably has the greatest impact on writing throughput. 55 | 56 | The default value for the buffer is 4 MiB.
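As a worked example of the 80%% guidance above: a process with 16 GiB of RAM available has roughly
12.8 GiB as its 80%% budget, so a value in the neighborhood of '--buffer-max-size=13743895347' would
be a reasonable setting.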
57 | `)) 58 | }), 59 | OnCommandErrorLogAndExit(zlog), 60 | ) 61 | 62 | func generateCsvE(cmd *cobra.Command, args []string) error { 63 | app := NewApplication(cmd.Context()) 64 | 65 | sink.RegisterMetrics() 66 | sinker2.RegisterMetrics() 67 | 68 | dsnString := args[0] 69 | manifestPath := args[1] 70 | blockRange := args[2] 71 | 72 | outputDir := sflags.MustGetString(cmd, "output-dir") 73 | bundleSize := sflags.MustGetUint64(cmd, "bundle-size") 74 | bufferMaxSize := sflags.MustGetUint64(cmd, "buffer-max-size") 75 | workingDir := sflags.MustGetString(cmd, "working-dir") 76 | cursorTableName := sflags.MustGetString(cmd, "cursors-table") 77 | historyTableName := sflags.MustGetString(cmd, "history-table") 78 | 79 | endpoint := sflags.MustGetString(cmd, "endpoint") 80 | if endpoint == "" { 81 | network := sflags.MustGetString(cmd, "network") 82 | if network == "" { 83 | reader, err := manifest.NewReader(manifestPath) 84 | if err != nil { 85 | return fmt.Errorf("setup manifest reader: %w", err) 86 | } 87 | pkgBundle, err := reader.Read() 88 | if err != nil { 89 | return fmt.Errorf("read manifest: %w", err) 90 | } 91 | network = pkgBundle.Package.Network 92 | } 93 | var err error 94 | endpoint, err = manifest.ExtractNetworkEndpoint(network, sflags.MustGetString(cmd, "endpoint"), zlog) 95 | if err != nil { 96 | return err 97 | } 98 | } 99 | 100 | sink, err := sink.NewFromViper( 101 | cmd, 102 | supportedOutputTypes, 103 | endpoint, 104 | manifestPath, 105 | sink.InferOutputModuleFromPackage, 106 | blockRange, 107 | zlog, 108 | tracer, 109 | sink.WithFinalBlocksOnly(), 110 | ) 111 | if err != nil { 112 | return fmt.Errorf("new base sinker: %w", err) 113 | } 114 | 115 | dsn, err := db2.ParseDSN(dsnString) 116 | if err != nil { 117 | return fmt.Errorf("parse dsn: %w", err) 118 | } 119 | 120 | handleReorgs := false 121 | dbLoader, err := db2.NewLoader( 122 | dsn, 123 | cursorTableName, 124 | historyTableName, 125 | sflags.MustGetString(cmd, "clickhouse-cluster"), 126 | 0, 0, 0, 127 | sflags.MustGetString(cmd, onModuleHashMistmatchFlag), 128 | &handleReorgs, 129 | zlog, tracer, 130 | ) 131 | 132 | if err != nil { 133 | return fmt.Errorf("creating loader: %w", err) 134 | } 135 | 136 | if err := dbLoader.LoadTables(dsn.Schema(), cursorTableName, historyTableName); err != nil { 137 | var e *db2.SystemTableError 138 | if errors.As(err, &e) { 139 | fmt.Printf("Error validating the system table: %s\n", e) 140 | fmt.Println("Did you run setup ?") 141 | return e 142 | } 143 | 144 | return fmt.Errorf("load tables: %w", err) 145 | } 146 | 147 | generateCSVSinker, err := sinker2.NewGenerateCSVSinker( 148 | sink, 149 | outputDir, 150 | workingDir, 151 | cursorTableName, 152 | bundleSize, 153 | bufferMaxSize, 154 | dbLoader, 155 | lastCursorFilename, 156 | zlog, 157 | tracer, 158 | ) 159 | if err != nil { 160 | return fmt.Errorf("unable to setup generate csv sinker: %w", err) 161 | } 162 | 163 | app.Supervise(generateCSVSinker.Shutter) 164 | 165 | go func() { 166 | generateCSVSinker.Run(app.Context()) 167 | }() 168 | 169 | return app.WaitForTermination(zlog, 0*time.Second, 30*time.Second) 170 | } 171 | -------------------------------------------------------------------------------- /db_proto/sinker_factory.go: -------------------------------------------------------------------------------- 1 | package db_proto 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "time" 7 | 8 | "github.com/streamingfast/logging" 9 | sink "github.com/streamingfast/substreams-sink" 10 | "github.com/streamingfast/substreams-sink-sql/bytes" 11 
| "github.com/streamingfast/substreams-sink-sql/db_changes/db" 12 | protosql "github.com/streamingfast/substreams-sink-sql/db_proto/sql" 13 | clickhouse "github.com/streamingfast/substreams-sink-sql/db_proto/sql/click_house" 14 | "github.com/streamingfast/substreams-sink-sql/db_proto/sql/postgres" 15 | schema2 "github.com/streamingfast/substreams-sink-sql/db_proto/sql/schema" 16 | stats2 "github.com/streamingfast/substreams-sink-sql/db_proto/stats" 17 | "go.uber.org/zap" 18 | "google.golang.org/protobuf/reflect/protoreflect" 19 | ) 20 | 21 | type SinkerFactoryFunc func(ctx context.Context, dsnString, schemaName string, logger *zap.Logger, tracer logging.Tracer) (*Sinker, error) 22 | 23 | type SinkerFactoryOptions struct { 24 | UseProtoOption bool 25 | UseConstraints bool 26 | UseTransactions bool 27 | BlockBatchSize int 28 | Parallel bool 29 | Encoding bytes.Encoding 30 | Clickhouse SinkerFactoryClickhouse 31 | } 32 | 33 | type SinkerFactoryClickhouse struct { 34 | SinkInfoFolder string 35 | CursorFilePath string 36 | QueryRetryCount int 37 | QueryRetrySleep time.Duration 38 | } 39 | 40 | func (o SinkerFactoryOptions) Defaults() SinkerFactoryOptions { 41 | if o.BlockBatchSize <= 0 { 42 | o.BlockBatchSize = 25 43 | } 44 | o.UseTransactions = true 45 | if o.Encoding == 0 { 46 | o.Encoding = bytes.EncodingRaw 47 | } 48 | return o 49 | } 50 | 51 | func SinkerFactory( 52 | baseSink *sink.Sinker, 53 | outputModuleName string, 54 | rootMessageDescriptor protoreflect.MessageDescriptor, 55 | options SinkerFactoryOptions, 56 | ) SinkerFactoryFunc { 57 | return func(ctx context.Context, dsnString string, schemaName string, logger *zap.Logger, tracer logging.Tracer) (*Sinker, error) { 58 | dsn, err := db.ParseDSN(dsnString) 59 | if err != nil { 60 | return nil, fmt.Errorf("parsing dsn: %w", err) 61 | } 62 | 63 | schema, err := schema2.NewSchema(schemaName, rootMessageDescriptor, options.UseProtoOption, logger) 64 | if err != nil { 65 | return nil, fmt.Errorf("creating schema: %w", err) 66 | } 67 | 68 | var database protosql.Database 69 | 70 | switch dsn.Driver() { 71 | case "postgres": 72 | database, err = postgres.NewDatabase(schema, dsn, outputModuleName, rootMessageDescriptor, options.UseProtoOption, options.UseConstraints, options.Encoding, logger) 73 | if err != nil { 74 | return nil, fmt.Errorf("creating postgres database: %w", err) 75 | } 76 | 77 | case "clickhouse": 78 | database, err = clickhouse.NewDatabase( 79 | ctx, 80 | schema, 81 | dsn, 82 | outputModuleName, 83 | rootMessageDescriptor, 84 | options.Clickhouse.SinkInfoFolder, 85 | options.Clickhouse.CursorFilePath, 86 | true, 87 | options.Encoding, 88 | logger, 89 | tracer, 90 | options.Clickhouse.QueryRetryCount, 91 | options.Clickhouse.QueryRetrySleep, 92 | ) 93 | if err != nil { 94 | return nil, fmt.Errorf("creating clickhouse database: %w", err) 95 | } 96 | 97 | default: 98 | panic(fmt.Sprintf("unsupported driver: %s", dsn.Driver())) 99 | 100 | } 101 | 102 | sinkInfo, err := database.FetchSinkInfo(schema.Name) 103 | if err != nil { 104 | return nil, fmt.Errorf("fetching sink info: %w", err) 105 | } 106 | 107 | logger.Info("sink info read", zap.Reflect("sink_info", sinkInfo)) 108 | if sinkInfo == nil { 109 | err := database.BeginTransaction() 110 | if err != nil { 111 | return nil, fmt.Errorf("begin transaction: %w", err) 112 | } 113 | err = database.CreateDatabase(options.UseConstraints) 114 | if err != nil { 115 | database.RollbackTransaction() 116 | return nil, fmt.Errorf("creating database: %w", err) 117 | } 118 | 119 | err = 
database.StoreSinkInfo(schemaName, database.GetDialect().SchemaHash()) 120 | if err != nil { 121 | database.RollbackTransaction() 122 | return nil, fmt.Errorf("storing sink info: %w", err) 123 | } 124 | 125 | err = database.CommitTransaction() 126 | if err != nil { return nil, fmt.Errorf("commit transaction: %w", err) } 127 | } else { 128 | migrationNeeded := sinkInfo.SchemaHash != database.GetDialect().SchemaHash() 129 | if migrationNeeded { 130 | 131 | tempSchemaName := schema.Name + "_" + database.GetDialect().SchemaHash() 132 | tempSinkInfo, err := database.FetchSinkInfo(tempSchemaName) 133 | if err != nil { 134 | return nil, fmt.Errorf("fetching temp schema sink info: %w", err) 135 | } 136 | if tempSinkInfo != nil { 137 | hash, err := database.DatabaseHash(schema.Name) 138 | if err != nil { 139 | return nil, fmt.Errorf("fetching schema %q hash: %w", schema.Name, err) 140 | } 141 | dbTempHash, err := database.DatabaseHash(tempSchemaName) 142 | if err != nil { 143 | return nil, fmt.Errorf("fetching temp schema %q hash: %w", tempSchemaName, err) 144 | } 145 | 146 | if hash != dbTempHash { 147 | return nil, fmt.Errorf("schema %s and temp schema %s have different hashes", schema.Name, tempSchemaName) 148 | } 149 | err = database.BeginTransaction() 150 | if err != nil { 151 | return nil, fmt.Errorf("begin transaction: %w", err) 152 | } 153 | err = database.UpdateSinkInfoHash(schemaName, tempSinkInfo.SchemaHash) 154 | if err != nil { 155 | database.RollbackTransaction() 156 | return nil, fmt.Errorf("updating sink info hash: %w", err) 157 | } 158 | 159 | err = database.CommitTransaction() 160 | if err != nil { 161 | return nil, fmt.Errorf("commit transaction: %w", err) 162 | } 163 | 164 | } else { 165 | //todo: create the temp schema ... and exit 166 | 167 | //err = schema.ChangeName(tempSchemaName, dialect) 168 | //if err != nil { 169 | // return nil, fmt.Errorf("changing schema name: %w", err) 170 | //} 171 | //generateTempSchema = true 172 | } 173 | } 174 | } 175 | 176 | err = database.Open() 177 | if err != nil { 178 | return nil, fmt.Errorf("opening database: %w", err) 179 | } 180 | 181 | return NewSinker( 182 | rootMessageDescriptor, 183 | baseSink, 184 | database, 185 | options.UseTransactions, 186 | options.UseConstraints, 187 | options.BlockBatchSize, 188 | options.Parallel, 189 | stats2.NewStats(logger), 190 | logger, 191 | ), nil 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /db_proto/sql/postgres/types.go: -------------------------------------------------------------------------------- 1 | package postgres 2 | 3 | import ( 4 | "encoding/hex" 5 | "fmt" 6 | "strconv" 7 | "strings" 8 | "time" 9 | 10 | "github.com/streamingfast/substreams-sink-sql/bytes" 11 | "github.com/streamingfast/substreams-sink-sql/db_proto/sql/schema" 12 | v1 "github.com/streamingfast/substreams-sink-sql/pb/sf/substreams/sink/sql/schema/v1" 13 | "google.golang.org/protobuf/encoding/protojson" 14 | "google.golang.org/protobuf/reflect/protoreflect" 15 | "google.golang.org/protobuf/types/known/timestamppb" 16 | ) 17 | 18 | type DataType string 19 | 20 | const ( 21 | TypeNumeric DataType = "NUMERIC" 22 | TypeInteger DataType = "INTEGER" 23 | TypeBool DataType = "BOOLEAN" 24 | TypeBigInt DataType = "BIGINT" 25 | TypeDecimal DataType = "DECIMAL" 26 | TypeDouble DataType = "DOUBLE PRECISION" 27 | TypeText DataType = "TEXT" 28 | TypeBlob DataType = "BLOB" 29 | TypeVarchar DataType = "VARCHAR(255)" 30 | TypeBytea DataType = "BYTEA" 31 | TypeTimestamp DataType = "TIMESTAMP" 32 | TypeJSONB DataType = "JSONB" 33 | ) 34 | 35 | func (s
DataType) String() string { 36 | return string(s) 37 | } 38 | 39 | func IsWellKnownType(fd protoreflect.FieldDescriptor) bool { 40 | if fd.Kind() != protoreflect.MessageKind { 41 | return false 42 | } 43 | switch string(fd.Message().FullName()) { 44 | case "google.protobuf.Timestamp": 45 | return true 46 | default: 47 | return false 48 | } 49 | } 50 | 51 | func MapFieldType(fd protoreflect.FieldDescriptor, bytesEncoding bytes.Encoding, column *schema.Column) DataType { 52 | kind := fd.Kind() 53 | var baseType DataType 54 | 55 | switch kind { 56 | case protoreflect.MessageKind: 57 | if column.Nested != nil { 58 | baseType = TypeJSONB 59 | } else { 60 | switch string(fd.Message().FullName()) { 61 | case "google.protobuf.Timestamp": 62 | baseType = TypeTimestamp 63 | default: 64 | panic(fmt.Sprintf("Message type not supported: %s", string(fd.Message().FullName()))) 65 | } 66 | } 67 | case protoreflect.BoolKind: 68 | baseType = TypeBool 69 | case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind: 70 | baseType = TypeInteger 71 | case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind: 72 | baseType = TypeBigInt 73 | case protoreflect.Uint64Kind, protoreflect.Fixed64Kind: 74 | baseType = TypeNumeric 75 | case protoreflect.Uint32Kind, protoreflect.Fixed32Kind: 76 | baseType = TypeNumeric 77 | case protoreflect.FloatKind: 78 | baseType = TypeDecimal 79 | case protoreflect.DoubleKind: 80 | baseType = TypeDouble 81 | case protoreflect.StringKind: 82 | if column.ConvertTo != nil && column.ConvertTo.Convertion != nil { 83 | // Bind the concrete conversion once instead of re-asserting it in each case. 84 | switch conv := column.ConvertTo.Convertion.(type) { 85 | case *v1.StringConvertion_Int128, 86 | *v1.StringConvertion_Uint128, 87 | *v1.StringConvertion_Int256, 88 | *v1.StringConvertion_Uint256: 89 | baseType = TypeNumeric 90 | 91 | case *v1.StringConvertion_Decimal128: 92 | baseType = DataType(fmt.Sprintf("DECIMAL(38,%d)", conv.Decimal128.Scale)) 93 | 94 | case *v1.StringConvertion_Decimal256: 95 | baseType = DataType(fmt.Sprintf("DECIMAL(76,%d)", conv.Decimal256.Scale)) 96 | 97 | default: 98 | baseType = TypeVarchar 99 | 100 | } 101 | } else { 102 | baseType = TypeVarchar 103 | } 104 | case protoreflect.BytesKind: 105 | if bytesEncoding.IsStringType() { 106 | baseType = TypeText 107 | } else { 108 | baseType = TypeBytea 109 | } 110 | case protoreflect.EnumKind: 111 | baseType = TypeText 112 | default: 113 | panic(fmt.Sprintf("unsupported type: %s", kind)) 114 | } 115 | 116 | // If field is repeated, wrap the base type as an array 117 | if fd.IsList() { 118 | return DataType(fmt.Sprintf("%s[]", baseType)) 119 | } 120 | 121 | return baseType 122 | } 123 | 124 | func ValueToString(value any, bytesEncoding bytes.Encoding) (s string) { 125 | switch v := value.(type) { 126 | case string: 127 | s = "'" + strings.ReplaceAll(strings.ReplaceAll(v, "'", "''"), "\\", "\\\\") + "'" 128 | case int64: 129 | s = strconv.FormatInt(v, 10) 130 | case int32: 131 | s = strconv.FormatInt(int64(v), 10) 132 | case int: 133 | s = strconv.FormatInt(int64(v), 10) 134 | case uint64: 135 | s = strconv.FormatUint(v, 10) 136 | case uint32: 137 | s = strconv.FormatUint(uint64(v), 10) 138 | case uint: 139 | s = strconv.FormatUint(uint64(v), 10) 140 | case float64: 141 | s = strconv.FormatFloat(v, 'f', -1,
64) 142 | case float32: 143 | s = strconv.FormatFloat(float64(v), 'f', -1, 32) 144 | case []uint8: 145 | if bytesEncoding == bytes.EncodingRaw { 146 | // For raw encoding, emit a PostgreSQL hex bytea literal; the \x prefix is 147 | // required for the hex digits to be decoded as bytes. 148 | s = "'\\x" + hex.EncodeToString(v) + "'::BYTEA" 149 | } else { 150 | encoded, err := bytesEncoding.EncodeBytes(v) 151 | if err != nil { 152 | panic(fmt.Sprintf("failed to encode bytes: %v", err)) 153 | } 154 | s = "'" + encoded.(string) + "'" 155 | } 156 | case bool: 157 | s = strconv.FormatBool(v) 158 | case time.Time: 159 | s = "'" + v.Format(time.RFC3339) + "'" 160 | case *timestamppb.Timestamp: 161 | s = "'" + v.AsTime().Format(time.RFC3339) + "'" 162 | // Handle array types for PostgreSQL 163 | case []interface{}: 164 | var elements []string 165 | for _, elem := range v { 166 | elements = append(elements, ValueToString(elem, bytesEncoding)) 167 | } 168 | s = "array[" + strings.Join(elements, ",") + "]" 169 | case protoreflect.Message: 170 | jsonBytes, err := protojson.Marshal(v.Interface()) 171 | if err != nil { 172 | panic(fmt.Sprintf("failed to marshal protobuf message to JSON: %v", err)) 173 | } 174 | s = "'" + strings.ReplaceAll(strings.ReplaceAll(string(jsonBytes), "'", "''"), "\\", "\\\\") + "'" 175 | return 176 | default: 177 | if msg, ok := v.(protoreflect.ProtoMessage); ok { 178 | jsonBytes, err := protojson.Marshal(msg) 179 | if err != nil { 180 | panic(fmt.Sprintf("failed to marshal protobuf message to JSON: %v", err)) 181 | } 182 | s = "'" + strings.ReplaceAll(strings.ReplaceAll(string(jsonBytes), "'", "''"), "\\", "\\\\") + "'" 183 | return 184 | } 185 | panic(fmt.Sprintf("unsupported type: %T", v)) 186 | } 187 | return 188 | } 189 | -------------------------------------------------------------------------------- /db_proto/sql/postgres/row_inserter.go: -------------------------------------------------------------------------------- 1 | package postgres 2 | 3 | import ( 4 | "database/sql" 5 | "encoding/base64" 6 | "fmt" 7 | "strconv" 8 | "strings" 9 | "time" 10 | 11 | sql2 "github.com/streamingfast/substreams-sink-sql/db_proto/sql" 12 | "github.com/streamingfast/substreams-sink-sql/db_proto/sql/schema" 13 | "go.uber.org/zap" 14 | "google.golang.org/protobuf/types/known/timestamppb" 15 | ) 16 | 17 | type RowInserter struct { 18 | insertQueries map[string]string 19 | insertStatements map[string]*sql.Stmt 20 | logger *zap.Logger 21 | database *Database 22 | } 23 | 24 | func NewRowInserter(logger *zap.Logger) (*RowInserter, error) { 25 | logger = logger.Named("postgres inserter") 26 | 27 | return &RowInserter{ 28 | logger: logger, 29 | }, nil 30 | } 31 | 32 | func (i *RowInserter) init(database *Database) error { 33 | tables := database.dialect.GetTables() 34 | insertStatements := map[string]*sql.Stmt{} 35 | insertQueries := map[string]string{} 36 | 37 | i.database = database 38 | 39 | for _, table := range tables { 40 | query, err := createInsertFromDescriptor(table, database.dialect) 41 | if err != nil { 42 | return fmt.Errorf("creating insert from descriptor for table %q: %w", table.Name, err) 43 | } 44 | insertQueries[table.Name] = query 45 | 46 | stmt, err := database.db.Prepare(query) 47 | if err != nil { 48 | return fmt.Errorf("preparing statement %q: %w", query, err) 49 | } 50 | insertStatements[table.Name] = stmt 51 | } 52 | 53 | insertQueries["_blocks_"] = fmt.Sprintf("INSERT INTO %s (number, hash, timestamp) VALUES ($1, $2, $3) RETURNING number", tableName(database.schema.Name, "_blocks_")) 54 | bs, err :=
database.db.Prepare(insertQueries["_blocks_"]) 55 | if err != nil { 56 | return fmt.Errorf("preparing statement %q: %w", insertQueries["_blocks_"], err) 57 | } 58 | insertStatements["_blocks_"] = bs 59 | 60 | insertQueries["_cursor_"] = fmt.Sprintf("INSERT INTO %s (name, cursor) VALUES ($1, $2) ON CONFLICT (name) DO UPDATE SET cursor = $2", tableName(database.schema.Name, "_cursor_")) 61 | cs, err := database.db.Prepare(insertQueries["_cursor_"]) 62 | if err != nil { 63 | return fmt.Errorf("preparing statement %q: %w", insertQueries["_cursor_"], err) 64 | } 65 | insertStatements["_cursor_"] = cs 66 | 67 | i.insertQueries = insertQueries 68 | i.insertStatements = insertStatements 69 | 70 | return nil 71 | } 72 | 73 | func createInsertFromDescriptor(table *schema.Table, dialect sql2.Dialect) (string, error) { 74 | tableName := dialect.FullTableName(table) 75 | fields := table.Columns 76 | 77 | var fieldNames []string 78 | var placeholders []string 79 | 80 | fieldCount := 0 81 | returningField := "" 82 | if table.PrimaryKey != nil { 83 | returningField = table.PrimaryKey.Name 84 | } 85 | 86 | fieldCount++ 87 | fieldNames = append(fieldNames, sql2.DialectFieldBlockNumber) 88 | placeholders = append(placeholders, fmt.Sprintf("$%d", fieldCount)) 89 | fieldCount++ 90 | fieldNames = append(fieldNames, sql2.DialectFieldBlockTimestamp) 91 | placeholders = append(placeholders, fmt.Sprintf("$%d", fieldCount)) 92 | 93 | if pk := table.PrimaryKey; pk != nil { 94 | fieldCount++ 95 | returningField = pk.Name 96 | fieldNames = append(fieldNames, pk.Name) 97 | placeholders = append(placeholders, fmt.Sprintf("$%d", fieldCount)) //$1 98 | } 99 | 100 | if table.ChildOf != nil { 101 | fieldCount++ 102 | fieldNames = append(fieldNames, table.ChildOf.ParentTableField) 103 | placeholders = append(placeholders, fmt.Sprintf("$%d", fieldCount)) 104 | } 105 | 106 | for _, field := range fields { 107 | if field.Name == returningField { 108 | continue 109 | } 110 | if field.IsExtension { //not a direct child 111 | continue 112 | } 113 | if field.IsRepeated && field.Nested == nil { 114 | // Check if it's a repeated message (which should be skipped) or repeated scalar (which should be processed) 115 | if field.IsMessage { 116 | continue 117 | } 118 | // Allow repeated scalar fields to be processed as arrays 119 | } 120 | fieldCount++ 121 | fieldNames = append(fieldNames, field.QuotedName()) 122 | placeholders = append(placeholders, fmt.Sprintf("$%d", fieldCount)) 123 | } 124 | 125 | return fmt.Sprintf("INSERT INTO %s (%s) VALUES (%s)", 126 | tableName, 127 | strings.Join(fieldNames, ", "), 128 | strings.Join(placeholders, ", "), 129 | ), nil 130 | 131 | } 132 | 133 | func (i *RowInserter) insert(table string, values []any, database *Database) error { 134 | i.logger.Debug("inserting row", zap.String("table", table), zap.Any("values", values)) 135 | stmt := i.insertStatements[table] 136 | stmt = database.wrapInsertStatement(stmt) 137 | 138 | t := i.database.dialect.TableRegistry[table] 139 | 140 | fieldIndexOffset := 2 141 | if t != nil && t.ChildOf != nil { 142 | fieldIndexOffset = 3 //remove foreign key 143 | } 144 | 145 | for i, value := range values { 146 | 147 | var column *schema.Column 148 | fieldIndex := i - fieldIndexOffset //remove _block_number and _block_timestamp + foreign key 149 | 150 | if t != nil && fieldIndex >= 0 { 151 | column = t.Columns[fieldIndex] 152 | } 153 | 154 | switch v := value.(type) { 155 | case string: 156 | if column != nil && column.ConvertTo != nil && column.ConvertTo.Convertion != nil { 
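// Editorial note (assumption): columns that declare a string-to-numeric conversion
// (int128/uint128/int256/uint256/decimal) presumably cannot accept an empty string on
// the database side, so an empty value is coerced to 0 before being bound below.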
157 | if v == "" { 158 | values[i] = 0 159 | } 160 | } 161 | case uint64: 162 | values[i] = strconv.FormatUint(v, 10) 163 | case []uint8: 164 | if database.dialect.bytesEncoding.IsStringType() { 165 | encoded, err := database.dialect.bytesEncoding.EncodeBytes(v) 166 | if err != nil { 167 | return fmt.Errorf("failed to encode bytes: %v", err) 168 | } 169 | values[i] = encoded.(string) 170 | continue 171 | } 172 | values[i] = "'" + base64.StdEncoding.EncodeToString(v) + "'" 173 | case *timestamppb.Timestamp: 174 | values[i] = "'" + v.AsTime().Format(time.RFC3339) + "'" 175 | case []interface{}: 176 | // Handle arrays by converting to PostgreSQL array format 177 | var elements []string 178 | for _, elem := range v { 179 | elements = append(elements, ValueToString(elem, database.dialect.bytesEncoding)) 180 | } 181 | values[i] = "{" + strings.Join(elements, ",") + "}" 182 | } 183 | } 184 | 185 | _, err := stmt.Exec(values...) 186 | if err != nil { 187 | insert := i.insertQueries[table] 188 | return fmt.Errorf("pg row inserter: executing insert %q: %w", insert, err) 189 | } 190 | 191 | return nil 192 | } 193 | 194 | func (i *RowInserter) flush(database *Database) error { 195 | return nil 196 | } 197 | -------------------------------------------------------------------------------- /db_changes/db/cursor.go: -------------------------------------------------------------------------------- 1 | package db 2 | 3 | import ( 4 | "context" 5 | "database/sql" 6 | "errors" 7 | "fmt" 8 | "strings" 9 | 10 | "github.com/lithammer/dedent" 11 | sink "github.com/streamingfast/substreams-sink" 12 | "go.uber.org/zap" 13 | ) 14 | 15 | var ErrCursorNotFound = errors.New("cursor not found") 16 | 17 | type cursorRow struct { 18 | ID string 19 | Cursor string 20 | BlockNum uint64 21 | BlockID string 22 | } 23 | 24 | // GetAllCursors returns an unordered map giving, for each module hash recorded, 25 | // the active cursor for it. 26 | func (l *Loader) GetAllCursors(ctx context.Context) (out map[string]*sink.Cursor, err error) { 27 | query := l.dialect.GetAllCursorsQuery(l.cursorTable.identifier) 28 | rows, err := l.DB.QueryContext(ctx, query) 29 | if err != nil { 30 | return nil, fmt.Errorf("query all cursors: %w", err) 31 | } 32 | 33 | out = make(map[string]*sink.Cursor) 34 | for rows.Next() { 35 | c := &cursorRow{} 36 | if err := rows.Scan(&c.ID, &c.Cursor, &c.BlockNum, &c.BlockID); err != nil { 37 | return nil, fmt.Errorf("getting all cursors: %w", err) 38 | } 39 | 40 | out[c.ID], err = sink.NewCursor(c.Cursor) 41 | if err != nil { 42 | return nil, fmt.Errorf("database corrupted: stored cursor %q is not a valid cursor: %w", c.Cursor, err) 43 | } 44 | } 45 | 46 | return out, nil 47 | } 48 | 49 | func (l *Loader) GetCursor(ctx context.Context, outputModuleHash string) (cursor *sink.Cursor, mistmatchDetected bool, err error) { 50 | cursors, err := l.GetAllCursors(ctx) 51 | if err != nil { 52 | return nil, false, fmt.Errorf("get cursor: %w", err) 53 | } 54 | 55 | if len(cursors) == 0 { 56 | return sink.NewBlankCursor(), false, ErrCursorNotFound 57 | } 58 | 59 | activeCursor, found := cursors[outputModuleHash] 60 | if found { 61 | return activeCursor, false, nil 62 | } 63 | 64 | // The cursor for our module hash was not found, so fall back to the cursor at the highest 65 | // block; depending on the configured mismatch mode below, we may report a warning since the module hash differs.
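// For example, given stored cursors {"abcd…": block 100, "ef01…": block 250} and an expected
// hash of "1234…", the cursor at block 250 (module hash "ef01…") is selected and the mismatch
// is then handled according to l.moduleMismatchMode. (Hashes and block numbers are illustrative.)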
66 | actualOutputModuleHash, activeCursor := cursorAtHighestBlock(cursors) 67 | 68 | switch l.moduleMismatchMode { 69 | case OnModuleHashMismatchIgnore: 70 | return activeCursor, true, nil 71 | 72 | case OnModuleHashMismatchWarn: 73 | l.logger.Warn( 74 | fmt.Sprintf("cursor module hash mismatch, continuing using cursor at highest block %s, this warning can be made silent by using '--on-module-hash-mistmatch=ignore'", activeCursor.Block()), 75 | zap.String("expected_module_hash", outputModuleHash), 76 | zap.String("actual_module_hash", actualOutputModuleHash), 77 | ) 78 | 79 | return activeCursor, true, nil 80 | 81 | case OnModuleHashMismatchError: 82 | return nil, true, fmt.Errorf("cursor module hash mismatch, refusing to continue because flag '--on-module-hash-mistmatch=error' (the default) is set, you can change it to 'warn' or 'ignore': your module's hash is %q but the cursor with the highest block (%d) actually has module hash %q in the database", 83 | outputModuleHash, 84 | activeCursor.Block().Num(), 85 | actualOutputModuleHash, 86 | ) 87 | 88 | default: 89 | panic(fmt.Errorf("unknown module mismatch mode %q", l.moduleMismatchMode)) 90 | } 91 | } 92 | 93 | func cursorAtHighestBlock(in map[string]*sink.Cursor) (hash string, highest *sink.Cursor) { 94 | for moduleHash, cursor := range in { 95 | if highest == nil || cursor.Block().Num() > highest.Block().Num() { 96 | highest = cursor 97 | hash = moduleHash 98 | } 99 | } 100 | 101 | return 102 | } 103 | 104 | func (l *Loader) InsertCursor(ctx context.Context, moduleHash string, c *sink.Cursor) error { 105 | query := fmt.Sprintf("INSERT INTO %s (id, cursor, block_num, block_id) values ('%s', '%s', %d, '%s')", 106 | l.cursorTable.identifier, 107 | moduleHash, 108 | c, 109 | c.Block().Num(), 110 | c.Block().ID(), 111 | ) 112 | if _, err := l.DB.ExecContext(ctx, query); err != nil { 113 | return fmt.Errorf("insert cursor: %w", err) 114 | } 115 | 116 | return nil 117 | } 118 | 119 | // UpdateCursor updates the active cursor. If no cursor is active and no update occurred, returns 120 | // ErrCursorNotFound. If the update was not successful on the database, returns an error. 121 | // You can use tx=nil to run the query outside of a transaction. 122 | func (l *Loader) UpdateCursor(ctx context.Context, tx Tx, moduleHash string, c *sink.Cursor) error { 123 | l.logger.Debug("updating cursor", zap.String("module_hash", moduleHash), zap.Stringer("cursor", c)) 124 | _, err := l.runModifiyQuery(ctx, tx, "update", l.dialect.GetUpdateCursorQuery( 125 | l.cursorTable.identifier, moduleHash, c, c.Block().Num(), c.Block().ID(), 126 | )) 127 | return err 128 | } 129 | 130 | // DeleteCursor deletes the active cursor for the given 'moduleHash'. If no cursor is active and 131 | // no delete occurred, returns ErrCursorNotFound. If the delete was not successful on the database, returns an error. 132 | func (l *Loader) DeleteCursor(ctx context.Context, moduleHash string) error { 133 | _, err := l.runModifiyQuery(ctx, nil, "delete", fmt.Sprintf("DELETE FROM %s WHERE id = '%s'", l.cursorTable.identifier, moduleHash)) 134 | return err 135 | } 136 | 137 | // DeleteAllCursors deletes all stored cursors, regardless of module hash. If no cursors existed, returns 138 | // a count of 0 without error. If the delete was not successful on the database, returns an error.
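//
// A hedged usage sketch (loader construction omitted; names are illustrative):
//
//	deleted, err := loader.DeleteAllCursors(ctx)
//	if err != nil {
//		return fmt.Errorf("reset cursors: %w", err)
//	}
//	logger.Info("cursors deleted", zap.Int64("count", deleted))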
139 | func (l *Loader) DeleteAllCursors(ctx context.Context) (deletedCount int64, err error) { 140 | deletedCount, err = l.runModifiyQuery(ctx, nil, "delete", fmt.Sprintf("DELETE FROM %s", l.cursorTable.identifier)) 141 | if errors.Is(err, ErrCursorNotFound) { 142 | return 0, nil 143 | } 144 | 145 | return deletedCount, err 146 | } 147 | 148 | type sqlExecutor interface { 149 | ExecContext(ctx context.Context, query string, args ...any) (sql.Result, error) 150 | } 151 | 152 | // runModifiyQuery runs the logic to execute a query that is supposed to modify the database in some form, affecting 153 | // at least 1 row. 154 | // 155 | // If `tx` is nil, we use `l.DB` as the execution context, so the operation happens outside of 156 | // a transaction. Otherwise, tx is the execution context. 157 | func (l *Loader) runModifiyQuery(ctx context.Context, tx Tx, action string, query string) (rowsAffected int64, err error) { 158 | var executor sqlExecutor = l.DB 159 | if tx != nil { 160 | executor = tx 161 | } 162 | 163 | result, err := executor.ExecContext(ctx, query) 164 | if err != nil { 165 | return 0, fmt.Errorf("%s cursor: %w", action, err) 166 | } 167 | 168 | rowsAffected, err = result.RowsAffected() 169 | if err != nil { 170 | return 0, fmt.Errorf("rows affected: %w", err) 171 | } 172 | 173 | if l.dialect.DriverSupportRowsAffected() && rowsAffected <= 0 { 174 | return 0, ErrCursorNotFound 175 | } 176 | 177 | return rowsAffected, nil 178 | } 179 | 180 | func query(in string, args ...any) string { 181 | return fmt.Sprintf(strings.TrimSpace(dedent.Dedent(in)), args...) 182 | } 183 | -------------------------------------------------------------------------------- /db_changes/bundler/bundler.go: -------------------------------------------------------------------------------- 1 | package bundler 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "path" 8 | "time" 9 | 10 | "github.com/streamingfast/bstream" 11 | "github.com/streamingfast/dhammer" 12 | "github.com/streamingfast/dstore" 13 | "github.com/streamingfast/shutter" 14 | "github.com/streamingfast/substreams-sink-sql/db_changes/bundler/writer" 15 | "go.uber.org/zap" 16 | ) 17 | 18 | type Bundler struct { 19 | *shutter.Shutter 20 | 21 | blockCount uint64 22 | stats *boundaryStats 23 | boundaryWriter writer.Writer 24 | outputStore dstore.Store 25 | Header []byte 26 | HeaderWritten bool 27 | 28 | activeBoundary *bstream.Range 29 | stopBlock uint64 30 | uploadQueue *dhammer.Nailer 31 | zlogger *zap.Logger 32 | } 33 | 34 | var ErrStopBlockReached = errors.New("stop block reached") 35 | 36 | func New( 37 | size uint64, 38 | stopBlock uint64, 39 | boundaryWriter writer.Writer, 40 | outputStore dstore.Store, 41 | zlogger *zap.Logger, 42 | header []byte, 43 | ) (*Bundler, error) { 44 | 45 | b := &Bundler{ 46 | Shutter: shutter.New(), 47 | boundaryWriter: boundaryWriter, 48 | outputStore: outputStore, 49 | blockCount: size, 50 | stopBlock: stopBlock, 51 | stats: newStats(), 52 | zlogger: zlogger, 53 | Header: header, 54 | HeaderWritten: false, 55 | } 56 | 57 | b.uploadQueue = dhammer.NewNailer(5, b.uploadBoundary, dhammer.NailerLogger(zlogger)) 58 | 59 | // switch boundaryWriter.Type() { 60 | // case writer.FileTypeJSONL: 61 | // b.encoder = JSONLEncode 62 | // case writer.FileTypeCSV: 63 | // b.encoder = JSONLEncode 64 | // default: 65 | // return nil, fmt.Errorf("invalid file type %q", boundaryWriter.Type()) 66 | // } 67 | return b, nil 68 | } 69 | 70 | func (b *Bundler) name() string { 71 | return
path.Base(b.outputStore.BaseURL().Path) 72 | } 73 | 74 | func (b *Bundler) Launch(ctx context.Context) { 75 | b.OnTerminating(func(err error) { 76 | b.zlogger.Info("shutting down bundler", zap.String("store", b.name()), zap.Error(err)) 77 | b.Close() 78 | }) 79 | b.uploadQueue.Start(ctx) 80 | 81 | go func() { 82 | for v := range b.uploadQueue.Out { 83 | bf := v.(*boundaryFile) 84 | b.zlogger.Debug("uploaded file", zap.String("filename", bf.name)) 85 | } 86 | if b.uploadQueue.Err() != nil { 87 | b.Shutdown(fmt.Errorf("upload queue failed: %w", b.uploadQueue.Err())) 88 | } 89 | }() 90 | 91 | b.uploadQueue.OnTerminating(func(_ error) { 92 | b.Shutdown(fmt.Errorf("upload queue failed: %w", b.uploadQueue.Err())) 93 | }) 94 | } 95 | 96 | func (b *Bundler) Close() { 97 | b.zlogger.Debug("closing upload queue") 98 | b.uploadQueue.Close() 99 | b.zlogger.Debug("waiting till queue is drained") 100 | b.uploadQueue.WaitUntilEmpty(context.Background()) 101 | b.zlogger.Debug("boundary upload completed") 102 | } 103 | 104 | func (b *Bundler) Roll(ctx context.Context, blockNum uint64) (rolled bool, err error) { 105 | if b.activeBoundary.Contains(blockNum) { 106 | return false, nil 107 | } 108 | 109 | boundaries := boundariesToSkip(b.activeBoundary, blockNum, b.blockCount) 110 | 111 | b.zlogger.Info("block_num is not in active boundary", 112 | zap.Stringer("active_boundary", b.activeBoundary), 113 | zap.Int("boundaries_to_skip", len(boundaries)), 114 | zap.Uint64("block_num", blockNum), 115 | ) 116 | 117 | if err := b.stop(ctx); err != nil { 118 | return false, fmt.Errorf("stop active boundary: %w", err) 119 | } 120 | 121 | // Empty boundaries are before `blockNum`, we must flush them also before checking if we should quit 122 | for _, boundary := range boundaries { 123 | if err := b.Start(boundary.StartBlock()); err != nil { 124 | return false, fmt.Errorf("start skipping boundary: %w", err) 125 | } 126 | if err := b.stop(ctx); err != nil { 127 | return false, fmt.Errorf("stop skipping boundary: %w", err) 128 | } 129 | } 130 | 131 | if blockNum >= b.stopBlock { 132 | return false, ErrStopBlockReached 133 | } 134 | 135 | if err := b.Start(blockNum); err != nil { 136 | return false, fmt.Errorf("start active boundary: %w", err) 137 | } 138 | 139 | return true, nil 140 | } 141 | 142 | func (b *Bundler) TrackBlockProcessDuration(elapsed time.Duration) { 143 | b.stats.addProcessingDataDur(elapsed) 144 | } 145 | 146 | func (b *Bundler) Writer() writer.Writer { 147 | return b.boundaryWriter 148 | } 149 | 150 | func (b *Bundler) Start(blockNum uint64) error { 151 | boundaryRange := b.newBoundary(blockNum) 152 | b.activeBoundary = boundaryRange 153 | 154 | b.zlogger.Debug("starting new file boundary", zap.Stringer("boundary", boundaryRange)) 155 | if err := b.boundaryWriter.StartBoundary(boundaryRange); err != nil { 156 | return fmt.Errorf("start file: %w", err) 157 | } 158 | 159 | b.stats.startBoundary(boundaryRange) 160 | b.zlogger.Debug("boundary started", zap.Stringer("boundary", boundaryRange)) 161 | return nil 162 | } 163 | 164 | func (b *Bundler) stop(ctx context.Context) error { 165 | b.zlogger.Debug("stopping file boundary") 166 | 167 | file, err := b.boundaryWriter.CloseBoundary(ctx) 168 | if err != nil { 169 | return fmt.Errorf("closing file: %w", err) 170 | } 171 | 172 | if b.boundaryWriter.IsWritten() { 173 | b.zlogger.Debug("queuing boundary upload", zap.Stringer("boundary", b.activeBoundary)) 174 | 175 | b.uploadQueue.In <- &boundaryFile{ 176 | name: b.activeBoundary.String(), 177 | file: file, 178 | } 
179 | } else { 180 | b.zlogger.Debug("boundary not written, skipping upload of files", zap.Stringer("boundary", b.activeBoundary)) 181 | } 182 | 183 | // Reset state 184 | b.HeaderWritten = false 185 | b.activeBoundary = nil 186 | b.stats.endBoundary() 187 | 188 | b.zlogger.Info("bundler stats", b.stats.Log()...) 189 | return nil 190 | } 191 | 192 | func (b *Bundler) newBoundary(containingBlockNum uint64) *bstream.Range { 193 | startBlock := containingBlockNum - (containingBlockNum % b.blockCount) 194 | endBlock := startBlock + b.blockCount 195 | if b.stopBlock < endBlock { 196 | endBlock = b.stopBlock 197 | } 198 | return bstream.NewRangeExcludingEnd(startBlock, endBlock) 199 | } 200 | 201 | func boundariesToSkip(lastBoundary *bstream.Range, blockNum uint64, size uint64) (out []*bstream.Range) { 202 | iter := *lastBoundary.EndBlock() 203 | endBlock := computeEndBlock(iter, size) 204 | for blockNum >= endBlock { 205 | out = append(out, bstream.NewRangeExcludingEnd(iter, endBlock)) 206 | iter = endBlock 207 | endBlock = computeEndBlock(iter, size) 208 | } 209 | return out 210 | } 211 | 212 | func computeEndBlock(startBlockNum, size uint64) uint64 { 213 | return (startBlockNum + size) - (startBlockNum+size)%size 214 | } 215 | 216 | type boundaryFile struct { 217 | name string 218 | file writer.Uploadeable 219 | } 220 | 221 | func (b *Bundler) uploadBoundary(ctx context.Context, v interface{}) (interface{}, error) { 222 | bf := v.(*boundaryFile) 223 | 224 | outputPath, err := bf.file.Upload(ctx, b.outputStore) 225 | if err != nil { 226 | return nil, fmt.Errorf("unable to upload: %w", err) 227 | } 228 | b.zlogger.Debug("boundary file uploaded", 229 | zap.String("boundary", bf.name), 230 | zap.String("output_path", outputPath), 231 | ) 232 | 233 | return bf, nil 234 | } 235 | -------------------------------------------------------------------------------- /db_changes/state/file.go: -------------------------------------------------------------------------------- 1 | package state 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "os" 8 | "sync" 9 | "time" 10 | 11 | "github.com/streamingfast/bstream" 12 | "github.com/streamingfast/dhammer" 13 | "github.com/streamingfast/dstore" 14 | "github.com/streamingfast/shutter" 15 | sink "github.com/streamingfast/substreams-sink" 16 | "github.com/streamingfast/substreams-sink-sql/db_changes/bundler/writer" 17 | "go.uber.org/zap" 18 | "gopkg.in/yaml.v3" 19 | ) 20 | 21 | var _ Store = (*FileStateStore)(nil) 22 | 23 | type FileStateStore struct { 24 | *shutter.Shutter 25 | 26 | startOnce sync.Once 27 | 28 | outputPath string 29 | outputStore dstore.Store 30 | uploadQueue *dhammer.Nailer 31 | 32 | logger *zap.Logger 33 | 34 | state *FileState 35 | } 36 | 37 | func NewFileStateStore( 38 | outputPath string, 39 | outputStore dstore.Store, 40 | logger *zap.Logger, 41 | ) (*FileStateStore, error) { 42 | s := &FileState{} 43 | 44 | content, err := os.ReadFile(outputPath) 45 | if err != nil && !os.IsNotExist(err) { 46 | return nil, fmt.Errorf("read file: %w", err) 47 | } 48 | if err != nil && os.IsNotExist(err) { 49 | s = newFileState() 50 | } 51 | 52 | if err := yaml.Unmarshal(content, s); err != nil { 53 | return nil, fmt.Errorf("unmarshal state file %q: %w", outputPath, err) 54 | } 55 | outputStore.SetOverwrite(true) 56 | f := &FileStateStore{ 57 | Shutter: shutter.New(), 58 | outputPath: outputPath, 59 | outputStore: outputStore, 60 | state: s, 61 | logger: logger, 62 | } 63 | f.uploadQueue = dhammer.NewNailer(5, f.uploadCursor, 
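// editorial note (assumption): as with the bundler above, the leading 5 appears to cap the
// nailer's upload parallelism, i.e. up to 5 cursor uploads may be in flight concurrently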
dhammer.NailerLogger(logger)) 64 | return f, nil 65 | } 66 | 67 | func (s *FileStateStore) Start(ctx context.Context) { 68 | s.OnTerminating(func(err error) { 69 | s.logger.Info("shutting down file cursor", zap.String("store", s.outputPath), zap.Error(err)) 70 | s.Close() 71 | }) 72 | 73 | s.uploadQueue.Start(ctx) 74 | 75 | go func() { 76 | for v := range s.uploadQueue.Out { 77 | bf := v.(*cursorFile) 78 | s.logger.Debug("uploaded file", zap.String("filename", bf.name)) 79 | } 80 | if s.uploadQueue.Err() != nil { 81 | s.Shutdown(fmt.Errorf("upload queue failed: %w", s.uploadQueue.Err())) 82 | } 83 | }() 84 | 85 | s.uploadQueue.OnTerminating(func(err error) { 86 | s.Shutdown(fmt.Errorf("upload queue failed: %w", s.uploadQueue.Err())) 87 | }) 88 | } 89 | 90 | type cursorFile struct { 91 | name string 92 | file writer.Uploadeable 93 | } 94 | 95 | func (s *FileStateStore) uploadCursor(ctx context.Context, v interface{}) (interface{}, error) { 96 | bf := v.(*cursorFile) 97 | 98 | outputPath, err := bf.file.Upload(ctx, s.outputStore) 99 | if err != nil { 100 | return nil, fmt.Errorf("unable to upload: %w", err) 101 | } 102 | s.logger.Debug("boundary file uploaded", 103 | zap.String("boundary", bf.name), 104 | zap.String("output_path", outputPath), 105 | ) 106 | 107 | return bf, nil 108 | } 109 | 110 | // TODO: this whole section needs to be reworked; it carries too many things that aren't needed 111 | type localFile struct { 112 | localFilePath string 113 | outputFilename string 114 | } 115 | 116 | func (l *localFile) Upload(ctx context.Context, store dstore.Store) (string, error) { 117 | if err := store.PushLocalFile(ctx, l.localFilePath, l.outputFilename); err != nil { 118 | return "", fmt.Errorf("pushing object: %w", err) 119 | } 120 | return store.ObjectPath(l.outputFilename), nil 121 | } 122 | 123 | func (s *FileStateStore) UploadCursor(saveable Saveable) { 124 | s.uploadQueue.In <- &cursorFile{ 125 | name: "cursor.yaml", 126 | file: saveable.GetUploadeable(), 127 | } 128 | } 129 | 130 | // (end of the section to rework) 131 | 132 | func (s *FileStateStore) Close() { 133 | s.uploadQueue.Close() 134 | s.logger.Debug("waiting till queue is drained") 135 | s.uploadQueue.WaitUntilEmpty(context.Background()) 136 | } 137 | 138 | func (s *FileStateStore) ReadCursor(ctx context.Context) (cursor *sink.Cursor, err error) { 139 | fl, err := s.outputStore.OpenObject(ctx, "state.yaml") 140 | if err != nil && err != dstore.ErrNotFound { 141 | return nil, fmt.Errorf("opening state file: %w", err) 142 | } 143 | 144 | if err != nil { // at this point, err can only be dstore.ErrNotFound 145 | s.state = newFileState() 146 | } else { 147 | defer fl.Close() 148 | buf := new(bytes.Buffer) 149 | if _, err := buf.ReadFrom(fl); err != nil { return nil, fmt.Errorf("reading state file: %w", err) } 150 | content := buf.Bytes() 151 | 152 | if err := yaml.Unmarshal(content, s.state); err != nil { 153 | return nil, fmt.Errorf("unmarshal state file %q: %w", s.outputPath, err) 154 | } 155 | } 156 | 157 | return sink.NewCursor(s.state.Cursor) 158 | } 159 | 160 | func (s *FileStateStore) NewBoundary(boundary *bstream.Range) { 161 | s.state.ActiveBoundary.StartBlockNumber = boundary.StartBlock() 162 | s.state.ActiveBoundary.EndBlockNumber = *boundary.EndBlock() 163 | } 164 | 165 | func (s *FileStateStore) SetCursor(cursor *sink.Cursor) { 166 | s.startOnce.Do(func() { 167 | restartAt := time.Now() 168 | if s.state.StartedAt.IsZero() { 169 | s.state.StartedAt = restartAt 170 | } 171 | s.state.RestartedAt = restartAt 172 | }) 173 | 174 | s.state.Cursor = cursor.String() 175 | s.state.Block = BlockState{ 176 | ID: cursor.Block().ID(), 177 | Number: cursor.Block().Num(), 178 | }
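// As an illustration, GetState below marshals this state into YAML of roughly the following
// shape (values are hypothetical; field names follow the yaml tags on FileState):
//
//	cursor: "cN7..."
//	block:
//	  id: "0xabc"
//	  number: 1234
//	active_boundary:
//	  start_block_number: 1000
//	  end_block_number: 2000
//	started_at: 2023-01-01T00:00:00Z
//	restarted_at: 2023-01-02T00:00:00Z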
179 | } 180 | 181 | func (s *FileStateStore) GetState() (Saveable, error) { 182 | cnt, err := yaml.Marshal(s.state) 183 | if err != nil { 184 | return nil, fmt.Errorf("marshal: %w", err) 185 | } 186 | return &stateInstance{ 187 | data: cnt, 188 | path: s.outputPath + "-" + s.state.Block.ID, 189 | }, nil 190 | } 191 | 192 | type FileState struct { 193 | Cursor string `yaml:"cursor" json:"cursor"` 194 | Block BlockState `yaml:"block" json:"block"` 195 | ActiveBoundary ActiveBoundary `yaml:"active_boundary" json:"active_boundary"` 196 | 197 | // StartedAt is the time this process was initially launched, not accounting for any restart; once set, 198 | // this value is never rewritten (unless the file does not exist anymore). 199 | StartedAt time.Time `yaml:"started_at,omitempty" json:"started_at,omitempty"` 200 | // RestartedAt is the time this process was last launched, meaning it is reset each time the process starts. 201 | // In contrast to `StartedAt`, this value changes each time the process is restarted. 202 | RestartedAt time.Time `yaml:"restarted_at,omitempty" json:"restarted_at,omitempty"` 203 | } 204 | 205 | func newFileState() *FileState { 206 | return &FileState{ 207 | Cursor: "", 208 | Block: BlockState{"", 0}, 209 | } 210 | } 211 | 212 | type BlockState struct { 213 | ID string `yaml:"id" json:"id"` 214 | Number uint64 `yaml:"number" json:"number"` 215 | } 216 | 217 | type ActiveBoundary struct { 218 | StartBlockNumber uint64 `yaml:"start_block_number" json:"start_block_number"` 219 | EndBlockNumber uint64 `yaml:"end_block_number" json:"end_block_number"` 220 | } 221 | 222 | type stateInstance struct { 223 | data []byte 224 | path string 225 | } 226 | 227 | func (s *stateInstance) GetUploadeable() writer.Uploadeable { 228 | return &localFile{ 229 | localFilePath: s.path, 230 | outputFilename: "state.yaml", 231 | } 232 | } 233 | 234 | func (s *stateInstance) Save() error { 235 | if err := os.WriteFile(s.path, s.data, os.ModePerm); err != nil { 236 | return fmt.Errorf("unable to write state file: %w", err) 237 | } 238 | return nil 239 | } 240 | --------------------------------------------------------------------------------