├── .clippy.toml ├── .github └── workflows │ └── rust.yml ├── .gitignore ├── CLI.md ├── Cargo.toml ├── Dbdbgen.md ├── DbdbgenTutorial.md ├── Download.md ├── LICENSE ├── README.md ├── SchemaGen.md ├── Template.md ├── TemplateAdvanced.md ├── benches └── benchmark.rs ├── build.rs ├── dbdbgen ├── Cargo.toml ├── dbdbgen.libsonnet └── src │ ├── bin │ └── dbdbgen.rs │ ├── cli.rs │ ├── error.rs │ ├── jsvm.rs │ └── lib.rs ├── dbgen-playground ├── Cargo.toml ├── index.html └── src │ └── lib.rs ├── deny.toml ├── fuzz.sh ├── fuzz ├── .gitignore ├── Cargo.toml ├── corpus │ └── fuzz_target_1 │ │ ├── a.seed │ │ ├── b.seed │ │ ├── c.seed │ │ ├── d.seed │ │ ├── e.seed │ │ ├── f.seed │ │ ├── g.seed │ │ └── h.seed └── fuzz_targets │ └── fuzz_target_1.rs ├── release ├── Dockerfile ├── README.md ├── package.sh ├── playground.sh ├── publish-playground.sh └── release.sh ├── res ├── sysbench │ ├── bulk_insert.sql │ ├── oltp_uniform_mysql.sql │ ├── oltp_uniform_postgresql.sql │ └── oltp_uniform_sqlite3.sql └── tpcc │ ├── README.md │ └── tpcc.jsonnet ├── rustfmt.toml ├── src ├── array.rs ├── bin │ ├── dbgen.rs │ └── dbschemagen.rs ├── bytes.rs ├── cli.rs ├── error.rs ├── eval.rs ├── format.rs ├── functions │ ├── array.rs │ ├── codec.rs │ ├── debug.rs │ ├── mod.rs │ ├── ops.rs │ ├── rand.rs │ ├── string.rs │ └── time.rs ├── lexctr.rs ├── lib.rs ├── number.rs ├── parser.pest ├── parser.rs ├── schemagen_cli.rs ├── span.rs ├── value.rs └── writer.rs └── tests ├── check.rs └── data ├── array ├── flags.json ├── result.1.sql └── template.sql ├── check_eval_result ├── flags.json ├── result.1.sql └── template.sql ├── comment-expr ├── flags.json ├── result.1.sql └── template.sql ├── compress ├── flags.json ├── result-schema.sql ├── result.1.sql.xz └── template.sql ├── csv ├── flags.json ├── result-schema.sql ├── result.1.csv └── template.sql ├── derived-tables ├── flags.json ├── template.sql ├── test-schema-create.sql ├── test.animal-schema.sql ├── test.animal.1.sql ├── test.animal.2.sql ├── 
test.head-schema.sql ├── test.head.1.sql ├── test.head.2.sql ├── test.limb-schema.sql ├── test.limb.1.sql ├── test.limb.2.sql ├── test.toe-schema.sql ├── test.toe.1.sql └── test.toe.2.sql ├── div-mod ├── flags.json ├── result.1.sql └── template.sql ├── escape-backslash ├── flags.json ├── result-schema.sql ├── result.1.sql └── template.sql ├── expr-in-middle ├── flags.json ├── result-schema.sql ├── result.1.sql └── template.sql ├── file-size ├── a.1000.csv ├── a.1001.csv ├── a.2000.csv ├── a.2001.csv ├── b.1000.csv ├── b.1001.csv ├── b.1002.csv ├── b.2000.csv ├── b.2001.csv ├── b.2002.csv ├── b.2003.csv ├── b.2004.csv ├── b.2005.csv ├── flags.json └── template.sql ├── global-exprs ├── flags.json ├── result-schema.sql ├── result.1.sql └── template.sql ├── headers-csv ├── a-schema.sql ├── a.1.csv ├── a.2.csv ├── b-schema.sql ├── b.1.csv ├── b.2.csv ├── flags.json └── template.sql ├── headers-sql ├── a-schema.sql ├── a.1.sql ├── a.2.sql ├── b-schema.sql ├── b.1.sql ├── b.2.sql ├── flags.json └── template.sql ├── jagged-output ├── flags.json ├── result-schema.sql ├── result.1.sql ├── result.2.sql ├── result.3.sql └── template.sql ├── lazy-array ├── flags.json ├── result.1.csv └── template.sql ├── rand-finite-float ├── flags.json ├── result-schema.sql ├── result.1.sql └── template.sql ├── rand-weighted ├── flags.json ├── result-schema.sql ├── result.1.sql └── template.sql ├── seeded-hc128 ├── flags.json ├── result-schema.sql ├── result.1.sql └── template.sql ├── shuffle-with-restarts ├── flags.json ├── result-schema.sql ├── result.1.sql ├── result.2.sql └── template.sql ├── substring ├── flags.json ├── result.1.sql └── template.sql ├── uuid ├── flags.json ├── result-schema.sql ├── result.1.sql └── template.sql └── zero-children ├── flags.json ├── template.sql ├── test.a-schema.sql ├── test.a.1.sql ├── test.b-schema.sql ├── test.b.1.sql ├── test.c-schema.sql └── test.c.1.sql /.clippy.toml: -------------------------------------------------------------------------------- 1 
| doc-valid-idents = [ 2 | 'ChaCha12', 3 | 'ChaCha20', 4 | 'MySQL', 5 | 'PostgreSQL', 6 | 'SQLite', 7 | '..', 8 | ] -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | timeout-minutes: 5 15 | strategy: 16 | fail-fast: true 17 | matrix: 18 | rustup: 19 | - toolchain: stable 20 | components: "clippy, rustfmt" 21 | - toolchain: nightly 22 | components: "" 23 | steps: 24 | - uses: actions/checkout@v2 25 | - uses: actions-rs/toolchain@v1 26 | name: Install Rust 27 | with: 28 | toolchain: ${{ matrix.rustup.toolchain }} 29 | profile: minimal 30 | components: ${{ matrix.rustup.components }} 31 | default: true 32 | - name: Clippy 33 | run: cargo clippy 34 | if: contains(matrix.rustup.components, 'clippy') 35 | - name: Format 36 | run: cargo fmt -- --check 37 | if: contains(matrix.rustup.components, 'rustfmt') 38 | - name: Test 39 | run: cargo test 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | rls*.log 5 | playground_bg.wasm 6 | playground.js 7 | *.tar.xz 8 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dbgen" 3 | version = "0.8.0" 4 | authors = ["kennytm "] 5 | edition = "2024" 6 | license = "MIT" 7 | description = "Generate random test cases for databases" 8 | repository = "https://github.com/kennytm/dbgen" 9 | exclude = ["fuzz.sh", "release/*"] 10 | readme = "README.md" 11 | keywords = ["cli", "generator", "database", "fake"] 12 | 
categories = ["command-line-utilities", "simulation"] 13 | rust-version = "1.85.0" 14 | 15 | [workspace] 16 | members = ["dbgen-playground", "dbdbgen"] 17 | 18 | [workspace.dependencies] 19 | chrono = { version = "0.4.26", default-features = false, features = ["std", "serde", "clock"] } 20 | clap = "4.5" 21 | data-encoding = "2.4" 22 | parse-size = { version = "1.0", features = ["std"] } 23 | rand = { version = "0.8.5", default-features = false, features = ["getrandom"]} 24 | rand_hc = "0.3" 25 | serde = { version = "1.0", features = ["derive"]} 26 | serde_json = "1.0" 27 | thiserror = "1.0" 28 | 29 | [dependencies] 30 | clap = { workspace = true, optional = true, features = ["derive"] } 31 | pest = "2.7" 32 | pest_derive = "2.7" 33 | thiserror = { workspace = true } 34 | rand = { workspace = true } 35 | data-encoding = { workspace = true } 36 | # syn is currently duplicated because of this, wait until next release that includes https://github.com/ia0/data-encoding/pull/108 37 | data-encoding-macro = "0.1" 38 | regex-syntax = "0.8" 39 | # switch back to `pbr` once https://github.com/a8m/pb/pull/120 is officially released 40 | pbr-120 = { version = "1.1", optional = true } 41 | num-traits = "0.2" 42 | rayon = { version = "1.7", optional = true } 43 | chrono = { workspace = true } 44 | ryu = "1.0" 45 | serde = { workspace = true } 46 | muldiv = { version = "1.0", optional = true } 47 | rand_distr = { version = "0.4", default-features = false, features = ["alloc"] } 48 | rand_regex = "0.17" 49 | rand_pcg = { version = "0.3", optional = true } 50 | rand_isaac = { version = "0.3", optional = true } 51 | rand_chacha = { version = "0.3", optional = true } 52 | rand_hc = { workspace = true } 53 | rand_xorshift = { version = "0.3", optional = true } 54 | shlex = { version = "1.1", optional = true } 55 | flate2 = { version = "1.0", optional = true } 56 | xz2 = { version = "0.1", optional = true } 57 | zstd = { version = "0.13", default-features = false, optional = true } 58 
| smallvec = { version = "1.13", default-features = false, features = ["union"] } 59 | memchr = "2.5" 60 | numcmp = "0.1" 61 | parse-size = { workspace = true, optional = true } 62 | fastrand = { version = "2.1", default-features = false } 63 | auto_enums = "0.8" 64 | 65 | [dev-dependencies] 66 | regex = { version = "1.9", default-features = false } 67 | tempfile = "3.7" 68 | serde_json = { workspace = true } 69 | diff = "0.1" 70 | criterion = "0.5" 71 | 72 | [[bench]] 73 | name = "benchmark" 74 | harness = false 75 | 76 | [build-dependencies] 77 | vergen = { version = "8.2", default-features = false, features = ["git", "gitcl", "cargo"] } 78 | 79 | [features] 80 | default = ["cli"] 81 | cli = [ 82 | "dep:clap", 83 | "dep:pbr-120", 84 | "dep:rayon", 85 | "serde/derive", 86 | "dep:muldiv", 87 | "dep:rand_pcg", 88 | "dep:rand_isaac", 89 | "dep:rand_chacha", 90 | "dep:rand_xorshift", 91 | "dep:shlex", 92 | "dep:flate2", 93 | "dep:xz2", 94 | "dep:zstd", 95 | "dep:parse-size", 96 | ] 97 | nightly = ["rand/nightly"] 98 | 99 | [[bin]] 100 | name = "dbgen" 101 | required-features = ["cli"] 102 | 103 | [[bin]] 104 | name = "dbschemagen" 105 | required-features = ["cli"] 106 | 107 | [profile.release] 108 | panic = "abort" 109 | lto = true 110 | codegen-units = 1 111 | debug = 2 112 | split-debuginfo = "packed" 113 | strip = true 114 | -------------------------------------------------------------------------------- /Dbdbgen.md: -------------------------------------------------------------------------------- 1 | `dbdbgen` Reference 2 | =================== 3 | 4 | ```sh 5 | dbdbgen res/tpcc.jsonnet -w 50 -o ./tpcc_out 6 | ``` 7 | 8 | `dbdbgen` is a metaprogram generating a random database of many tables through 9 | multiple invocation of `dbgen`. It can be used to populate a set of related 10 | tables such as those used in TPC-C and TPC-H benchmarks. 11 | 12 | Usage 13 | ----- 14 | 15 | ```sh 16 | dbdbgen [--dry-run] [--allow-import] program.jsonnet ... 
17 | ``` 18 | 19 | * `program.jsonnet ...` 20 | 21 | The program describing how the database is generated, followed by the 22 | arguments passed into the program. 23 | 24 | * `--dry-run` 25 | 26 | Runs the program without running `dbgen`. The evaluated steps are printed to 27 | stdout as JSON with comment, which can in turn be used again as a program. 28 | 29 | * `--allow-import` 30 | 31 | Allows the `import` and `importstr` constructs to read from the file system. 32 | The default is false, meaning only `import 'dbdbgen.libsonnet'` is allowed. 33 | 34 | The input to `dbdbgen` is a [Jsonnet](https://jsonnet.org/) file, specifying the 35 | command line interface and what arguments to be passed into `dbgen`. The 36 | workflow is like this: 37 | 38 | 1. First, it evaluates `$.args` to construct a command line parser, consuming 39 | the remaining arguments passed into `dbdbgen`. 40 | 2. Then, it evaluates `$.steps(m)` where `m` is the matches from CLI, and 41 | produces a list of instructions. 42 | 3. Finally, it executes those `dbgen` commands sequentially. 43 | 44 | Specification 45 | ------------- 46 | 47 | The content of the Jsonnet file should be typed like `Spec` below. 48 | 49 | ```typescript 50 | type Spec = { 51 | steps: ((matches: {[key: string]: Match}) => Step[]) | Step[], 52 | 53 | name: string, 54 | version: string, 55 | about: string, 56 | args: {[key: string]: Arg}, 57 | }; 58 | ``` 59 | 60 | ### Arg 61 | 62 | ```typescript 63 | type Arg = { 64 | short: string, 65 | long: string, 66 | help: string, 67 | required: boolean, 68 | default: string | null, 69 | type: 'bool' | 'str' | 'int' | 'size' | {choices: {choices: string[], multiple: boolean}} 70 | }; 71 | ``` 72 | 73 | * **short** (default: `''`) 74 | 75 | Short name of the argument, e.g. `short: 'f'` means the CLI accepts `-f`. 76 | 77 | If this field is empty, the argument does not have any short name. 78 | 79 | * **long** (default: *key*) 80 | 81 | Long name of the argument, e.g. 
`long: 'output-format'` means the CLI 82 | accepts `--output-format`. 83 | 84 | If this field is absent, the key used to introduced the argument is taken as 85 | the long name, e.g. in 86 | 87 | ```js 88 | { 89 | args: { 90 | example: { // <-- key = 'example' 91 | help: '...' 92 | } 93 | } 94 | } 95 | ``` 96 | 97 | the argument "example" implicitly contains `long: 'example'`, meaning the 98 | CLI accepts `--example`. 99 | 100 | * **help** (default: `''`) 101 | 102 | Human readable description of the argument shown in the `--help` screen. 103 | 104 | * **required** (default: `false`) 105 | 106 | Whether this argument is required. If set to true, the user must provide 107 | this argument or otherwise `dbdbgen` will exit. 108 | 109 | * **default** (default: `null`) 110 | 111 | The default input to use if the argument is absent. It represents the user 112 | input and thus must be a string regardless of the output type. 113 | 114 | This field is ignored when `required: true` or `type: 'bool'`. 115 | 116 | * **type** (default: `'str'`) 117 | 118 | The output type. Should be one of: 119 | 120 | | Value | Description | 121 | |----------|-------------| 122 | | `'bool'` | The argument is a flag, taking no input (present = true, absent = false). | 123 | | `'str'` | The argument is an arbitrary string. | 124 | | `'int'` | The argument is an unsigned decimal integer. Errors on non-integer. | 125 | | `'size'` | The argument is an unsigned integer for file byte size (e.g. `1 MiB`). Errors on non-integer. | 126 | | `'float'` | The argument is a floating point number. Errors on non-number. | 127 | | `{choices:…}` | The argument must be selected from the strings listed in the choices. | 128 | 129 | The function `dbdbgen.choices(['x', 'y'], multiple=b)` is equivalent 130 | to the object `{choices: {choices: ['x', 'y'], multiple: b}}`. Using the 131 | function is recommended. 132 | 133 | When **choices**.**multiple** is false, the output is a string. 
When 134 | **choices**.**multiple** is true, the output is a string array. 135 | 136 | Besides the **args** field, the **name**, **version** and **about** fields 137 | provide additional human-readable description of the program. They are shown in 138 | the `--help` screen. 139 | 140 | ### Match 141 | 142 | ```typescript 143 | type Match = boolean | string | number | string[]; 144 | ``` 145 | 146 | The **steps** field can either by an array of steps, or a function returning an 147 | array of steps. The function takes a map of "matches". 148 | 149 | The map keys form a subset of the keys of **args**. When the user did not 150 | provide an argument and it has no default value, the key will be missing from 151 | the matches. 152 | 153 | A special treatment is made for the `seed` key. If the `seed` key is missing 154 | from the matches, `dbdbgen` will generate a random 64-digit hex string and add 155 | to the final matches. This gives the program a source of randomness always. 156 | 157 | ### Step 158 | 159 | ```typescript 160 | interface Step { 161 | qualified: boolean, 162 | table_name: string | null, 163 | schema_name: string | null, 164 | out_dir: string, 165 | total_count: number, 166 | rows_per_file: number, 167 | size: number | null, 168 | escape_backslash: boolean, 169 | template_string: string, 170 | seed: string | null, 171 | jobs: number, 172 | rng: 'chacha12' | 'chacha20' | 'hc128' | 'isaac' | 'isaac64' | 'xorshift' | 'pcg32' | 'step', 173 | quiet: boolean, 174 | now: string | null, 175 | format: 'sql' | 'csv' | 'sql-insert-set', 176 | format_true: string | null, 177 | format_false: string | null, 178 | format_null: string | null, 179 | headers: boolean, 180 | compression: 'gzip' | 'xz' | 'zstd' | null, 181 | compress_level: number, 182 | components: ('schema' | 'table' | 'data')[], 183 | initialize: string[], 184 | } 185 | ``` 186 | 187 | Each step describes the arguments sent to `dbgen`. They correspond to the 188 | [`dbgen` CLI arguments](CLI.md). 
189 | 190 | | Field | `dbgen` CLI argument | Default value | 191 | |-------|----------------------|---------------| 192 | | qualified | `--qualified` | false | 193 | | table_name | `--table-name` | null | 194 | | schema_name | `--schema-name` | null | 195 | | out_dir | `-o`/`--out-dir` | **required** | 196 | | total_count | `-N`/`--total-count` | 1 | 197 | | rows_per_file | `-R`/`--rows-per-file` | 1 | 198 | | size | `-z`/`--size` | null | 199 | | escape_backslash | `--escape-backslash` | false | 200 | | template_string | `-e`/`--template-string` | **required** | 201 | | seed | `-s`/`--seed` | null | 202 | | jobs | `-j`/`--jobs` | 0 | 203 | | rng | `--rng` | 'hc128' | 204 | | quiet | `-q`/`--quiet` | false | 205 | | now | `--now` | null | 206 | | format | `-f`/`--format` | 'sql' | 207 | | format_true | `--format-true` | null | 208 | | format_false | `--format-false` | null | 209 | | format_null | `--format-null` | null | 210 | | headers | `--headers` | false | 211 | | compression | `-c`/`--compression` | null | 212 | | compress_level | `--compress-level` | 6 | 213 | | components | `--components` | ['table', 'data'] | 214 | | initialize | `-D`/`--initialize` | [] | 215 | 216 | Supplemental library 217 | -------------------- 218 | 219 | Besides the standard Jsonnet library (`std`), `dbdbgen` also bundles with a 220 | [supplemental library](dbdbgen/dbdbgen.libsonnet) which can be imported with 221 | 222 | ```jsonnet 223 | local dbdbgen = import 'dbdbgen.libsonnet'; 224 | ``` 225 | 226 | The name `'dbdbgen.libsonnet'` always refer this built-in library. Even if a 227 | file with this name exists in the local file system and `--allow-import` is 228 | enabled, `dbdbgen` will still read the built-in one instead. 229 | 230 | The library currently consists of the following fields: 231 | 232 | * `dbdbgen.stdArgs` 233 | 234 | The standard `dbgen`-compatible arguments that can be used as the **args** 235 | field in the program. 
236 | 237 | Note that this is a field, not a function. 238 | 239 | * `dbdbgen.choices(choices, multiple=false)` 240 | 241 | Produces a value used in **args[].type**, representing an argument taking 242 | value from one of the given choices. 243 | 244 | * `dbdbgen.xorSeed(seed, salt)` 245 | 246 | Given two strings of hex-digits, computes their bitwise-XOR. Example: 247 | 248 | ```jsonnet 249 | std.assertEqual(dbdbgen.xorSeed('1234abcd', '1357fedc'), '01635511') 250 | ``` 251 | 252 | * `dbdbgen.sha256(s)` 253 | 254 | Computes the SHA-256 hash of a string. Example: 255 | 256 | ```jsonnet 257 | std.assertEqual( 258 | dbdbgen.sha256('dbgen'), 259 | 'c069fb143dccd2e66d526e631d13d8511934a34f1cf4df95f0137ffe2d8287a8') 260 | ``` 261 | -------------------------------------------------------------------------------- /DbdbgenTutorial.md: -------------------------------------------------------------------------------- 1 | `dbdbgen` Tutorial 2 | ================== 3 | 4 | In this example, we will to create a `dbdbgen` program to generate a table with 5 | M columns and N rows populated with 0 or 1 randomly. 6 | 7 | `dbdbgen` programs are written in [Jsonnet](https://jsonnet.org/). We strongly 8 | recommend learning the Jsonnet language before continuing. You should also be 9 | familiar with the `dbgen` [template](./Template.md) language. 10 | 11 | ## Getting started 12 | 13 | We first create a Jsonnet file filled with basic information about the program: 14 | 15 | ```jsonnet 16 | // rand01.jsonnet 17 | { 18 | name: 'rand01', 19 | version: '0.1.0', 20 | about: 'Just a sample.', 21 | } 22 | ``` 23 | 24 | `dbdbgen` is able to recognize this basic skeleton. Its help screen shows the 25 | program's name, version and description: 26 | 27 | ```console 28 | $ dbdbgen rand01.jsonnet --help 29 | Just a sample. 
30 | 31 | Usage: dbdbgen rand01 32 | 33 | Options: 34 | -h, --help 35 | Print help 36 | -V, --version 37 | Print version 38 | ``` 39 | 40 | ## First step 41 | 42 | In a `dbdbgen` program, we place the instructions into the **steps** field. 43 | 44 | ```jsonnet 45 | { 46 | steps: [], 47 | } 48 | ``` 49 | 50 | Let's start by producing a table with 1 column and 1 row. The `dbgen` template 51 | is: 52 | 53 | ```sql 54 | CREATE TABLE rand01 ( 55 | col1 integer /*{{ rand.range_inclusive(0, 1) }}*/ 56 | ); 57 | ``` 58 | 59 | In a `dbdbgen` program, we put this as a string into the **template_string** 60 | field in a **steps** item. The output directory can also be placed into 61 | **out_dir** field: 62 | 63 | ```jsonnet 64 | { 65 | steps: [ 66 | { 67 | out_dir: 'rand01', 68 | template_string: ||| 69 | CREATE TABLE rand01 ( 70 | col1 integer /*{{ rand.range_inclusive(0, 1) }}*/ 71 | ); 72 | |||, 73 | }, 74 | ], 75 | } 76 | ``` 77 | 78 | Running this will produce the `./rand01` directory, containing the SQL dump of 79 | this 1×1 table. 80 | 81 | ```console 82 | $ dbdbgen rand01.jsonnet 83 | step 1 / 1 84 | Using seed: f376c0d7eb308d19858fe3286cc8900fe3a50bbd3f1daafab9f80cec72b5e22d 85 | Done! 86 | Size 31 B / 31 B 🕒 61 B/s 87 | 88 | $ ls rand01 89 | rand01.1.sql rand01-schema.sql 90 | 91 | $ cat rand01/rand01.1.sql 92 | INSERT INTO rand01 VALUES 93 | (0); 94 | ``` 95 | 96 | ## Simple arguments 97 | 98 | It is a bad idea to hard-code the output directory into the program. `dbdbgen` 99 | thus allows programs to accept additional command line arguments through the 100 | **args** field. 
101 | 102 | ```jsonnet 103 | { 104 | args: {}, 105 | steps: [], 106 | } 107 | ``` 108 | 109 | Here, we require the user to provide an `--out-dir`/`-o` argument: 110 | 111 | ```jsonnet 112 | { 113 | name: 'rand01', 114 | version: '0.2.0', 115 | about: 'Demonstrating the --out-dir flag.', 116 | args: { 117 | out_dir: { 118 | short: 'o', // we accept '-o' 119 | long: 'out-dir', // we accept '--out-dir' 120 | help: 'Output directory.', 121 | required: true, // the argument must be provided 122 | type: 'str', // the argument returns a string 123 | }, 124 | }, 125 | steps: [], 126 | } 127 | ``` 128 | 129 | If we run the program without `--out-dir`/`-o`, an error would occur. We can 130 | also check the updated help screen. 131 | 132 | ```console 133 | $ dbdbgen rand01.jsonnet 134 | error: the following required arguments were not provided: 135 | --out-dir 136 | 137 | Usage: dbdbgen rand01 --out-dir 138 | 139 | For more information, try '--help'. 140 | 141 | $ dbdbgen rand01.jsonnet --help 142 | Demonstrating the --out-dir flag. 143 | 144 | Usage: dbdbgen rand01 --out-dir 145 | 146 | Options: 147 | -o, --out-dir 148 | Output directory. 149 | -h, --help 150 | Print help 151 | -V, --version 152 | Print version 153 | ``` 154 | 155 | When the arguments are defined, the **steps** should be defined as a function to 156 | accept the result: 157 | 158 | ```jsonnet 159 | { 160 | name: 'rand01', 161 | version: '0.2.1', 162 | about: 'Demonstrating the --out-dir flag.', 163 | args: { 164 | out_dir: { 165 | short: 'o', 166 | long: 'out-dir', 167 | help: 'Output directory.', 168 | required: true, 169 | type: 'str', 170 | }, 171 | }, 172 | 173 | steps(matches):: [ // matches is a map of CLI matches. 
174 | { 175 | out_dir: matches.out_dir, // corresponds to the "out_dir" key in args 176 | template_string: ||| 177 | CREATE TABLE rand01 ( 178 | col1 integer /*{{ rand.range_inclusive(0, 1) }}*/ 179 | ); 180 | |||, 181 | } 182 | ], 183 | } 184 | ``` 185 | 186 | Now we can use the `-o` flag to set the output directory. 187 | 188 | ```console 189 | $ dbdbgen rand01.jsonnet -o rand01_2 190 | step 1 / 1 191 | Using seed: 58a5ca50c2b1abd3b8006524d74304ab65fd04750dbe4ff624bc057b30c71ed1 192 | Done! 193 | Size 31 B / 31 B 🕒 62 B/s 194 | 195 | $ ls rand01_2/ 196 | rand01.1.sql rand01-schema.sql 197 | ``` 198 | 199 | ## Standard arguments 200 | 201 | `dbgen` has a lot of configuration flags, it would be annoying to define them 202 | manually in every `dbdbgen` program. Therefore, `dbdbgen` provides a 203 | [supplemental library "`dbdbgen.libsonnet`"](dbdbgen/dbdbgen.libsonnet) with a 204 | predefined set of standard arguments. 205 | 206 | ```jsonnet 207 | local dbdbgen = import 'dbdbgen.libsonnet'; // import the supplemental library 208 | { 209 | name: 'rand01', 210 | version: '0.3.0', 211 | about: 'Demonstrating the standard arguments', 212 | args: dbdbgen.stdArgs, // use the standard dbgen arguments. 213 | steps: [], 214 | } 215 | ``` 216 | 217 | The matches returned by `dbdbgen.stdArgs` are compatible with **steps** items 218 | and can be used directly. 219 | 220 | ```jsonnet 221 | local dbdbgen = import 'dbdbgen.libsonnet'; 222 | { 223 | name: 'rand01', 224 | version: '0.3.1', 225 | about: 'Demonstrating the standard arguments', 226 | args: dbdbgen.stdArgs, 227 | steps(matches): [ 228 | matches { // just add the template_string field to the matches. 
229 | template_string: ||| 230 | CREATE TABLE rand01 ( 231 | col1 integer /*{{ rand.range_inclusive(0, 1) }}*/ 232 | ); 233 | |||, 234 | }, 235 | ], 236 | } 237 | ``` 238 | 239 | We can then use the standard `dbgen` CLI arguments like `--total-count`/`-N` and `--rows-per-file`/`-R` in this custom program: 240 | 241 | ```console 242 | $ dbdbgen rand01.jsonnet -h 243 | dbdbgen-rand01 0.3.1 244 | Demonstrating the standard arguments 245 | 246 | Usage: dbdbgen rand01 [OPTIONS] --out-dir 247 | 248 | Options: 249 | --components 250 | Components to write. [default: table,data] [possible values: schema, table, data] 251 | 252 | — « skipped » — 253 | 254 | -N, --total-count 255 | Total number of rows of the main table. 256 | -h, --help 257 | Print help 258 | -V, --version 259 | Print version 260 | 261 | $ dbdbgen rand01.jsonnet -o rand01_3 -N 3 -R 3 -f csv 262 | step 1 / 1 263 | Using seed: 12f1f00389a3034ad192e27dcec5631353b29cc18f6744b774b97051d2c868a1 264 | Done! 265 | Size 6 B / 6 B 🕒 12 B/s 266 | 267 | $ ls rand01_3/ 268 | rand01.1.csv rand01-schema.sql 269 | 270 | $ cat rand01_3/rand01.1.csv 271 | 1 272 | 1 273 | 0 274 | ``` 275 | 276 | ## Final result 277 | 278 | Finally, we construct the program for the original purpose: generates a table 279 | with M columns and N rows. We want the user to provide the number M: 280 | 281 | ```jsonnet 282 | local dbdbgen = import 'dbdbgen.libsonnet'; 283 | { 284 | name: 'rand01', 285 | version: '0.4.0', 286 | about: 'Generates an M×N table.', 287 | args: dbdbgen.stdArgs { 288 | columns_count: { 289 | short: 'M', 290 | long: 'columns-count', 291 | help: 'Number of columns.', 292 | required: true, 293 | type: 'int', 294 | }, 295 | }, 296 | steps: [], 297 | } 298 | ``` 299 | 300 | We can also modify the standard argument. 
For instance, we want to 301 | * Make `--total-count`/`-N` required too, instead of the default 1 302 | * Make `--rows-per-file`/`-R` default to a larger number, instead of the default 1 303 | * Disable irrelevant flags like `--escape-backslash` and `--now`. 304 | 305 | The outcome is this: 306 | 307 | ```jsonnet 308 | local dbdbgen = import 'dbdbgen.libsonnet'; 309 | { 310 | name: 'rand01', 311 | version: '0.4.1', 312 | about: 'Generates an M×N table.', 313 | args: dbdbgen.stdArgs { 314 | columns_count: { 315 | short: 'M', 316 | long: 'columns-count', 317 | help: 'Number of columns.', 318 | required: true, 319 | type: 'int', 320 | }, 321 | 322 | // modify the existing arguments with the `+:` syntax 323 | total_count+: { required: true }, 324 | rows_per_file+: { default: '1000' }, 325 | 326 | // remove existing arguments by hiding them 327 | escape_backslash:: null, 328 | now:: null, 329 | }, 330 | steps: [], 331 | } 332 | ``` 333 | 334 | Finally, fill in the step: 335 | 336 | ```jsonnet 337 | local dbdbgen = import 'dbdbgen.libsonnet'; 338 | { 339 | name: 'rand01', 340 | version: '0.4.2', 341 | about: 'Generates an M×N table.', 342 | args: dbdbgen.stdArgs { 343 | columns_count: { 344 | short: 'M', 345 | long: 'columns-count', 346 | help: 'Number of columns.', 347 | required: true, 348 | type: 'int', 349 | }, 350 | total_count+: { required: true }, 351 | rows_per_file+: { default: '1000' }, 352 | escape_backslash:: null, 353 | now:: null, 354 | }, 355 | 356 | steps(m): 357 | local col_fmt = 'col%d integer /*{{ rand.range_inclusive(0, 1) }}*/'; 358 | local columns = [col_fmt % i for i in std.range(1, m.columns_count)]; 359 | local template = 'CREATE TABLE rand01(%s);' % std.join(',', columns); 360 | [ m { template_string: template } ], 361 | } 362 | ``` 363 | 364 | Execute this to get our desired result: 365 | 366 | ```console 367 | $ dbdbgen rand01.jsonnet -M 6 -N 5 -o rand01_4 -f csv 368 | step 1 / 1 369 | Using seed: 
394dd95f85ccea9f2c001a8cf3df6732dc05c24ff649462e6586bc5d76c40269 370 | Done! 371 | Size 60 B / 60 B 🕒 120 B/s 372 | 373 | $ ls rand01_4/ 374 | rand01.1.csv rand01-schema.sql 375 | 376 | $ cat rand01_4/rand01-schema.sql 377 | CREATE TABLE rand01 (col1 integer,col2 integer,col3 integer,col4 integer,col5 integer,col6 integer); 378 | 379 | $ cat rand01_4/rand01.1.csv 380 | 0,0,0,0,1,1 381 | 0,0,1,0,1,0 382 | 0,0,1,0,0,1 383 | 0,1,1,1,0,1 384 | 1,0,0,1,1,0 385 | ``` 386 | -------------------------------------------------------------------------------- /Download.md: -------------------------------------------------------------------------------- 1 | Download and Install 2 | ==================== 3 | 4 | Pre-built binaries (Linux x86_64) 5 | --------------------------------- 6 | 7 | Pre-built binaries are provided for Linux (with glibc 2.14+) in x86_64 (amd64) architecture. 8 | 9 | 1. Go to the GitHub releases page . 10 | 11 | 2. Scroll to the **Assets** section. 12 | 13 | 3. Download the file with name like `dbgen-vX.Y.Z-x86_64-unknown-linux-gnu.tar.xz`. 14 | 15 | 4. Extract the archive. The executables inside can be run immediately. 16 | 17 | ```sh 18 | tar xf dbgen-*.tar.xz 19 | 20 | chmod a+x bin/* 21 | 22 | bin/dbgen --help 23 | ``` 24 | 25 | Install via cargo 26 | ----------------- 27 | 28 | On other platforms, `dbgen` can be built from source via `cargo install`. 29 | 30 | 1. Install a C compiler (e.g. `gcc` or `clang`) and `pkg-config` tool. 31 | These can typically be found in the system package manager. 32 | 33 | 2. Install the latest stable 34 | [Rust compiler with Cargo package manager](https://www.rust-lang.org/tools/install). 35 | 36 | 3. Once `cargo` is installed, run 37 | 38 | ```sh 39 | cargo install dbgen 40 | ``` 41 | 42 | to build and install `dbgen` into `~/.cargo/bin/`. 
43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Kenny Chan 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | `dbgen`: Database generator 2 | =========================== 3 | 4 | [![Crates.io](https://img.shields.io/crates/v/dbgen.svg)](https://crates.io/crates/dbgen) 5 | [![Build status](https://github.com/kennytm/dbgen/workflows/Rust/badge.svg)](https://github.com/kennytm/dbgen/actions?query=workflow%3ARust) 6 | [![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE.txt) 7 | 8 | `dbgen` is a program to quickly generate random SQL dump of a table following a given set of 9 | expressions. 
10 | 11 | * Usage 12 | * [Download and install](Download.md) 13 | * [Table generator `dbgen`](CLI.md) 14 | * [Schema generator `dbschemagen`](SchemaGen.md) 15 | 16 | * Reference 17 | * [Template reference](Template.md) 18 | * [Advanced template features](TemplateAdvanced.md) 19 | 20 | * Database generator `dbdbgen` 21 | * [`dbdbgen` tutorial](DbdbgenTutorial.md) 22 | * [`dbdbgen` reference](Dbdbgen.md) 23 | 24 | * [WASM playground](https://kennytm.github.io/dbgen/) 25 | -------------------------------------------------------------------------------- /SchemaGen.md: -------------------------------------------------------------------------------- 1 | Schema generator CLI usage 2 | ========================== 3 | 4 | ```sh 5 | dbschemagen -d mysql -s test_db -z 1e9 -t 5 -- --escape-backslash > gen.sh 6 | sh gen.sh 7 | ``` 8 | 9 | Common options 10 | -------------- 11 | 12 | * `-d «DIALECT»`, `--dialect «DIALECT»` 13 | 14 | Choose the SQL dialect of the generated schema files. This mainly controls the data type names. 15 | 16 | * `-s «NAME»`, `--schema-name «NAME»` 17 | 18 | The qualified schema name. 19 | 20 | * `-z «SIZE»`, `--size «SIZE»` 21 | 22 | The estimated total size of the generated data file. 23 | 24 | * `-t «N»`, `--tables-count «N»` 25 | 26 | Number of tables to generate. 27 | 28 | Note that `dbschemagen` will *not* uniformly distribute the same size to every file; rather, 29 | they're assigned following a Lomax distribution to simulate the size of real-world databases. 30 | 31 | * `-- «args»...` 32 | 33 | Any extra arguments will be passed to the `dbgen` invocations. 34 | 35 | 36 | More options 37 | ------------ 38 | 39 | * `-n «N»`, `--inserts-count «N»` 40 | 41 | Number of INSERT statements per file. 42 | 43 | * `-r «N»`, `--rows-count «N»` 44 | 45 | Number of rows per INSERT statement. 46 | 47 | * `--seed «SEED»` 48 | 49 | Provide a 64-digit hex number to seed the random number generator, so that the output becomes 50 | reproducible.
If not specified, the seed will be obtained from the system entropy. 51 | 52 | (Note: There is no guarantee that the same seed will produce the same output across major 53 | versions of `dbschemagen`.) 54 | 55 | -------------------------------------------------------------------------------- /TemplateAdvanced.md: -------------------------------------------------------------------------------- 1 | Advanced template features 2 | ========================== 3 | 4 | ## Global expressions 5 | 6 | The `{{ … }}` blocks can be placed before the first CREATE TABLE statement. These expressions would 7 | be evaluated once, and will not be written into the generated files. This is useful to define global 8 | constants used by all rows. 9 | 10 | ```sql 11 | {{ @dirs := array['North', 'West', 'East', 'South'] }} 12 | CREATE TABLE cardinals ( 13 | t INTEGER {{ rownum }}, 14 | d1 VARCHAR(5) {{ @dirs[rand.zipf(4, 0.8)] }}, 15 | d2 VARCHAR(5) {{ @dirs[rand.zipf(4, 0.8)] }} 16 | ); 17 | ``` 18 | 19 | Variables assigned in global expressions can be re-assigned, but the change is localized in the 20 | current file generator thread. Every new thread would be initialized by the same evaluated values. 21 | For instance if we generate 2 files given this template: 22 | 23 | ```sql 24 | {{ @value := rand.range(0, 100000) }} 25 | CREATE TABLE _ ( 26 | p INTEGER {{ @value }}, 27 | n INTEGER {{ @value := rand.range(0, 100000) }} 28 | ); 29 | ``` 30 | 31 | We may get 32 | 33 | ```sql 34 | ------ first file ------- 35 | INSERT INTO _ VALUES 36 | (58405, 87322), 37 | (87322, 41735), 38 | (41735, 91701); 39 | 40 | ------ second file ------ 41 | INSERT INTO _ VALUES 42 | (58405, 3046), 43 | (3046, 8087), 44 | (8087, 26211); 45 | ``` 46 | 47 | Note that the initial `@value` are the same for both files (`58405`), because `rand.range()` is only 48 | evaluated once. 
After generation started, though, each file acquires its own state and we see they 49 | evaluate `@value` differently without any interference. 50 | 51 | ## Derived tables 52 | 53 | In a relational database, contents of tables are related to each other, e.g. 54 | 55 | ```sql 56 | CREATE TABLE "parent" ( 57 | "parent_id" UUID PRIMARY KEY, 58 | "child_count" INT UNSIGNED NOT NULL 59 | ); 60 | 61 | CREATE TABLE "child" ( 62 | "child_id" UUID PRIMARY KEY, 63 | "parent_id" UUID NOT NULL REFERENCES "parent"("parent_id") 64 | ); 65 | ``` 66 | 67 | We want the two tables to be related such that: 68 | 69 | * `child.parent_id` refer to real IDs in the `parent` table 70 | * `parent.child_count` is an actual count of rows in `child` table having the specified `parent_id`. 71 | * `parent.child_count` are still random. 72 | 73 | These two tables therefore must be generated together. `dbgen` supports generating *derived tables* 74 | from the previous tables with this syntax: 75 | 76 | ```sql 77 | CREATE TABLE "parent" ( 78 | "parent_id" UUID PRIMARY KEY, 79 | /*{{ @parent_id := rand.uuid() }}*/ 80 | "child_count" INT UNSIGNED NOT NULL 81 | /*{{ @child_count := rand.range_inclusive(0, 4) }}*/ 82 | ); 83 | 84 | /*{{ for each row of "parent" generate @child_count rows of "child" }}*/ 85 | CREATE TABLE "child" ( 86 | "child_id" UUID PRIMARY KEY, 87 | /*{{ rand.uuid() }}*/ 88 | "parent_id" UUID NOT NULL REFERENCES "parent"("parent_id") 89 | /*{{ @parent_id }}*/ 90 | ); 91 | ``` 92 | 93 | This may produce 94 | 95 | ```sql 96 | ------ parent.1.sql ------ 97 | INSERT INTO "parent" VALUES 98 | ('451b789a-3438-4d6b-847e-ac6bb0d61988', 0), 99 | ('55200ffe-2304-4b68-a1a8-8467fbcbb339', 4), 100 | ('0082fa2d-c553-46df-aa61-7182accf1ea7', 2), 101 | ('c488c641-a92e-405c-870b-1e10a213e456', 1), 102 | … 103 | 104 | ------ child.1.sql ------- 105 | INSERT INTO "child" VALUES 106 | ('49188e47-d0da-4f1e-8c82-156138bb4887', '55200ffe-2304-4b68-a1a8-8467fbcbb339'), 107 | 
('0251ec50-8039-4e59-a04f-fc8143a9d278', '55200ffe-2304-4b68-a1a8-8467fbcbb339'), 108 | ('4dddc583-b175-4814-a677-02fa4ec295b8', '55200ffe-2304-4b68-a1a8-8467fbcbb339'), 109 | ('fb8bab0d-8f3a-4cf8-891d-d2ad6e7aac28', '55200ffe-2304-4b68-a1a8-8467fbcbb339'), 110 | ('1feb2f81-6000-4191-8cc3-95acbd3f1723', '0082fa2d-c553-46df-aa61-7182accf1ea7'), 111 | ('63e44b85-1779-4508-9598-c94df3eee10e', '0082fa2d-c553-46df-aa61-7182accf1ea7'), 112 | ('77d13d62-12ea-4fe7-98c5-35cb0f1daece', 'c488c641-a92e-405c-870b-1e10a213e456'), 113 | … 114 | ``` 115 | 116 | There can be multiple derived tables, and each can refer to any table before it as the generator. 117 | 118 | ```sql 119 | CREATE TABLE A ( … ); 120 | /*{{ for each row of A generate 2 rows of B }}*/ 121 | CREATE TABLE B ( … ); 122 | /*{{ for each row of B generate 1 row of C }}*/ 123 | CREATE TABLE C ( … ); 124 | /*{{ for each row of A generate 4 rows of D }}*/ 125 | CREATE TABLE D ( … ); 126 | ``` 127 | 128 | All derived rows share the same set of variables. Variables can be used to establish common values 129 | among the group of tables. 130 | 131 | ### `rownum` and `subrownum` 132 | 133 | In a derived table, `rownum` refers to the row number of the *main* table. If we generate 10 derived 134 | rows for each main row, all 10 rows will produce the same `rownum`. 135 | 136 | You can distinguish between derived rows of the same `rownum` using the `subrownum` symbol, which 137 | has values 1, 2, …, 10 if we generate 10 rows.
138 | 139 | ```sql 140 | -- INPUT: template.sql 141 | CREATE TABLE main ( … ); 142 | /*{{ for each row of main generate 3 rows of derived }}*/ 143 | CREATE TABLE derived ( 144 | rn INT /*{{ rownum }}*/, 145 | srn INT /*{{ subrownum }}*/, 146 | … 147 | ); 148 | 149 | -- RESULT: derived.1.sql 150 | INSERT INTO derived VALUES 151 | (1, 1, …), 152 | (1, 2, …), 153 | (1, 3, …), 154 | (2, 1, …), 155 | (2, 2, …), 156 | (2, 3, …), 157 | … 158 | ``` 159 | 160 | With a derived table hierarchy, the `rownum` always refer to the top table, and `subrownum` always 161 | refer to the current table. If you need the row numbers of the tables in between, store them into a 162 | variable, e.g. 163 | 164 | ```sql 165 | -- INPUT: template.sql 166 | CREATE TABLE "top" ( top_id INT /*{{ rownum }}*/, … ); 167 | /*{{ for each row of "top" generate 2 rows of "middle" }}*/ 168 | CREATE TABLE "middle" ( middle_id INT /*{{ @middle_id := subrownum }}*/, … ); 169 | /*{{ for each row of "middle" generate 2 rows of "bottom" }}*/ 170 | CREATE TABLE "bottom" ( 171 | top_id INT /*{{ rownum }}*/, 172 | middle_id INT /*{{ @middle_id }}*/, 173 | bottom_id INT /*{{ subrownum }}*/, 174 | … 175 | ); 176 | ``` 177 | 178 | ### File size concern 179 | 180 | Derived tables do not have individual `--total-count`, `--rows-per-file` and `--rows-count` 181 | settings. In particular, if we set `for each row of "main" generate N rows of "derived"`, the actual 182 | number of rows per INSERT statements of the derived table will be N times `--row-count` of the main 183 | table (deeper derivatives will cascade). This may produce excessively large tables when the number 184 | of rows to generate is huge. Therefore, if it is possible to generate the values independently, we 185 | recommend using two separate templates instead of derived tables. 186 | 187 | 188 | 189 | 190 | 191 |
NOT recommended. File size of main and derived data cannot be balanced.Recommended. File size of main and derived data can be balanced.
192 | 193 | ```sql 194 | -- template.sql 195 | CREATE TABLE main ( 196 | main_id INT PRIMARY KEY {{ rownum }} 197 | ); 198 | {{ for each row of main generate 3000 rows of derived }} 199 | CREATE TABLE derived ( 200 | main_id INT NOT NULL {{ rownum }}, 201 | sub_id INT NOT NULL {{ subrownum }}, 202 | PRIMARY KEY (main_id, sub_id) 203 | ); 204 | ``` 205 | 206 | 207 | 208 | ```sql 209 | -- main.sql 210 | CREATE TABLE main ( 211 | main_id INT PRIMARY KEY {{ rownum }} 212 | ); 213 | ``` 214 | 215 | ```sql 216 | -- derived.sql 217 | CREATE TABLE derived ( 218 | main_id INT NOT NULL {{ div(rownum-1, 3000)+1 }}, 219 | sub_id INT NOT NULL {{ mod(rownum-1, 3000)+1 }}, 220 | PRIMARY KEY (main_id, sub_id) 221 | ); 222 | ``` 223 | 224 |
225 | -------------------------------------------------------------------------------- /benches/benchmark.rs: -------------------------------------------------------------------------------- 1 | use criterion::{Bencher, Criterion, black_box, criterion_group, criterion_main}; 2 | use dbgen::{ 3 | eval::{CompileContext, State}, 4 | format::Options, 5 | parser::Template, 6 | span::Registry, 7 | }; 8 | use rand::SeedableRng; 9 | use rand_hc::Hc128Rng; 10 | use std::{ 11 | fs::read_to_string, 12 | io::{Write, sink}, 13 | }; 14 | 15 | fn run_benchmark(b: &mut Bencher<'_>, path: &str) { 16 | let mut registry = Registry::default(); 17 | let mut template = Template::parse(&read_to_string(path).unwrap(), &[], None, &mut registry).unwrap(); 18 | let ctx = CompileContext::new(template.variables_count); 19 | let row = ctx.compile_row(template.tables.swap_remove(0).exprs).unwrap(); 20 | let mut state = State::new(1, Box::new(Hc128Rng::from_seed([0x41; 32])), ctx); 21 | let options = Options::default(); 22 | let mut sink: Box = Box::new(sink()); 23 | 24 | b.iter(move || { 25 | let values = black_box(&row).eval(black_box(&mut state)).unwrap(); 26 | for value in values { 27 | options.write_sql_value(black_box(&mut *sink), &value).unwrap(); 28 | } 29 | }); 30 | } 31 | 32 | fn bench_templates(c: &mut Criterion) { 33 | c.bench_function("sysbench_oltp_uniform", |b| { 34 | run_benchmark(b, "res/sysbench/oltp_uniform_mysql.sql"); 35 | }); 36 | } 37 | 38 | criterion_group!(benches, bench_templates); 39 | criterion_main!(benches); 40 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | use vergen::EmitBuilder; 2 | 3 | fn main() { 4 | EmitBuilder::builder() 5 | .git_sha(false) 6 | .cargo_target_triple() 7 | .emit() 8 | .unwrap(); 9 | } 10 | -------------------------------------------------------------------------------- /dbdbgen/Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [package] 2 | name = "dbdbgen" 3 | version = "0.8.0" 4 | authors = ["kennytm "] 5 | edition = "2024" 6 | description = "Generate complete database using dbgen" 7 | 8 | [dependencies] 9 | dbgen = { path = "../" } 10 | jsonnet-rs = "0.17" 11 | serde = { workspace = true } 12 | serde_json = { workspace = true } 13 | clap = { workspace = true, features = ["string"] } 14 | thiserror = { workspace = true } 15 | rand = { workspace = true } 16 | data-encoding = { workspace = true } 17 | sha2 = "0.10" 18 | parse-size = { workspace = true } 19 | -------------------------------------------------------------------------------- /dbdbgen/dbdbgen.libsonnet: -------------------------------------------------------------------------------- 1 | { 2 | /// Decodes the given hex string into an array of u32. 3 | local hexDecodeU32(str) = 4 | [std.parseHex(str[(i<<3)-8:i<<3]) for i in std.range(1, std.length(str)>>3)], 5 | 6 | /// Encodes the array of u32 into a hex string. 7 | local hexEncodeU32(u32s) = 8 | std.join('', [std.format('%08x', b) for b in u32s]), 9 | 10 | /// Generates a new seed by xor with a salt hex string. 11 | xorSeed(seed, salt):: 12 | local seedBytes = hexDecodeU32(seed); 13 | local saltBytes = hexDecodeU32(salt); 14 | local xor = std.mapWithIndex(function(i, a) a ^ saltBytes[i], seedBytes); 15 | hexEncodeU32(xor), 16 | 17 | /// Computes the SHA-256 hash of a string. Returns a hex string. 18 | sha256(s):: std.native('sha256')(s), 19 | 20 | /// Used in argument values, indicating the argument should be chosen from a 21 | /// list of possible choices. 22 | choices(choices, multiple=false):: 23 | {choices: {choices: choices, multiple: multiple}}, 24 | 25 | /// The standard CLI arguments. 
26 | stdArgs:: { 27 | qualified: { 28 | help: 'Keep the qualified name when writing the SQL statements.', 29 | type: 'bool', 30 | }, 31 | schema_name: { 32 | long: 'schema-name', 33 | help: 'Override the schema name.', 34 | }, 35 | out_dir: { 36 | short: 'o', 37 | long: 'out-dir', 38 | help: 'Output directory.', 39 | required: true, 40 | }, 41 | rows_count: { 42 | short: 'r', 43 | long: 'rows-count', 44 | help: 'Number of rows per INSERT statement.', 45 | type: 'int', 46 | default: '1', 47 | }, 48 | total_count: { 49 | short: 'N', 50 | long: 'total-count', 51 | help: 'Total number of rows of the main table.', 52 | type: 'size', 53 | }, 54 | rows_per_file: { 55 | short: 'R', 56 | long: 'rows-per-file', 57 | help: 'Number of rows per file.', 58 | type: 'size', 59 | }, 60 | size: { 61 | short: 'z', 62 | help: 'Target pre-compressed size of each file.', 63 | type: 'size', 64 | }, 65 | escape_backslash: { 66 | long: 'escape-backslash', 67 | help: 'Escape backslashes when writing a string.', 68 | type: 'bool', 69 | }, 70 | seed: { 71 | short: 's', 72 | help: 'Random number generator seed (should have 64 hex digits).', 73 | }, 74 | jobs: { 75 | short: 'j', 76 | help: 'Number of jobs to run in parallel, default to number of CPUs.', 77 | type: 'int', 78 | default: '0', 79 | }, 80 | rng: { 81 | help: 'Random number generator engine.', 82 | type: $.choices(['chacha12', 'chacha20', 'hc128', 'isaac', 'isaac64', 'xorshift', 'pcg32', 'step']), 83 | default: 'hc128', 84 | }, 85 | quiet: { 86 | short: 'q', 87 | help: 'Disable progress bar.', 88 | type: 'bool', 89 | }, 90 | now: { 91 | help: 'Override the current timestamp (always in UTC), in the format "YYYY-mm-dd HH:MM:SS.fff".', 92 | }, 93 | format: { 94 | short: 'f', 95 | help: 'Output format.', 96 | type: $.choices(['sql', 'csv', 'sql-insert-set']), 97 | default: 'sql', 98 | }, 99 | format_true: { 100 | long: 'format-true', 101 | help: 'The keyword to print for a boolean TRUE value.', 102 | }, 103 | format_false: { 104 | long: 
'format-false', 105 | help: 'The keyword to print for a boolean FALSE value.', 106 | }, 107 | format_null: { 108 | long: 'format-null', 109 | help: 'The keyword to print for a NULL value.', 110 | }, 111 | headers: { 112 | help: 'Include column names or headers in the output.', 113 | type: 'bool', 114 | }, 115 | compression: { 116 | short: 'c', 117 | help: 'Compress data output.', 118 | type: $.choices(['gzip', 'xz', 'zstd']), 119 | }, 120 | compress_level: { 121 | long: 'compress-level', 122 | help: 'Compression level (0-9 for gzip and xz, 1-21 for zstd).', 123 | type: 'int', 124 | default: '6', 125 | }, 126 | components: { 127 | help: 'Components to write.', 128 | type: $.choices(['schema', 'table', 'data'], multiple=true), 129 | default: 'table,data', 130 | }, 131 | }, 132 | } 133 | -------------------------------------------------------------------------------- /dbdbgen/src/bin/dbdbgen.rs: -------------------------------------------------------------------------------- 1 | use clap::{Arg, ArgAction, Command}; 2 | use dbdbgen::{cli::ensure_seed, error::Error, jsvm::Vm}; 3 | use dbgen::{FULL_VERSION, span::Registry}; 4 | use std::error::Error as StdError; 5 | 6 | fn run() -> Result<(), Error> { 7 | let global_matches = Command::new("dbdbgen") 8 | .long_version(FULL_VERSION) 9 | .trailing_var_arg(true) 10 | .args(&[ 11 | Arg::new("dry-run") 12 | .long("dry-run") 13 | .action(ArgAction::SetTrue) 14 | .help("Only display the evaluated dbdbgen result without generating data."), 15 | Arg::new("allow-import") 16 | .long("allow-import") 17 | .action(ArgAction::SetTrue) 18 | .help("Allows `import` and `importstr` to read files."), 19 | Arg::new("file") 20 | .help("The Jsonnet file to execute, followed by the arguments passed to it.") 21 | .action(ArgAction::Append) 22 | .required(true) 23 | .allow_hyphen_values(true), 24 | ]) 25 | .get_matches(); 26 | let mut args = global_matches.get_raw("file").unwrap(); 27 | let src_file = args.next().unwrap(); 28 | 29 | let mut vm = 
Vm::new(src_file, global_matches.get_flag("allow-import"))?; 30 | let app = vm.eval_arguments()?; 31 | let mut matches = app.get_matches(args); 32 | ensure_seed(&mut matches); 33 | let steps = vm.eval_steps(matches)?; 34 | 35 | if global_matches.get_flag("dry-run") { 36 | println!( 37 | "/* dbdbgen{}\n*/\n{{\"steps\": {}}}", 38 | FULL_VERSION, 39 | serde_json::to_string_pretty(&steps).unwrap() 40 | ); 41 | return Ok(()); 42 | } 43 | 44 | let steps_count = steps.len(); 45 | for (step, arg) in steps.into_iter().enumerate() { 46 | if !arg.quiet { 47 | eprintln!("step {} / {}", step + 1, steps_count); 48 | } 49 | let mut registry = Registry::default(); 50 | dbgen::cli::run(arg, &mut registry).map_err(|e| Error::Dbgen { 51 | step, 52 | message: registry.describe(&e), 53 | })?; 54 | } 55 | 56 | Ok(()) 57 | } 58 | 59 | fn main() { 60 | if let Err(e) = run() { 61 | eprintln!("{}\n", e); 62 | let mut err: &dyn StdError = &e; 63 | while let Some(source) = err.source() { 64 | eprintln!("Cause: {}", source); 65 | err = source; 66 | } 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /dbdbgen/src/cli.rs: -------------------------------------------------------------------------------- 1 | use clap::{ 2 | self, ArgAction, Command, 3 | builder::{PossibleValuesParser, ValueParser}, 4 | value_parser, 5 | }; 6 | use data_encoding::HEXLOWER_PERMISSIVE; 7 | use rand::{RngCore as _, rngs::OsRng}; 8 | use serde::{Deserialize, Serialize}; 9 | use std::{ 10 | collections::{BTreeMap, HashMap}, 11 | ffi::OsString, 12 | }; 13 | 14 | #[derive(Deserialize, Debug, Clone, PartialEq)] 15 | #[serde(rename_all = "lowercase")] 16 | pub enum ArgType { 17 | Bool, 18 | Str, 19 | Int, 20 | Size, 21 | Float, 22 | Choices { choices: Vec, multiple: bool }, 23 | } 24 | 25 | impl ArgType { 26 | fn arg_action(&self) -> ArgAction { 27 | match self { 28 | Self::Bool => ArgAction::SetTrue, 29 | Self::Choices { multiple: true, .. 
} => ArgAction::Append, 30 | _ => ArgAction::Set, 31 | } 32 | } 33 | 34 | fn value_parser(&self) -> ValueParser { 35 | fn specialized_parse_size(s: &str) -> Result { 36 | parse_size::parse_size(s) 37 | } 38 | 39 | match self { 40 | Self::Bool => ValueParser::bool(), 41 | Self::Str => ValueParser::string(), 42 | Self::Int => value_parser!(u64).into(), 43 | Self::Size => ValueParser::new(specialized_parse_size), 44 | Self::Float => value_parser!(f64).into(), 45 | Self::Choices { choices, .. } => ValueParser::new(PossibleValuesParser::new(choices)), 46 | } 47 | } 48 | } 49 | 50 | impl Default for ArgType { 51 | fn default() -> Self { 52 | Self::Str 53 | } 54 | } 55 | 56 | #[derive(Deserialize, Default, Debug)] 57 | #[serde(default)] 58 | pub struct Arg { 59 | pub short: String, 60 | pub long: String, 61 | pub help: String, 62 | pub required: bool, 63 | pub default: Option, 64 | pub r#type: ArgType, 65 | } 66 | 67 | #[derive(Deserialize, Default, Debug)] 68 | #[serde(default)] 69 | pub struct App { 70 | pub name: String, 71 | pub version: String, 72 | pub about: String, 73 | pub args: BTreeMap, 74 | } 75 | 76 | #[derive(Serialize, Debug)] 77 | #[serde(untagged)] 78 | pub enum Match { 79 | Bool(bool), 80 | Str(String), 81 | Int(u64), 82 | Float(f64), 83 | Array(Vec), 84 | } 85 | 86 | pub type Matches<'a> = HashMap<&'a str, Match>; 87 | 88 | impl App { 89 | /// Constructs the clap App from this simplified specification. 
90 | fn to_clap_app(&self) -> Command { 91 | use clap::builder::{OsStr, Resettable}; 92 | 93 | Command::new(&self.name) 94 | .bin_name(format!("dbdbgen {}", self.name)) 95 | .version(&self.version) 96 | .about(&self.about) 97 | .no_binary_name(true) 98 | .next_line_help(true) 99 | .args(self.args.iter().map(|(name, arg)| { 100 | let arg_action = arg.r#type.arg_action(); 101 | let mut clap_arg = clap::Arg::new(name); 102 | if matches!(arg_action, ArgAction::Append) { 103 | clap_arg = clap_arg.value_delimiter(','); 104 | } 105 | clap_arg 106 | .long(if arg.long.is_empty() { name } else { &arg.long }) 107 | .help(&arg.help) 108 | .short(arg.short.chars().next()) 109 | .action(arg_action) 110 | .value_parser(arg.r#type.value_parser()) 111 | .required(arg.required) 112 | .default_value(Resettable::from(arg.default.as_ref().map(OsStr::from))) 113 | })) 114 | } 115 | 116 | /// Obtains the matches from the command line. 117 | pub fn get_matches(&self, args: I) -> Matches<'_> 118 | where 119 | I: IntoIterator, 120 | I::Item: Into + Clone, 121 | { 122 | let clap_app = self.to_clap_app(); 123 | let matches = clap_app.get_matches_from(args); 124 | let mut result = HashMap::with_capacity(self.args.len()); 125 | for (name, arg) in &self.args { 126 | macro_rules! get_one { 127 | ($ty:ty) => { 128 | if let Some(value) = matches.get_one(name) { 129 | let value: &$ty = value; 130 | value.clone() 131 | } else { 132 | continue; 133 | } 134 | }; 135 | } 136 | 137 | let value = match &arg.r#type { 138 | ArgType::Bool => Match::Bool(matches.get_flag(name)), 139 | ArgType::Str | ArgType::Choices { multiple: false, .. } => Match::Str(get_one!(String)), 140 | ArgType::Int | ArgType::Size => Match::Int(get_one!(u64)), 141 | ArgType::Float => Match::Float(get_one!(f64)), 142 | ArgType::Choices { multiple: true, .. 
} => { 143 | if let Some(values) = matches.get_many(name) { 144 | Match::Array(values.cloned().collect()) 145 | } else { 146 | continue; 147 | } 148 | } 149 | }; 150 | result.insert(&**name, value); 151 | } 152 | result 153 | } 154 | } 155 | 156 | pub fn ensure_seed(matches: &mut Matches<'_>) { 157 | matches.entry("seed").or_insert_with(|| { 158 | let mut buf = [0u8; 32]; 159 | OsRng.fill_bytes(&mut buf); 160 | Match::Str(HEXLOWER_PERMISSIVE.encode(&buf)) 161 | }); 162 | } 163 | -------------------------------------------------------------------------------- /dbdbgen/src/error.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt, io}; 2 | use thiserror::Error as ThisError; 3 | 4 | /// The purpose of Jsonnet evaluation. 5 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 6 | pub enum Purpose { 7 | /// Produce argument specifications as CLI for itself. 8 | Arguments, 9 | /// Produce configurations for dbgen execution. 10 | Execution { step: usize }, 11 | } 12 | 13 | impl fmt::Display for Purpose { 14 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 15 | match self { 16 | Self::Arguments => f.write_str("arguments"), 17 | Self::Execution { step } => write!(f, "execution (index={})", step), 18 | } 19 | } 20 | } 21 | 22 | #[derive(ThisError, Debug)] 23 | pub enum Error { 24 | #[error(transparent)] 25 | Io(#[from] io::Error), 26 | 27 | #[error("failed to evaluate Jsonnet template for {}:\n{}", .purpose, .message)] 28 | Jsonnet { purpose: Purpose, message: String }, 29 | 30 | #[error("cannot deserialize Jsonnet output for {}\n\n{}", .purpose, .src)] 31 | Serde { 32 | purpose: Purpose, 33 | src: String, 34 | #[source] 35 | error: serde_json::Error, 36 | }, 37 | 38 | #[error("cannot execute dbgen (index={}):\n{}", .step, .message)] 39 | Dbgen { step: usize, message: String }, 40 | } 41 | -------------------------------------------------------------------------------- /dbdbgen/src/jsvm.rs: 
-------------------------------------------------------------------------------- 1 | use crate::{ 2 | cli::{App, Matches}, 3 | error::{Error, Purpose}, 4 | }; 5 | use data_encoding::HEXLOWER_PERMISSIVE; 6 | use dbgen::cli::Args; 7 | use jsonnet::{JsonVal, JsonValue, JsonnetVm}; 8 | use serde::Deserialize; 9 | use sha2::{Digest, Sha256}; 10 | use std::{ffi::OsStr, fs::read_to_string, path::Path}; 11 | 12 | pub struct Vm<'p> { 13 | vm: JsonnetVm, 14 | path: &'p OsStr, 15 | } 16 | 17 | fn deserialize<'a, T: Deserialize<'a>>(js: &'a str, purpose: Purpose) -> Result { 18 | serde_json::from_str(js).map_err(|error| { 19 | use std::fmt::Write; 20 | 21 | let mut src = String::new(); 22 | let end_line = error.line(); 23 | let start_line = end_line.saturating_sub(5); 24 | for (line, line_num) in js.lines().skip(start_line).zip(start_line..end_line) { 25 | writeln!(&mut src, "{:5} | {}", line_num + 1, line).unwrap(); 26 | } 27 | src.push_str(&" ".repeat(7 + error.column())); 28 | src.push('^'); 29 | 30 | Error::Serde { purpose, src, error } 31 | }) 32 | } 33 | 34 | impl<'p> Vm<'p> { 35 | pub fn new(path: &'p OsStr, allow_import: bool) -> Result { 36 | let content = read_to_string(path)?; 37 | let mut vm = JsonnetVm::new(); 38 | vm.import_callback(|_, base, rel| { 39 | if rel == Path::new("dbdbgen.libsonnet") { 40 | Ok((rel.to_owned(), include_str!("../dbdbgen.libsonnet").to_owned())) 41 | } else if allow_import { 42 | let path = base.join(rel); 43 | let text = read_to_string(&path).map_err(|e| e.to_string())?; 44 | Ok((path, text)) 45 | } else { 46 | Err("external import is disabled".to_owned()) 47 | } 48 | }); 49 | vm.native_callback("sha256", vm_sha256, &["s"]); 50 | vm.tla_code("src", &content); 51 | Ok(Self { vm, path }) 52 | } 53 | 54 | pub fn eval_arguments(&mut self) -> Result { 55 | let app_js = self 56 | .vm 57 | .evaluate_snippet( 58 | self.path, 59 | "function(src) {[k]: src[k] for k in ['name', 'version', 'about', 'args'] if k in src}", 60 | ) 61 | .map_err(|error| 
Error::Jsonnet { 62 | purpose: Purpose::Arguments, 63 | message: error.to_string(), 64 | })?; 65 | let app = deserialize(&app_js, Purpose::Arguments)?; 66 | Ok(app) 67 | } 68 | 69 | pub fn eval_steps(&mut self, matches: Matches<'_>) -> Result, Error> { 70 | let matches_js = serde_json::to_string(&matches).unwrap(); 71 | self.vm.tla_code("matches", &matches_js); 72 | 73 | let steps_js_stream = self 74 | .vm 75 | .evaluate_snippet_stream( 76 | self.path, 77 | "function(src, matches) if std.isArray(src.steps) then src.steps else src.steps(matches)", 78 | ) 79 | .map_err(|error| Error::Jsonnet { 80 | purpose: Purpose::Execution { step: 0 }, 81 | message: error.to_string(), 82 | })?; 83 | 84 | let mut steps = Vec::new(); 85 | for (i, steps_js) in steps_js_stream.iter().enumerate() { 86 | let step = deserialize(steps_js, Purpose::Execution { step: i })?; 87 | steps.push(step); 88 | } 89 | 90 | Ok(steps) 91 | } 92 | } 93 | 94 | fn vm_sha256<'a>(vm: &'a JsonnetVm, args: &[JsonVal<'a>]) -> Result, String> { 95 | let input = args[0].as_str().ok_or("expected a string")?; 96 | let mut hasher = Sha256::new(); 97 | hasher.update(input); 98 | let result = hasher.finalize(); 99 | Ok(JsonValue::from_str(vm, &HEXLOWER_PERMISSIVE.encode(&result))) 100 | } 101 | -------------------------------------------------------------------------------- /dbdbgen/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod cli; 2 | pub mod error; 3 | pub mod jsvm; 4 | -------------------------------------------------------------------------------- /dbgen-playground/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dbgen-playground" 3 | version = "0.8.0" 4 | authors = ["kennytm "] 5 | edition = "2024" 6 | description = "dbgen playground" 7 | publish = false 8 | 9 | [lib] 10 | crate-type = ["cdylib"] 11 | 12 | [dependencies] 13 | dbgen = { path = "../", default-features = false } 14 | 
chrono = { workspace = true } 15 | wasm-bindgen = "0.2" 16 | serde-wasm-bindgen = "0.6" 17 | rand = { workspace = true } 18 | rand_hc = { workspace = true } 19 | serde = { workspace = true } 20 | getrandom = { version = "*", features = ["js"]} -------------------------------------------------------------------------------- /dbgen-playground/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | dbgen playground 6 | 91 | 92 | 93 | 94 |
95 | 106 | 107 | 108 |
109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 |
119 |
120 |
121 |
122 |
123 | 124 | 125 | 304 | 305 | -------------------------------------------------------------------------------- /dbgen-playground/src/lib.rs: -------------------------------------------------------------------------------- 1 | use chrono::NaiveDateTime; 2 | use dbgen::{ 3 | FULL_VERSION, 4 | error::Error, 5 | eval::{CompileContext, Schema, State}, 6 | format::Options, 7 | parser::Template, 8 | span::{Registry, ResultExt, S}, 9 | value::{TIMESTAMP_FORMAT, Value}, 10 | writer::{Env, Writer}, 11 | }; 12 | use rand::{Rng, SeedableRng}; 13 | use rand_hc::Hc128Rng; 14 | use serde::Serialize; 15 | use std::{borrow::Cow, convert::TryFrom, mem}; 16 | use wasm_bindgen::prelude::*; 17 | 18 | #[derive(Default)] 19 | struct TableWriter { 20 | rows: Vec>, 21 | } 22 | 23 | #[derive(Serialize)] 24 | struct Table { 25 | name: String, 26 | column_names: Vec, 27 | rows: Vec>, 28 | } 29 | 30 | impl Writer for TableWriter { 31 | fn write_value(&mut self, value: &Value) -> Result<(), S> { 32 | let options = Options { 33 | true_string: Cow::Borrowed("TRUE"), 34 | false_string: Cow::Borrowed("FALSE"), 35 | ..Options::default() 36 | }; 37 | 38 | let mut output = Vec::new(); 39 | options.write_sql_value(&mut output, value).unwrap_throw(); 40 | let output = String::from_utf8(output).unwrap_throw(); 41 | self.rows.last_mut().unwrap_throw().push(output); 42 | Ok(()) 43 | } 44 | 45 | fn write_file_header(&mut self, _: &Schema<'_>) -> Result<(), S> { 46 | Ok(()) 47 | } 48 | 49 | fn write_header(&mut self, _: &Schema<'_>) -> Result<(), S> { 50 | self.write_row_separator() 51 | } 52 | 53 | fn write_value_header(&mut self, _: &str) -> Result<(), S> { 54 | Ok(()) 55 | } 56 | 57 | fn write_value_separator(&mut self) -> Result<(), S> { 58 | Ok(()) 59 | } 60 | 61 | fn write_row_separator(&mut self) -> Result<(), S> { 62 | let columns = self.rows.last().map_or(0, |r| r.len()); 63 | self.rows.push(Vec::with_capacity(columns)); 64 | Ok(()) 65 | } 66 | 67 | fn write_trailer(&mut self) -> Result<(), S> { 68 
| Ok(()) 69 | } 70 | } 71 | 72 | fn try_generate_rows( 73 | template: &str, 74 | rows: usize, 75 | now: &str, 76 | seed: &[u8], 77 | span_registry: &mut Registry, 78 | ) -> Result, S> { 79 | let now = NaiveDateTime::parse_from_str(now, TIMESTAMP_FORMAT).no_span_err()?; 80 | let seed = <&::Seed>::try_from(seed) 81 | .map_err(|e| Error::InvalidArguments(format!("invalid seed: {}", e))) 82 | .no_span_err()?; 83 | 84 | let template = Template::parse(template, &[], None, span_registry)?; 85 | let mut ctx = CompileContext::new(template.variables_count); 86 | ctx.current_timestamp = now; 87 | let tables = template 88 | .tables 89 | .into_iter() 90 | .map(|t| ctx.compile_table(t)) 91 | .collect::, _>>()?; 92 | 93 | // we perform this double seeding to be compatible with the CLI. 94 | let mut seeding_rng = Hc128Rng::from_seed(*seed); 95 | let mut rng = move || Box::new(Hc128Rng::from_seed(seeding_rng.r#gen())); 96 | 97 | if !template.global_exprs.is_empty() { 98 | let row_gen = ctx.compile_row(template.global_exprs)?; 99 | let mut state = State::new(0, rng(), ctx); 100 | row_gen.eval(&mut state)?; 101 | ctx = state.into_compile_context(); 102 | } 103 | 104 | let mut state = State::new(1, rng(), ctx); 105 | let mut env = Env::new(&tables, &mut state, false, |_| Ok(TableWriter::default()))?; 106 | for _ in 0..rows { 107 | env.write_row()?; 108 | } 109 | 110 | Ok(env 111 | .tables() 112 | .map(|(table, writer)| { 113 | let schema = table.schema(false); 114 | Table { 115 | name: schema.name.to_owned(), 116 | column_names: schema.column_names().map(|s| s.to_owned()).collect(), 117 | rows: mem::take(&mut writer.rows), 118 | } 119 | }) 120 | .collect()) 121 | } 122 | 123 | #[wasm_bindgen] 124 | pub fn generate_rows(template: &str, rows: usize, now: &str, seed: &[u8]) -> Result { 125 | let mut registry = Registry::default(); 126 | match try_generate_rows(template, rows, now, seed, &mut registry) { 127 | Ok(result) => serde_wasm_bindgen::to_value(&result).map_err(|e| 
e.to_string().into()), 128 | Err(e) => Err(registry.describe(&e).into()), 129 | } 130 | } 131 | 132 | #[wasm_bindgen] 133 | pub fn version() -> String { 134 | FULL_VERSION.to_owned() 135 | } 136 | -------------------------------------------------------------------------------- /deny.toml: -------------------------------------------------------------------------------- 1 | [licenses] 2 | allow = ['MIT'] 3 | 4 | [[licenses.exceptions]] 5 | allow = ['Apache-2.0'] 6 | name = 'ryu' 7 | version = '1.0' 8 | 9 | [[licenses.exceptions]] 10 | allow = ['Unicode-DFS-2016'] 11 | name = 'unicode-ident' 12 | version = '1.0.11' -------------------------------------------------------------------------------- /fuzz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | cargo +nightly fuzz run fuzz_target_1 -------------------------------------------------------------------------------- /fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | artifacts 3 | corpus/*/* 4 | !corpus/*/*.seed 5 | -------------------------------------------------------------------------------- /fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "dbgen-fuzz" 4 | version = "0.0.1" 5 | authors = ["Automatically generated"] 6 | publish = false 7 | edition = "2024" 8 | 9 | [package.metadata] 10 | cargo-fuzz = true 11 | 12 | [dependencies] 13 | tempfile = "3.0" 14 | dbgen = { path = ".." 
} 15 | libfuzzer-sys = "0.2.1" 16 | 17 | # Prevent this from interfering with workspaces 18 | [workspace] 19 | members = ["."] 20 | 21 | [[bin]] 22 | name = "fuzz_target_1" 23 | path = "fuzz_targets/fuzz_target_1.rs" 24 | -------------------------------------------------------------------------------- /fuzz/corpus/fuzz_target_1/a.seed: -------------------------------------------------------------------------------- 1 | /*Yt0idEQBIu,I!H~F&U{ylX@:!Qg^*/Create Table Foo.Bar(a Integer{{@a:=rownum}},b Integer{{@a}},c Integer{{rownum}},d Real{{rand.uniform(0,1)}}); -------------------------------------------------------------------------------- /fuzz/corpus/fuzz_target_1/b.seed: -------------------------------------------------------------------------------- 1 | --#F&"x}Gk@95--"C1AxsGGJ=BUDo*/create table [contacts] ( 2 | [contact id] integer primary key {{ rownum + 0x1000 }}, 3 | [first name] text not null {{ @first := rand.regex('[A-Z][a-z]*') }}, 4 | [last name] text not null {{ @last := rand.regex('[A-Z][a-z]*') }}, 5 | [email] text not null unique {{ @first || '.' 
|| @last || '@' || rand.regex('\w+\.com') }}, 6 | [phone] text unique {{ case rand.bool(0.9) when true then rand.regex('[0-9#*]{8}') end }}, 7 | [last updated] datetime not null {{ timestamp '2015-01-01 00:00:00' + interval rownum day }} 8 | ); 9 | -------------------------------------------------------------------------------- /fuzz/corpus/fuzz_target_1/d.seed: -------------------------------------------------------------------------------- 1 | /*FS4^X;,8lV8i#pG?7uxEpf"DiP9s*/cREaTE tABle "αブ$"("èç¡"TexT{{rAND.rEgEx('éı[&①]')}} 2 | {{0x1+(2-3)*4.0/-5 oR FalSE AnD NOt 0.0}}{{rOwNUm/(rowNUM-1)}}COmMeNT'☺≠6¿ū',"ßK~“"iNTeGeR,"¢_""++"DOUble NuLL,iNDex("èç¡"))/*ω★*/; -------------------------------------------------------------------------------- /fuzz/corpus/fuzz_target_1/e.seed: -------------------------------------------------------------------------------- 1 | a^zqe)u_eC2rSpO$D@u_m)+JfVKseOo\create database xyz; -------------------------------------------------------------------------------- /fuzz/corpus/fuzz_target_1/f.seed: -------------------------------------------------------------------------------- 1 | (R7ByrB=s$n2M,MU[xfN^{P`g`pjo398create table x({{5-}}); -------------------------------------------------------------------------------- /fuzz/corpus/fuzz_target_1/g.seed: -------------------------------------------------------------------------------- 1 | wG?d?zosInBxQ+\_y5BJFks80G?449@Xcreate table m({{@}}); -------------------------------------------------------------------------------- /fuzz/corpus/fuzz_target_1/h.seed: -------------------------------------------------------------------------------- 1 | G%K0O7KYjB;_!kxOBS'@;[hh6b$5i8*Qcreate table "x"({{timestamp '9999-12-31T23:59:59.999999'}}); -------------------------------------------------------------------------------- /fuzz/fuzz_targets/fuzz_target_1.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | 3 | use std::fs::write; 4 | use 
dbgen::cli::{Args, run}; 5 | use libfuzzer_sys::fuzz_target; 6 | use tempfile::tempdir; 7 | 8 | fuzz_target!(|data: &[u8]| { 9 | if data.len() < 32 { 10 | return; 11 | } 12 | let mut seed = [0_u8; 32]; 13 | seed.copy_from_slice(&data[..32]); 14 | 15 | let out_dir = tempdir().unwrap(); 16 | let template_path = out_dir.path().join("template"); 17 | write(&template_path, &data[32..]).unwrap(); 18 | 19 | drop(run(Args { 20 | out_dir: out_dir.path().to_owned(), 21 | files_count: 5, 22 | inserts_count: 3, 23 | rows_count: 6, 24 | template: template_path, 25 | seed: Some(seed), 26 | jobs: 1, 27 | no_data: true, 28 | ..Args::default() 29 | })); 30 | }); 31 | -------------------------------------------------------------------------------- /release/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM rockylinux:9 2 | 3 | RUN dnf install g++ git -y 4 | 5 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain 1.87.0 --profile minimal 6 | ENV PATH /root/.cargo/bin:$PATH 7 | 8 | # sudo docker build --network=host -t kennytm/dbgen-build-env . 9 | -------------------------------------------------------------------------------- /release/README.md: -------------------------------------------------------------------------------- 1 | Release procedure 2 | ================= 3 | 4 | ## Pre-check 5 | 6 | 1. Push master branch to GitHub. 7 | 2. Ensure the GitHub Action check passes. 8 | 3. Verify the version number in `/Cargo.toml`. 9 | 10 | ## Build package 11 | 12 | 4. Update `Dockerfile` and the Docker image if necessary (always use latest stable rustc). 13 | 5. Run `release/release.py` to build the Linux binaries. 14 | 6. Run `release/package.sh` to package into `*.tar.xz`. 15 | 16 | ## Publish playground 17 | 18 | 7. Run `release/playground.sh` to build the WASM module. 19 | 8. 
Run `release/publish-playground.sh` to commit playground into gh-pages branch and push to GitHub pages. 20 | 21 | ## Publish package 22 | 23 | 9. `cargo publish`. 24 | 10. Create GitHub release. 25 | -------------------------------------------------------------------------------- /release/package.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # release/package.sh packages the built binaries into a .tar.xz package. 4 | 5 | set -eux 6 | 7 | P=$(dirname "$(realpath "$(dirname "$0")")") 8 | 9 | mkdir bin 10 | cp $P/target/x86_64-unknown-linux-gnu/release/{dbgen,dbschemagen} bin/ 11 | tar cfvJ dbgen-v$(cargo read-manifest | jq -r .version)-x86_64-unknown-linux-gnu.tar.xz bin 12 | rm -r bin 13 | -------------------------------------------------------------------------------- /release/playground.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # release/playground.sh builds the web playground. 4 | 5 | set -eux 6 | 7 | CARGO_PROFILE_RELEASE_OPT_LEVEL=z \ 8 | cargo +nightly build \ 9 | -p dbgen-playground \ 10 | --release \ 11 | --no-default-features \ 12 | -Z avoid-dev-deps \ 13 | --target wasm32-unknown-unknown 14 | 15 | wasm-opt -Oz \ 16 | -o target/wasm32-unknown-unknown/release/playground.wasm \ 17 | target/wasm32-unknown-unknown/release/dbgen_playground.wasm 18 | 19 | wasm-bindgen target/wasm32-unknown-unknown/release/playground.wasm \ 20 | --out-dir dbgen-playground \ 21 | --target no-modules \ 22 | --no-typescript 23 | -------------------------------------------------------------------------------- /release/publish-playground.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # release/publish-playground.sh copies the playground WASM to the gh-pages branch. 
4 | 5 | set -eux 6 | 7 | git worktree add gh-pages gh-pages 8 | COMMIT="$(git rev-parse HEAD)" 9 | cp dbgen-playground/index.html dbgen-playground/playground_bg.wasm dbgen-playground/playground.js gh-pages 10 | cd gh-pages 11 | git add index.html playground_bg.wasm playground.js 12 | git commit -m "Publish v$(cargo read-manifest | jq -r .version) ($COMMIT)" 13 | git push origin gh-pages 14 | cd .. 15 | git worktree remove gh-pages 16 | -------------------------------------------------------------------------------- /release/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # release/release.sh builds the dbgen binaries for release to Linux x86_64 via Docker. 4 | 5 | set -ex 6 | 7 | P=$(dirname "$(realpath "$(dirname "$0")")") 8 | 9 | sudo docker run --rm \ 10 | --volume "$P":/dbgen \ 11 | --volume "$HOME/.cargo/git":/root/.cargo/git:ro \ 12 | --volume "$HOME/.cargo/registry":/root/.cargo/registry:ro \ 13 | --workdir /dbgen \ 14 | --network host \ 15 | kennytm/dbgen-build-env \ 16 | cargo build --release --locked \ 17 | -p dbgen \ 18 | -p dbdbgen \ 19 | --target x86_64-unknown-linux-gnu 20 | -------------------------------------------------------------------------------- /res/sysbench/bulk_insert.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE sbtest1 ( 2 | id INTEGER NOT NULL, 3 | /*{{ rownum }}*/ 4 | k INTEGER DEFAULT '0' NOT NULL, 5 | /*{{ rownum }}*/ 6 | PRIMARY KEY (id) 7 | ); 8 | -------------------------------------------------------------------------------- /res/sysbench/oltp_uniform_mysql.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE sbtest1 ( 2 | id INTEGER PRIMARY KEY AUTO_INCREMENT, 3 | /*{{ rownum }}*/ 4 | k INTEGER DEFAULT '0' NOT NULL, 5 | /*{{ rand.range_inclusive(-0x80000000, 0x7fffffff) }}*/ 6 | c CHAR(120) DEFAULT '' NOT NULL, 7 | /*{{ rand.regex('([0-9]{11}-){9}[0-9]{11}') 
}}*/ 8 | pad CHAR(60) DEFAULT '' NOT NULL, 9 | /*{{ rand.regex('([0-9]{11}-){4}[0-9]{11}') }}*/ 10 | KEY(k) 11 | ); 12 | -------------------------------------------------------------------------------- /res/sysbench/oltp_uniform_postgresql.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE sbtest1 ( 2 | id SERIAL PRIMARY KEY, 3 | /*{{ rownum }}*/ 4 | k INTEGER DEFAULT '0' NOT NULL, 5 | /*{{ rand.range_inclusive(-0x80000000, 0x7fffffff) }}*/ 6 | c CHAR(120) DEFAULT '' NOT NULL, 7 | /*{{ rand.regex('([0-9]{11}-){9}[0-9]{11}') }}*/ 8 | pad CHAR(60) DEFAULT '' NOT NULL 9 | /*{{ rand.regex('([0-9]{11}-){4}[0-9]{11}') }}*/ 10 | ); 11 | CREATE INDEX k ON sbtest1(k); 12 | -------------------------------------------------------------------------------- /res/sysbench/oltp_uniform_sqlite3.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE sbtest1 ( 2 | id INTEGER PRIMARY KEY AUTOINCREMENT, 3 | /*{{ rownum }}*/ 4 | k INTEGER DEFAULT '0' NOT NULL, 5 | /*{{ rand.range_inclusive(-0x80000000, 0x7fffffff) }}*/ 6 | c CHAR(120) DEFAULT '' NOT NULL, 7 | /*{{ rand.regex('([0-9]{11}-){9}[0-9]{11}') }}*/ 8 | pad CHAR(60) DEFAULT '' NOT NULL 9 | /*{{ rand.regex('([0-9]{11}-){4}[0-9]{11}') }}*/ 10 | ); 11 | CREATE INDEX k ON sbtest1(k); 12 | -------------------------------------------------------------------------------- /res/tpcc/README.md: -------------------------------------------------------------------------------- 1 | TPC-C-compatible templates for `dbgen` 2 | ====================================== 3 | 4 | This folder provides template files and a Python script to produce SQL dump compatible with 5 | the [TPC-C] v5.11.0 benchmark. The table names are compatible with [BenchmarkSQL]. 
6 | 7 | | | `dbgen` | BenchmarkSQL | 8 | |--------------------------|----------------------------------:|------------------:| 9 | | Output format | SQL dump split into 256 MiB files | 8 large CSV files | 10 | | Total size per warehouse | 80 MiB | 70 MiB | 11 | | Speed (-j 8, W = 30) | 30s | 50s | 12 | | Speed (-j 8, W = 50) | 40s | 80s | 13 | 14 | ## Usage 15 | 16 | 1. Download or build `dbdbgen`. 17 | 18 | Pre-compiled binaries can be downloaded from . 19 | Decompress the `*.tar.xz` from the assets of the latest release to get the `dbdbgen` executable. 20 | 21 | You can also build `dbdbgen` from source with Rust 1.40 (or above). After installing Rust, run 22 | `cargo build --release -p dbdbgen`. 23 | 24 | 2. Execute the `dbdbgen` program. Suppose we want to create a 30-warehouse dump in the `tpcc-out/` 25 | folder: 26 | 27 | ```sh 28 | dbdbgen res/tpcc/tpcc.jsonnet -o tpcc-out -w 30 29 | ``` 30 | 31 |
The SQL dump is split into multiple files in subdirectories of 32 | tpcc-out. They are lexicographically sorted by the proper import order. 33 | 34 | ``` 35 | tpcc-out/ 36 | 0_config/ 37 | tpcc-schema-create.sql 38 | tpcc.bmsql_config-schema.sql 39 | tpcc.bmsql_config.1.sql 40 | 1_item/ 41 | tpcc.bmsql_item-schema.sql 42 | tpcc.bmsql_item.1.sql 43 | 2_warehouse/ 44 | tpcc.bmsql_warehouse-schema.sql 45 | tpcc.bmsql_warehouse.1.sql 46 | 3_stock/ 47 | tpcc.bmsql_stock-schema.sql 48 | tpcc.bmsql_stock.001.sql 49 | tpcc.bmsql_stock.002.sql 50 | … 51 | 4_district/ 52 | tpcc.bmsql_district-schema.sql 53 | tpcc.bmsql_district.1.sql 54 | 5_customer/ 55 | tpcc.bmsql_customer-schema.sql 56 | tpcc.bmsql_customer.01.sql 57 | tpcc.bmsql_customer.02.sql 58 | … 59 | 6_history/ 60 | tpcc.bmsql_history-schema.sql 61 | tpcc.bmsql_history.01.sql 62 | tpcc.bmsql_history.12.sql 63 | … 64 | 7_order/ 65 | tpcc.bmsql_oorder-schema.sql 66 | tpcc.bmsql_oorder.001.sql 67 | tpcc.bmsql_oorder.002.sql 68 | … 69 | tpcc.bmsql_order_line-schema.sql 70 | tpcc.bmsql_order_line.001.sql 71 | tpcc.bmsql_order_line.002.sql 72 | … 73 | 8_new_order/ 74 | tpcc.bmsql_new_order-schema.sql 75 | tpcc.bmsql_new_order.1.sql 76 | tpcc.bmsql_new_order.2.sql 77 | … 78 | ``` 79 | 80 |
81 | 82 | 5. Load the SQL dump into the database. Typically you can simply pipe the files into the database 83 | client, e.g. 84 | 85 | * **SQLite3** 86 | 87 | ```sh 88 | export LANG=C 89 | rm -f tpcc.db 90 | for f in tpcc-out/*/*.*.sql; do 91 | echo "$f" 92 | sqlite3 tpcc.db < "$f" || break 93 | done 94 | ``` 95 | 96 | * **PostgreSQL via `psql`** 97 | 98 | Make sure you have CREATE privilege in the chosen database to create the `tpcc` schema. 99 | 100 | ```sh 101 | export LANG=C # make sure '-' is sorted before '.' 102 | export PGOPTIONS=--search_path=tpcc 103 | psql postgres -c 'drop schema if exists tpcc cascade;' 104 | for f in tpcc-out/*/*.sql; do 105 | echo "$f" 106 | psql postgres -q -1 -v ON_ERROR_STOP=1 -f "$f" || break 107 | done 108 | ``` 109 | 110 | * **MySQL via `mysql`** 111 | 112 | Make sure you have CREATE privilege to create the `tpcc` database. 113 | 114 | ```sh 115 | export LANG=C 116 | mysql -u root 'drop schema if exists tpcc; create schema tpcc;' 117 | for f in tpcc-out/*/*.sql; do 118 | echo "$f" 119 | mysql -u root < "$f" || break 120 | done 121 | ``` 122 | 123 | * **MySQL via [myloader]** 124 | 125 | `myloader` restores an SQL dump into MySQL in parallel. It automatically manages the import 126 | order but expects SQL files in a flat directory, so we first need to flatten it. Then we can 127 | ingest the entire directory in one go. 128 | 129 | ```sh 130 | # Transform the output directory into mydumper structure. 131 | mv tpcc-out/*/* tpcc-out/ 132 | touch tpcc-out/metadata 133 | # Disable foreign key checks, since the files are imported in no particular order. 134 | mysql -u root -e 'set @@global.foreign_key_checks = 0;' 135 | # Now import the entire directory. 136 | myloader -u root -B tpcc -d tpcc-out/ 137 | # Re-enable foreign key checks. 
138 | mysql -u root -e 'set @@global.foreign_key_checks = 1;' 139 | ``` 140 | 141 | * **[TiDB] via [TiDB Lightning]** 142 | 143 | The output structure is directly compatible with TiDB Lightning and can be used directly. 144 | 145 | Note that, before v4.0, TiDB does not support the SERIAL alias. You may need to manually 146 | replace its use in `6_history/tpcc.bmsql_history-schema.sql` as 147 | BIGINT UNSIGNED AUTO_INCREMENT first. 148 | 149 | ```sh 150 | sed -i'' 's/serial/bigint unsigned auto_increment/' tpcc-out/6_history/tpcc.bmsql_history-schema.sql 151 | # ^ Not needed for TiDB Lightning 4.0 or above 152 | tidb-lightning -d tpcc-out/ --tidb-host 127.0.0.1 153 | ``` 154 | 155 | [TPC-C]: http://www.tpc.org/tpcc/ 156 | [BenchmarkSQL]: https://sourceforge.net/projects/benchmarksql/ 157 | [myloader]: https://github.com/maxbube/mydumper 158 | [TiDB]: https://pingcap.com/docs/ 159 | [TiDB Lightning]: https://pingcap.com/docs/stable/reference/tools/tidb-lightning/overview/ -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | edition = "2024" 2 | max_width = 120 -------------------------------------------------------------------------------- /src/bin/dbgen.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser as _; 2 | use dbgen::{ 3 | cli::{Args, run}, 4 | span::Registry, 5 | }; 6 | 7 | fn main() { 8 | let mut registry = Registry::default(); 9 | if let Err(e) = run(Args::parse(), &mut registry) { 10 | eprintln!("{}", registry.describe(&e)); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/bin/dbschemagen.rs: -------------------------------------------------------------------------------- 1 | use clap::Parser as _; 2 | use dbgen::schemagen_cli::{Args, print_script}; 3 | 4 | fn main() { 5 | let args = Args::parse(); 6 | 
print_script(&args); 7 | 8 | // if let Err(err) = run(args) { 9 | // eprintln!("{}\n", err); 10 | // for (e, i) in err.iter_causes().zip(1..) { 11 | // eprintln!("{:=^80}\n{}\n", format!(" ERROR CAUSE #{} ", i), e); 12 | // } 13 | // exit(1); 14 | // } 15 | 16 | // let mut rng = thread_rng(); 17 | // let table = gen_table(Dialect::MySQL, &mut rng, 1e9); 18 | 19 | // println!("{}", table.schema); 20 | // println!("rows: {}", table.rows_count); 21 | } 22 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | //! Error types for the `dbgen` library. 2 | 3 | #![allow(clippy::used_underscore_binding)] 4 | 5 | use crate::{parser::Rule, span::S}; 6 | use std::{convert::Infallible, fmt, path::PathBuf}; 7 | use thiserror::Error as ThisError; 8 | 9 | /// Errors produced by the `dbgen` library. 10 | #[derive(ThisError, Debug)] 11 | #[non_exhaustive] 12 | pub enum Error { 13 | /// Failed to parse template. 14 | #[error("failed to parse template")] 15 | ParseTemplate(#[source] Box>), 16 | 17 | /// Unknown SQL function. 18 | #[error("unknown function")] 19 | UnknownFunction, 20 | 21 | /// Integer is too big. 22 | #[error("integer '{0}' is too big")] 23 | IntegerOverflow( 24 | /// The string representation of the expression that produced the overflow. 25 | String, 26 | ), 27 | 28 | /// Not enough arguments provided to the SQL function. 29 | #[error("not enough arguments")] 30 | NotEnoughArguments, 31 | 32 | /// Invalid regex. 33 | #[error("invalid regex")] 34 | InvalidRegex(#[from] rand_regex::Error), 35 | 36 | /// Unknown regex flag. 37 | #[error("unknown regex flag '{0}'")] 38 | UnknownRegexFlag( 39 | /// The regex flag. 40 | char, 41 | ), 42 | 43 | /// Hex/Base64 decode error. 44 | #[error("decode error")] 45 | DecodeError(#[from] data_encoding::DecodeError), 46 | 47 | /// Invalid arguments. 
48 | #[error("{0}")] 49 | InvalidArguments( 50 | /// Cause of the error. 51 | String, 52 | ), 53 | 54 | /// The timestamp string is invalid 55 | #[error("invalid timestamp")] 56 | InvalidTimestampString(#[from] chrono::format::ParseError), 57 | 58 | /// Cannot find parent table for derived table directive. 59 | #[error("cannot find parent table {parent} to generate derived rows")] 60 | UnknownParentTable { 61 | /// Expected parent table name. 62 | parent: String, 63 | }, 64 | 65 | /// Derived table name does not match that of the derived table directive. 66 | #[error( 67 | "derived table name in the FOR EACH ROW and CREATE TABLE statements do not match ({for_each_row} vs {create_table})" 68 | )] 69 | DerivedTableNameMismatch { 70 | /// The table name in the FOR EACH ROW statement 71 | for_each_row: String, 72 | /// The table name in the CREATE TABLE statement 73 | create_table: String, 74 | }, 75 | 76 | /// Unexpected value type. 77 | #[error("cannot convert {value} into {expected}")] 78 | UnexpectedValueType { 79 | /// The expected value type. 80 | expected: &'static str, 81 | /// The actual value. 82 | value: String, 83 | }, 84 | 85 | /// Generic IO error. 86 | #[error("failed to {action} at {path}")] 87 | Io { 88 | /// Action causing the error. 89 | action: &'static str, 90 | /// File path causing the I/O error. 91 | path: PathBuf, 92 | /// Source of error. 93 | source: std::io::Error, 94 | }, 95 | 96 | /// Failed to configure a Rayon thread pool. 97 | #[cfg(feature = "cli")] 98 | #[error("failed to configure thread pool")] 99 | Rayon(#[from] rayon::ThreadPoolBuildError), 100 | 101 | /// Cannot use `--table-name` when template contains multiple tables. 102 | #[error("cannot use --table-name when template contains multiple tables")] 103 | CannotUseTableNameForMultipleTables, 104 | 105 | /// Unsupported CLI parameter. 106 | #[error("unsupported {kind} {value}")] 107 | UnsupportedCliParameter { 108 | /// The parameter name. 
109 | kind: &'static str, 110 | /// Value provided by user. 111 | value: String, 112 | }, 113 | 114 | /// Forced panic. 115 | #[error("runtime panic: {message}")] 116 | Panic { 117 | /// The panic message. 118 | message: String, 119 | }, 120 | } 121 | 122 | // ensure the size of error is ≤56 bytes 123 | const _: usize = 56 - std::mem::size_of::(); 124 | 125 | impl fmt::Display for S { 126 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 127 | self.inner.fmt(f) 128 | } 129 | } 130 | 131 | impl std::error::Error for S { 132 | fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { 133 | self.inner.source() 134 | } 135 | } 136 | 137 | impl From for Error { 138 | fn from(never: Infallible) -> Self { 139 | match never {} 140 | } 141 | } 142 | 143 | impl From> for Error { 144 | fn from(e: pest::error::Error) -> Self { 145 | Self::ParseTemplate(Box::new(e)) 146 | } 147 | } 148 | 149 | impl From for Error { 150 | fn from(e: regex_syntax::Error) -> Self { 151 | Self::InvalidRegex(e.into()) 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src/functions/array.rs: -------------------------------------------------------------------------------- 1 | //! Array functions. 2 | 3 | use super::{Arguments, Function, args_1, args_2, args_3}; 4 | use crate::{ 5 | array::{Array, Permutation}, 6 | error::Error, 7 | eval::{C, CompileContext}, 8 | span::{ResultExt as _, S, Span, SpanExt as _}, 9 | value::Value, 10 | }; 11 | use std::{cmp::Ordering, sync::Arc}; 12 | 13 | /// The array constructor. 14 | #[derive(Debug)] 15 | pub struct ArrayConstructor; 16 | 17 | impl Function for ArrayConstructor { 18 | fn compile(&self, _: &CompileContext, _: Span, args: Arguments) -> Result> { 19 | Ok(C::Constant(Value::Array(Array::from_values( 20 | args.into_iter().map(|arg| arg.inner), 21 | )))) 22 | } 23 | } 24 | 25 | /// The array subscript operator. 
26 | #[derive(Debug)] 27 | pub struct Subscript; 28 | 29 | impl Function for Subscript { 30 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 31 | let (base, index) = args_2::(span, args, None, None)?; 32 | Ok(C::Constant(if index == 0 || index > base.len() { 33 | Value::Null 34 | } else { 35 | base.get(index - 1) 36 | })) 37 | } 38 | } 39 | 40 | /// The `generate_series` SQL function. 41 | #[derive(Debug)] 42 | pub struct GenerateSeries; 43 | 44 | impl Function for GenerateSeries { 45 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 46 | let (start, end, step) = args_3::(span, args, None, None, Some(Value::Number(1.into())))?; 47 | let len_number = (|| end.sql_sub(&start)?.sql_add(&step)?.sql_div(&step))().span_err(span)?; 48 | 49 | let len = if len_number.sql_sign() == Ordering::Greater { 50 | len_number 51 | .try_into() 52 | .map_err(|_| Error::InvalidArguments("generated series will be too long".to_owned()).span(span))? 53 | } else { 54 | 0 55 | }; 56 | 57 | Ok(C::Constant(Value::Array(Array::new_series(start, step, len)))) 58 | } 59 | } 60 | 61 | /// The `rand.shuffle` SQL function. 62 | #[derive(Debug)] 63 | pub struct Shuffle; 64 | 65 | impl Function for Shuffle { 66 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 67 | let array = args_1::(span, args, None)?; 68 | Ok(C::RandShuffle { 69 | permutation: Box::new(Permutation::prepare(array.len())), 70 | inner: Arc::new(array), 71 | }) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/functions/codec.rs: -------------------------------------------------------------------------------- 1 | //! Encoding and decoding functions. 
2 | 3 | use data_encoding::{BASE64, BASE64URL_NOPAD, Encoding}; 4 | use data_encoding_macro::new_encoding; 5 | 6 | use super::{Arguments, Function, args_1}; 7 | use crate::{ 8 | bytes::ByteString, 9 | error::Error, 10 | eval::{C, CompileContext}, 11 | span::{ResultExt, S, Span}, 12 | }; 13 | 14 | const HEX_ENCODING: Encoding = new_encoding! { 15 | symbols: "0123456789ABCDEF", 16 | translate_from: "abcdef", 17 | translate_to: "ABCDEF", 18 | ignore: " \t\r\n", 19 | }; 20 | 21 | const BASE64_ENCODING: Encoding = new_encoding! { 22 | symbols: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", 23 | translate_from: "-_", 24 | translate_to: "+/", 25 | ignore: " \t\r\n=", 26 | }; 27 | 28 | //------------------------------------------------------------------------------ 29 | 30 | /// The `decode.*` SQL functions 31 | #[derive(Debug)] 32 | pub struct Decode { 33 | encoding: &'static Encoding, 34 | } 35 | 36 | /// The `decode.hex` (a.k.a. `x`) SQL function. 37 | pub const DECODE_HEX: Decode = Decode { 38 | encoding: &HEX_ENCODING, 39 | }; 40 | /// The `decode.base64` and `decode.base64url` SQL functions. 41 | pub const DECODE_BASE64: Decode = Decode { 42 | encoding: &BASE64_ENCODING, 43 | }; 44 | 45 | impl Function for Decode { 46 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 47 | let encoded = args_1::(span, args, None)?; 48 | let decoded = self.encoding.decode(encoded.as_bytes()).span_err(span)?; 49 | Ok(C::Constant(decoded.into())) 50 | } 51 | } 52 | 53 | //------------------------------------------------------------------------------ 54 | 55 | /// The `encode.*` SQL functions 56 | #[derive(Debug)] 57 | pub struct Encode { 58 | encoding: &'static Encoding, 59 | } 60 | 61 | /// The `encode.hex` SQL function. 62 | pub const ENCODE_HEX: Encode = Encode { 63 | encoding: &HEX_ENCODING, 64 | }; 65 | /// The `encode.base64` SQL function. 
66 | pub const ENCODE_BASE64: Encode = Encode { encoding: &BASE64 }; 67 | /// The `encode.base64url` SQL function. 68 | pub const ENCODE_BASE64URL: Encode = Encode { 69 | encoding: &BASE64URL_NOPAD, 70 | }; 71 | 72 | impl Function for Encode { 73 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 74 | let decoded = args_1::(span, args, None)?; 75 | let encoded = self.encoding.encode(decoded.as_bytes()); 76 | Ok(C::Constant(encoded.into())) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/functions/debug.rs: -------------------------------------------------------------------------------- 1 | //! Debug functions. 2 | 3 | use super::{Arguments, Function}; 4 | use crate::{ 5 | error::Error, 6 | eval::{C, CompileContext}, 7 | span::{S, Span, SpanExt}, 8 | }; 9 | 10 | /// The `debug.panic` function. 11 | #[derive(Debug)] 12 | pub struct Panic; 13 | 14 | impl Function for Panic { 15 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 16 | use std::fmt::Write; 17 | let mut message = String::new(); 18 | for (arg, i) in args.into_iter().zip(1..) { 19 | write!(&mut message, "\n {}. {}", i, arg.inner).unwrap(); 20 | } 21 | Err(Error::Panic { message }.span(span)) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/functions/mod.rs: -------------------------------------------------------------------------------- 1 | //! Defines functions for evaluation. 2 | 3 | use crate::{ 4 | error::Error, 5 | eval::{C, CompileContext}, 6 | span::{ResultExt, S, Span, SpanExt}, 7 | value::Value, 8 | }; 9 | 10 | use std::{convert::TryFrom, fmt::Debug}; 11 | 12 | pub mod array; 13 | pub mod codec; 14 | pub mod debug; 15 | pub mod ops; 16 | pub mod rand; 17 | pub mod string; 18 | pub mod time; 19 | 20 | /// Container of the arguments passed to functions. 
21 | pub type Arguments = smallvec::SmallVec<[S; 2]>; 22 | 23 | /// An SQL function. 24 | pub trait Function: Sync + Debug { 25 | /// Compiles or evaluates this function taking the provided arguments. 26 | fn compile(&self, ctx: &CompileContext, span: Span, args: Arguments) -> Result>; 27 | } 28 | 29 | trait TryFromSpannedValue: Sized { 30 | fn try_from_spanned_value(value: S) -> Result>; 31 | } 32 | 33 | impl TryFromSpannedValue for T 34 | where 35 | T: TryFrom, 36 | Error: From, 37 | { 38 | fn try_from_spanned_value(value: S) -> Result> { 39 | let span = value.span; 40 | Self::try_from(value.inner).span_err(span) 41 | } 42 | } 43 | 44 | impl TryFromSpannedValue for S { 45 | fn try_from_spanned_value(value: S) -> Result> { 46 | String::try_from(value.inner).span_ok_err(value.span) 47 | } 48 | } 49 | 50 | impl TryFromSpannedValue for S { 51 | fn try_from_spanned_value(value: S) -> Result> { 52 | Ok(value) 53 | } 54 | } 55 | 56 | macro_rules! declare_arg_fn { 57 | ( 58 | $(#[$meta:meta])* 59 | fn $name:ident($($def:ident: $ty:ident),+); 60 | ) => { 61 | $(#[$meta])* 62 | fn $name<$($ty),+>(span: Span, args: Arguments, $($def: Option<$ty>),+) -> Result<($($ty),+), S> 63 | where 64 | $($ty: TryFromSpannedValue,)+ 65 | { 66 | let mut it = args.into_iter(); 67 | $( 68 | let $def = if let Some(arg) = it.next() { 69 | $ty::try_from_spanned_value(arg) 70 | } else { 71 | $def.ok_or(Error::NotEnoughArguments.span(span)) 72 | }?; 73 | )+ 74 | Ok(($($def),+)) 75 | } 76 | } 77 | } 78 | 79 | declare_arg_fn! { 80 | /// Extracts one value from the list of arguments. 81 | // ALLOW_REASON: we *do* want args_1 to return the value instead of 1-tuple. 82 | #[allow(unused_parens)] 83 | fn args_1(d1: T1); 84 | } 85 | declare_arg_fn! { 86 | /// Extracts two values from the list of arguments. 87 | fn args_2(d1: T1, d2: T2); 88 | } 89 | declare_arg_fn! { 90 | /// Extracts three values from the list of arguments. 91 | fn args_3(d1: T1, d2: T2, d3: T3); 92 | } 93 | declare_arg_fn! 
{ 94 | /// Extracts four values from the list of arguments. 95 | fn args_4(d1: T1, d2: T2, d3: T3, d4: T4); 96 | } 97 | 98 | /// Converts a slice of arguments all into a specific type. 99 | fn iter_args(args: Arguments) -> impl Iterator>> 100 | where 101 | T: TryFromSpannedValue, 102 | { 103 | args.into_iter().map(T::try_from_spanned_value) 104 | } 105 | 106 | fn require(span: Span, cond: bool, cause: impl FnOnce() -> String) -> Result<(), S> { 107 | if cond { 108 | Ok(()) 109 | } else { 110 | Err(Error::InvalidArguments(cause()).span(span)) 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/functions/ops.rs: -------------------------------------------------------------------------------- 1 | //! Numerical and logical functions. 2 | 3 | use super::{Arguments, Function, args_1, args_2, iter_args}; 4 | use crate::{ 5 | error::Error, 6 | eval::{C, CompileContext}, 7 | span::{ResultExt, S, Span}, 8 | value::Value, 9 | }; 10 | use std::cmp::Ordering; 11 | 12 | //------------------------------------------------------------------------------ 13 | 14 | /// The unary negation SQL function 15 | #[derive(Debug)] 16 | pub struct Neg; 17 | 18 | impl Function for Neg { 19 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 20 | let inner = args_1::(span, args, None)?; 21 | Ok(C::Constant(inner.sql_neg().span_err(span)?)) 22 | } 23 | } 24 | 25 | //------------------------------------------------------------------------------ 26 | 27 | /// The value comparison (`<`, `=`, `>`, `<=`, `<>`, `>=`) SQL functions. 28 | #[derive(Debug)] 29 | pub struct Compare { 30 | /// Whether a less-than result is considered TRUE. 31 | lt: bool, 32 | /// Whether an equals result is considered TRUE. 33 | eq: bool, 34 | /// Whether a greater-than result is considered TRUE. 35 | gt: bool, 36 | } 37 | 38 | /// The `<` SQL function. 
39 | pub const LT: Compare = Compare { 40 | lt: true, 41 | eq: false, 42 | gt: false, 43 | }; 44 | /// The `=` SQL function. 45 | pub const EQ: Compare = Compare { 46 | lt: false, 47 | eq: true, 48 | gt: false, 49 | }; 50 | /// The `>` SQL function. 51 | pub const GT: Compare = Compare { 52 | lt: false, 53 | eq: false, 54 | gt: true, 55 | }; 56 | /// The `<=` SQL function. 57 | pub const LE: Compare = Compare { 58 | lt: true, 59 | eq: true, 60 | gt: false, 61 | }; 62 | /// The `<>` SQL function. 63 | pub const NE: Compare = Compare { 64 | lt: true, 65 | eq: false, 66 | gt: true, 67 | }; 68 | /// The `>=` SQL function. 69 | pub const GE: Compare = Compare { 70 | lt: false, 71 | eq: true, 72 | gt: true, 73 | }; 74 | 75 | impl Function for Compare { 76 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 77 | if let [lhs, rhs] = &*args { 78 | Ok(C::Constant(match lhs.inner.sql_cmp(&rhs.inner).span_err(span)? { 79 | None => Value::Null, 80 | Some(Ordering::Less) => self.lt.into(), 81 | Some(Ordering::Equal) => self.eq.into(), 82 | Some(Ordering::Greater) => self.gt.into(), 83 | })) 84 | } else { 85 | panic!("should have exactly 2 arguments"); 86 | } 87 | } 88 | } 89 | 90 | //------------------------------------------------------------------------------ 91 | 92 | /// The identity comparison (`IS`, `IS NOT`) SQL functions. 93 | #[derive(Debug)] 94 | pub struct Identical { 95 | /// Whether an identical result is considered TRUE. 96 | eq: bool, 97 | } 98 | 99 | /// The `IS` SQL function. 100 | pub const IS: Identical = Identical { eq: true }; 101 | /// The `IS NOT` SQL function. 
102 | pub const IS_NOT: Identical = Identical { eq: false }; 103 | 104 | impl Function for Identical { 105 | fn compile(&self, _: &CompileContext, _: Span, args: Arguments) -> Result> { 106 | if let [lhs, rhs] = &*args { 107 | let is_eq = lhs.inner == rhs.inner; 108 | Ok(C::Constant((is_eq == self.eq).into())) 109 | } else { 110 | panic!("should have exactly 2 arguments"); 111 | } 112 | } 113 | } 114 | 115 | //------------------------------------------------------------------------------ 116 | 117 | /// The logical `NOT` SQL function. 118 | #[derive(Debug)] 119 | pub struct Not; 120 | 121 | impl Function for Not { 122 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 123 | let inner = args_1::>(span, args, None)?; 124 | Ok(C::Constant(inner.map(|b| !b).into())) 125 | } 126 | } 127 | 128 | //------------------------------------------------------------------------------ 129 | 130 | /// The bitwise-NOT `~` SQL function. 131 | #[derive(Debug)] 132 | pub struct BitNot; 133 | 134 | impl Function for BitNot { 135 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 136 | let inner = args_1::(span, args, None)?; 137 | Ok(C::Constant((!inner).into())) 138 | } 139 | } 140 | 141 | //------------------------------------------------------------------------------ 142 | 143 | /// The logical `AND`/`OR` SQL functions. 144 | #[derive(Debug)] 145 | pub struct Logic { 146 | /// The identity value. True means `AND` and false means `OR`. 147 | identity: bool, 148 | } 149 | 150 | /// The logical `AND` SQL function. 151 | pub const AND: Logic = Logic { identity: true }; 152 | /// The logical `OR` SQL function. 153 | pub const OR: Logic = Logic { identity: false }; 154 | 155 | impl Function for Logic { 156 | fn compile(&self, _: &CompileContext, _: Span, args: Arguments) -> Result> { 157 | let mut result = Some(self.identity); 158 | 159 | for arg in iter_args::>(args) { 160 | if let Some(v) = arg? 
{ 161 | if v == self.identity { 162 | continue; 163 | } 164 | return Ok(C::Constant(v.into())); 165 | } 166 | result = None; 167 | } 168 | Ok(C::Constant(result.into())) 169 | } 170 | } 171 | 172 | //------------------------------------------------------------------------------ 173 | 174 | /// The arithmetic (`+`, `-`, `*`, `/`) SQL functions. 175 | #[derive(Debug)] 176 | pub enum Arith { 177 | /// Addition (`+`) 178 | Add, 179 | /// Subtraction (`-`) 180 | Sub, 181 | /// Multiplication (`*`) 182 | Mul, 183 | /// Floating-point division (`/`) 184 | FloatDiv, 185 | } 186 | 187 | impl Function for Arith { 188 | fn compile(&self, _: &CompileContext, _: Span, args: Arguments) -> Result> { 189 | let func = match self { 190 | Self::Add => Value::sql_add, 191 | Self::Sub => Value::sql_sub, 192 | Self::Mul => Value::sql_mul, 193 | Self::FloatDiv => Value::sql_float_div, 194 | }; 195 | 196 | let result = args.into_iter().try_fold(None, |accum, cur| -> Result<_, S> { 197 | Ok(Some(if let Some(prev) = accum { 198 | func(&prev, &cur.inner).span_err(cur.span)? 199 | } else { 200 | cur.inner 201 | })) 202 | }); 203 | Ok(C::Constant(result?.expect("at least 1 argument"))) 204 | } 205 | } 206 | 207 | //------------------------------------------------------------------------------ 208 | 209 | /// The bitwise binary (`&`, `|`, `^`) SQL functions. 
210 | #[derive(Debug)] 211 | pub enum Bitwise { 212 | /// Bitwise-AND (`&`) 213 | And, 214 | /// Bitwise-OR (`|`) 215 | Or, 216 | /// Bitwise-XOR (`^`) 217 | Xor, 218 | } 219 | 220 | impl Function for Bitwise { 221 | fn compile(&self, _: &CompileContext, _: Span, args: Arguments) -> Result> { 222 | use std::ops::{BitAnd, BitOr, BitXor}; 223 | 224 | let (func, init): (fn(i128, i128) -> i128, _) = match self { 225 | Self::And => (i128::bitand, -1), 226 | Self::Or => (i128::bitor, 0), 227 | Self::Xor => (i128::bitxor, 0), 228 | }; 229 | 230 | let result = iter_args::(args).try_fold(init, |a, b| b.map(|bb| func(a, bb)))?; 231 | Ok(C::Constant(result.into())) 232 | } 233 | } 234 | 235 | //------------------------------------------------------------------------------ 236 | 237 | /// The extremum (`least`, `greatest`) SQL functions. 238 | #[derive(Debug)] 239 | pub struct Extremum { 240 | /// The order to drive the extremum. 241 | order: Ordering, 242 | } 243 | 244 | /// The `greatest` SQL function. 245 | pub const GREATEST: Extremum = Extremum { 246 | order: Ordering::Greater, 247 | }; 248 | /// The `least` SQL function. 249 | pub const LEAST: Extremum = Extremum { order: Ordering::Less }; 250 | 251 | impl Function for Extremum { 252 | fn compile(&self, _: &CompileContext, _: Span, args: Arguments) -> Result> { 253 | let mut res = Value::Null; 254 | for value in args { 255 | let should_replace = if let Some(order) = value.inner.sql_cmp(&res).span_err(value.span)? { 256 | order == self.order 257 | } else { 258 | res == Value::Null 259 | }; 260 | if should_replace { 261 | res = value.inner; 262 | } 263 | } 264 | Ok(C::Constant(res)) 265 | } 266 | } 267 | 268 | //------------------------------------------------------------------------------ 269 | 270 | /// The `round` SQL function. 
271 | #[derive(Debug)] 272 | pub struct Round; 273 | 274 | impl Function for Round { 275 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 276 | let (value, digits) = args_2::(span, args, None, Some(0))?; 277 | let scale = 10.0_f64.powi(digits); 278 | let result = if scale.is_finite() { 279 | (value * scale).round() / scale 280 | } else { 281 | value 282 | }; 283 | Ok(C::Constant(Value::from_finite_f64(result))) 284 | } 285 | } 286 | 287 | //------------------------------------------------------------------------------ 288 | 289 | /// The `div` SQL function. 290 | #[derive(Debug)] 291 | pub struct Div; 292 | 293 | impl Function for Div { 294 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 295 | let (n, d) = args_2::(span, args, None, None)?; 296 | Ok(C::Constant(n.sql_div(&d).span_err(span)?)) 297 | } 298 | } 299 | 300 | /// The `mod` SQL function. 301 | #[derive(Debug)] 302 | pub struct Mod; 303 | 304 | impl Function for Mod { 305 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 306 | let (n, d) = args_2::(span, args, None, None)?; 307 | Ok(C::Constant(n.sql_rem(&d).span_err(span)?)) 308 | } 309 | } 310 | 311 | //------------------------------------------------------------------------------ 312 | 313 | /// The `coalesce` SQL function. 314 | #[derive(Debug)] 315 | pub struct Coalesce; 316 | 317 | impl Function for Coalesce { 318 | fn compile(&self, _: &CompileContext, _: Span, args: Arguments) -> Result> { 319 | let res = args 320 | .into_iter() 321 | .map(|v| v.inner) 322 | .find(|v| *v != Value::Null) 323 | .unwrap_or(Value::Null); 324 | Ok(C::Constant(res)) 325 | } 326 | } 327 | 328 | //------------------------------------------------------------------------------ 329 | 330 | /// The statement terminator `;`. 
331 | #[derive(Debug)] 332 | pub struct Last; 333 | 334 | impl Function for Last { 335 | fn compile(&self, _: &CompileContext, _: Span, mut args: Arguments) -> Result> { 336 | Ok(C::Constant(args.pop().expect("at least one expression").inner)) 337 | } 338 | } 339 | -------------------------------------------------------------------------------- /src/functions/rand.rs: -------------------------------------------------------------------------------- 1 | //! Random generator functions. 2 | 3 | use super::{Arguments, Function, args_1, args_2, args_3, require}; 4 | use crate::{ 5 | array::Array, 6 | error::Error, 7 | eval::{C, CompileContext}, 8 | number::Number, 9 | span::{ResultExt, S, Span, SpanExt}, 10 | }; 11 | use std::convert::TryFrom as _; 12 | 13 | //------------------------------------------------------------------------------ 14 | 15 | /// The `rand.range` SQL function. 16 | #[derive(Debug)] 17 | pub struct Range; 18 | 19 | /// The `rand.range_inclusive` SQL function. 20 | #[derive(Debug)] 21 | pub struct RangeInclusive; 22 | 23 | macro_rules! 
impl_rand_range { 24 | ($name:expr, $cmp:tt, $new:ident) => { 25 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 26 | let (lower, upper) = args_2::(span, args, None, None)?; 27 | require(span, lower $cmp upper, || format!("assertion failed: {} {} {}", lower, stringify!($cmp), upper))?; 28 | if let (Ok(a), Ok(b)) = (u64::try_from(lower), u64::try_from(upper)) { 29 | Ok(C::RandUniformU64(rand_distr::Uniform::$new(a, b))) 30 | } else if let (Ok(a), Ok(b)) = (i64::try_from(lower), i64::try_from(upper)) { 31 | Ok(C::RandUniformI64(rand_distr::Uniform::$new(a, b))) 32 | } else { 33 | Err(Error::IntegerOverflow(format!("{}({}, {})", $name, lower, upper)).span(span)) 34 | } 35 | } 36 | } 37 | } 38 | 39 | impl Function for Range { 40 | impl_rand_range!("rand.range", <, new); 41 | } 42 | 43 | impl Function for RangeInclusive { 44 | impl_rand_range!("rand.range_inclusive", <=, new_inclusive); 45 | } 46 | 47 | //------------------------------------------------------------------------------ 48 | 49 | /// The `rand.uniform` SQL function. 50 | #[derive(Debug)] 51 | pub struct Uniform; 52 | 53 | /// The `rand.uniform_inclusive` SQL function. 54 | #[derive(Debug)] 55 | pub struct UniformInclusive; 56 | 57 | macro_rules! impl_rand_uniform { 58 | ($cmp:tt, $new:ident) => { 59 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 60 | let (lower, upper) = args_2::(span, args, None, None)?; 61 | require(span, lower $cmp upper, || format!("assertion failed: {} {} {}", lower, stringify!($cmp), upper))?; 62 | Ok(C::RandUniformF64(rand_distr::Uniform::$new(lower, upper))) 63 | } 64 | } 65 | } 66 | 67 | impl Function for Uniform { 68 | impl_rand_uniform!(<, new); 69 | } 70 | 71 | impl Function for UniformInclusive { 72 | impl_rand_uniform!(<=, new_inclusive); 73 | } 74 | 75 | //------------------------------------------------------------------------------ 76 | 77 | /// The `rand.zipf` SQL function. 
78 | #[derive(Debug)] 79 | pub struct Zipf; 80 | 81 | impl Function for Zipf { 82 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 83 | let (count, exponent) = args_2(span, args, None, None)?; 84 | Ok(C::RandZipf(rand_distr::Zipf::new(count, exponent).map_err(|e| { 85 | Error::InvalidArguments(format!("{e} (n = {count}, s = {exponent})")).span(span) 86 | })?)) 87 | } 88 | } 89 | 90 | //------------------------------------------------------------------------------ 91 | 92 | /// The `rand.log_normal` SQL function. 93 | #[derive(Debug)] 94 | pub struct LogNormal; 95 | 96 | impl Function for LogNormal { 97 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 98 | let (mean, std_dev) = args_2::(span, args, None, None)?; 99 | let std_dev = std_dev.abs(); 100 | Ok(C::RandLogNormal(rand_distr::LogNormal::new(mean, std_dev).map_err( 101 | |e| Error::InvalidArguments(format!("standard deviation ({std_dev}) {e}")).span(span), 102 | )?)) 103 | } 104 | } 105 | 106 | //------------------------------------------------------------------------------ 107 | 108 | /// The `rand.bool` SQL function. 109 | #[derive(Debug)] 110 | pub struct Bool; 111 | 112 | impl Function for Bool { 113 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 114 | let p = args_1(span, args, None)?; 115 | Ok(C::RandBool(rand_distr::Bernoulli::new(p).map_err(|e| { 116 | Error::InvalidArguments(format!("probability ({p}) {e}")).span(span) 117 | })?)) 118 | } 119 | } 120 | 121 | //------------------------------------------------------------------------------ 122 | 123 | /// The `rand.finite_f32` SQL function. 124 | #[derive(Debug)] 125 | pub struct FiniteF32; 126 | 127 | /// The `rand.finite_f64` SQL function. 128 | #[derive(Debug)] 129 | pub struct FiniteF64; 130 | 131 | /// The `rand.u31_timestamp` SQL function. 132 | #[derive(Debug)] 133 | pub struct U31Timestamp; 134 | 135 | /// The `rand.uuid` SQL function. 
136 | #[derive(Debug)] 137 | pub struct Uuid; 138 | 139 | impl Function for FiniteF32 { 140 | fn compile(&self, _: &CompileContext, _: Span, _: Arguments) -> Result> { 141 | Ok(C::RandFiniteF32(rand_distr::Uniform::new(0, 0xff00_0000))) 142 | } 143 | } 144 | 145 | impl Function for FiniteF64 { 146 | fn compile(&self, _: &CompileContext, _: Span, _: Arguments) -> Result> { 147 | Ok(C::RandFiniteF64(rand_distr::Uniform::new(0, 0xffe0_0000_0000_0000))) 148 | } 149 | } 150 | 151 | impl Function for U31Timestamp { 152 | fn compile(&self, _: &CompileContext, _: Span, _: Arguments) -> Result> { 153 | Ok(C::RandU31Timestamp(rand_distr::Uniform::new(1, 0x8000_0000))) 154 | } 155 | } 156 | 157 | impl Function for Uuid { 158 | fn compile(&self, _: &CompileContext, _: Span, _: Arguments) -> Result> { 159 | Ok(C::RandUuid) 160 | } 161 | } 162 | 163 | //------------------------------------------------------------------------------ 164 | 165 | /// The `rand.regex` SQL function. 166 | #[derive(Debug)] 167 | pub struct Regex; 168 | 169 | impl Function for Regex { 170 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 171 | let (regex, flags, max_repeat) = args_3::(span, args, None, Some(String::new()), Some(100))?; 172 | let generator = compile_regex_generator(®ex, &flags, max_repeat).span_err(span)?; 173 | Ok(C::RandRegex(generator)) 174 | } 175 | } 176 | 177 | fn compile_regex_generator(regex: &str, flags: &str, max_repeat: u32) -> Result { 178 | let mut parser = regex_syntax::ParserBuilder::new(); 179 | for flag in flags.chars() { 180 | match flag { 181 | 'o' => parser.octal(true), 182 | 'a' => parser.utf8(false).unicode(false), 183 | 'u' => parser.utf8(true).unicode(true), 184 | 'x' => parser.ignore_whitespace(true), 185 | 'i' => parser.case_insensitive(true), 186 | 'm' => parser.multi_line(true), 187 | 's' => parser.dot_matches_new_line(true), 188 | 'U' => parser.swap_greed(true), 189 | _ => return Err(Error::UnknownRegexFlag(flag)), 190 | }; 191 
| } 192 | 193 | let hir = parser.build().parse(regex)?; 194 | Ok(rand_regex::Regex::with_hir(hir, max_repeat)?) 195 | } 196 | 197 | //------------------------------------------------------------------------------ 198 | 199 | /// The `rand.weighted` SQL function. 200 | #[derive(Debug)] 201 | pub struct Weighted; 202 | 203 | impl Function for Weighted { 204 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 205 | let weights = args_1::(span, args, None)? 206 | .iter() 207 | .map(f64::try_from) 208 | .collect::>() 209 | .span_err(span)?; 210 | Ok(C::RandWeighted( 211 | rand_distr::weighted_alias::WeightedAliasIndex::new(weights) 212 | .map_err(|e| Error::InvalidArguments(e.to_string()).span(span))?, 213 | )) 214 | } 215 | } 216 | 217 | //------------------------------------------------------------------------------ 218 | 219 | #[cfg(test)] 220 | mod tests { 221 | use super::*; 222 | use crate::value::Value; 223 | 224 | #[test] 225 | fn test_compile_checks() { 226 | struct TestCase { 227 | name: &'static str, 228 | function: &'static dyn Function, 229 | args: Vec, 230 | error: &'static str, 231 | } 232 | 233 | fn array_of_numbers(values: &[i64]) -> Value { 234 | Value::Array(Array::from_values(values.iter().map(|v| Value::Number((*v).into())))) 235 | } 236 | 237 | let test_cases = vec![ 238 | TestCase { 239 | name: "rand.weighted expected an array", 240 | function: &Weighted, 241 | args: vec![30.into()], 242 | error: "cannot convert 30 into array", 243 | }, 244 | TestCase { 245 | name: "rand.weighted must not be an empty array", 246 | function: &Weighted, 247 | args: vec![array_of_numbers(&[])], 248 | error: "No weights provided in distribution", 249 | }, 250 | TestCase { 251 | name: "rand.weighted must not have negative weight", 252 | function: &Weighted, 253 | args: vec![array_of_numbers(&[55, -5])], 254 | error: "A weight is invalid in distribution", 255 | }, 256 | ]; 257 | 258 | let ctx = CompileContext::new(0); 259 | let span = 
Span::default(); 260 | for tc in test_cases { 261 | let args = tc.args.into_iter().map(|v| v.no_span()).collect(); 262 | let actual_error = tc 263 | .function 264 | .compile(&ctx, span, args) 265 | .expect_err(tc.name) 266 | .inner 267 | .to_string(); 268 | assert_eq!(tc.error, actual_error, "{}", tc.name); 269 | } 270 | } 271 | } 272 | -------------------------------------------------------------------------------- /src/functions/string.rs: -------------------------------------------------------------------------------- 1 | //! String functions. 2 | 3 | use super::{Arguments, Function, args_1, args_3, args_4}; 4 | use crate::{ 5 | bytes::ByteString, 6 | error::Error, 7 | eval::{C, CompileContext}, 8 | span::{S, Span, SpanExt}, 9 | value::Value, 10 | }; 11 | use std::{convert::TryInto, ops::Range}; 12 | 13 | //------------------------------------------------------------------------------ 14 | 15 | /// Converts the SQL "start, length" representation of a range of characters to 16 | /// Rust's range representation: 17 | /// 18 | /// * the index is converted from 1-based to 0-based. 19 | /// * negative length is treated as the same as zero length. 20 | /// * the range is clamped within `0..=isize::MAX`. 21 | fn sql_start_length_to_range(start: isize, length: isize) -> Range { 22 | let start = start - 1; 23 | let end = start.saturating_add(length.max(0)); 24 | let start = start.try_into().unwrap_or(0_usize); 25 | let end = end.try_into().unwrap_or(start); 26 | start..end 27 | } 28 | 29 | /// The unit used to index a (byte) string. 30 | #[derive(Debug, Copy, Clone)] 31 | pub enum Unit { 32 | /// Index the string using characters (code points). 33 | Characters, 34 | /// Index the string using bytes (code units). 
35 | Octets, 36 | } 37 | 38 | impl Unit { 39 | fn parse_sql_range(self, input: &ByteString, start: isize, length: isize) -> Range { 40 | let range = sql_start_length_to_range(start, length); 41 | match self { 42 | Self::Octets => input.clamp_range(range), 43 | Self::Characters => input.char_range(range), 44 | } 45 | } 46 | 47 | fn length_of(self, input: &ByteString) -> usize { 48 | match self { 49 | Self::Octets => input.len(), 50 | Self::Characters => input.char_len(), 51 | } 52 | } 53 | } 54 | 55 | #[test] 56 | fn test_parse_sql_range() { 57 | let b = ByteString::from("123456789".to_owned()); 58 | for unit in &[Unit::Octets, Unit::Characters] { 59 | assert_eq!(unit.parse_sql_range(&b, 1, isize::MAX), 0..9); 60 | assert_eq!(unit.parse_sql_range(&b, 0, isize::MAX), 0..9); 61 | assert_eq!(unit.parse_sql_range(&b, -100, isize::MAX), 0..9); 62 | assert_eq!(unit.parse_sql_range(&b, 3, isize::MAX), 2..9); 63 | assert_eq!(unit.parse_sql_range(&b, 9, isize::MAX), 8..9); 64 | assert_eq!(unit.parse_sql_range(&b, 100, isize::MAX), 9..9); 65 | 66 | assert_eq!(unit.parse_sql_range(&b, 1, 1), 0..1); 67 | assert_eq!(unit.parse_sql_range(&b, 3, 5), 2..7); 68 | assert_eq!(unit.parse_sql_range(&b, 5, 99), 4..9); 69 | assert_eq!(unit.parse_sql_range(&b, 7, 0), 6..6); 70 | assert_eq!(unit.parse_sql_range(&b, 9, -99), 8..8); 71 | assert_eq!(unit.parse_sql_range(&b, 0, 5), 0..4); 72 | assert_eq!(unit.parse_sql_range(&b, -70, 77), 0..6); 73 | assert_eq!(unit.parse_sql_range(&b, 70, 77), 9..9); 74 | assert_eq!(unit.parse_sql_range(&b, -70, -77), 0..0); 75 | assert_eq!(unit.parse_sql_range(&b, 70, -77), 9..9); 76 | } 77 | 78 | let b = ByteString::from("ßs≠🥰".to_owned()); 79 | // char 1 (ß) = index 0 80 | // char 2 (s) = index 2 81 | // char 3 (≠) = index 3 82 | // char 4 (🥰) = index 6 83 | // char ∞ = index 10 84 | assert_eq!(Unit::Characters.parse_sql_range(&b, 1, isize::MAX), 0..10); 85 | assert_eq!(Unit::Characters.parse_sql_range(&b, 2, isize::MAX), 2..10); 86 | 
assert_eq!(Unit::Characters.parse_sql_range(&b, 3, isize::MAX), 3..10); 87 | assert_eq!(Unit::Characters.parse_sql_range(&b, 4, isize::MAX), 6..10); 88 | assert_eq!(Unit::Characters.parse_sql_range(&b, 5, isize::MAX), 10..10); 89 | assert_eq!(Unit::Characters.parse_sql_range(&b, 0, isize::MAX), 0..10); 90 | assert_eq!(Unit::Characters.parse_sql_range(&b, 100, isize::MAX), 10..10); 91 | assert_eq!(Unit::Characters.parse_sql_range(&b, -100, isize::MAX), 0..10); 92 | 93 | assert_eq!(Unit::Characters.parse_sql_range(&b, 1, 1), 0..2); 94 | assert_eq!(Unit::Characters.parse_sql_range(&b, 2, 2), 2..6); 95 | assert_eq!(Unit::Characters.parse_sql_range(&b, 3, 99), 3..10); 96 | assert_eq!(Unit::Characters.parse_sql_range(&b, 4, 0), 6..6); 97 | assert_eq!(Unit::Characters.parse_sql_range(&b, 5, -99), 10..10); 98 | assert_eq!(Unit::Characters.parse_sql_range(&b, -70, 77), 0..10); 99 | assert_eq!(Unit::Characters.parse_sql_range(&b, 70, 77), 10..10); 100 | assert_eq!(Unit::Characters.parse_sql_range(&b, -70, -77), 0..0); 101 | assert_eq!(Unit::Characters.parse_sql_range(&b, 70, -77), 10..10); 102 | 103 | let b = ByteString::from("a-úţf".to_owned()); 104 | // char 1 (a) = index 0 105 | // char 2 (-) = index 1 106 | // char 3 (ú) = index 2 107 | // char 4 (ţ) = index 4 108 | // char 5 (f) = index 6 109 | // char ∞ = index 7 110 | assert_eq!(Unit::Characters.parse_sql_range(&b, 1, isize::MAX), 0..7); 111 | assert_eq!(Unit::Characters.parse_sql_range(&b, 3, isize::MAX), 2..7); 112 | assert_eq!(Unit::Characters.parse_sql_range(&b, 5, isize::MAX), 6..7); 113 | assert_eq!(Unit::Characters.parse_sql_range(&b, 2, 3), 1..6); 114 | assert_eq!(Unit::Characters.parse_sql_range(&b, 4, 1), 4..6); 115 | assert_eq!(Unit::Characters.parse_sql_range(&b, 2, 0), 1..1); 116 | assert_eq!(Unit::Characters.parse_sql_range(&b, 4, 0), 4..4); 117 | } 118 | 119 | //------------------------------------------------------------------------------ 120 | 121 | /// The `substring` SQL function. 
122 | #[derive(Debug)] 123 | pub struct Substring( 124 | /// The string unit used by the function. 125 | pub Unit, 126 | ); 127 | 128 | impl Function for Substring { 129 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 130 | let (mut input, start, length) = args_3(span, args, None, None, Some(None))?; 131 | let range = self.0.parse_sql_range(&input, start, length.unwrap_or(0)); 132 | if length.is_some() { 133 | input.truncate(range.end); 134 | } 135 | if range.start > 0 { 136 | input.drain_init(range.start); 137 | } 138 | Ok(C::Constant(input.into())) 139 | } 140 | } 141 | 142 | //------------------------------------------------------------------------------ 143 | 144 | /// The `char_length` SQL function. 145 | #[derive(Debug)] 146 | pub struct CharLength; 147 | 148 | /// The `octet_length` SQL function. 149 | #[derive(Debug)] 150 | pub struct OctetLength; 151 | 152 | impl Function for CharLength { 153 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 154 | let input = args_1::(span, args, None)?; 155 | Ok(C::Constant(input.char_len().into())) 156 | } 157 | } 158 | 159 | impl Function for OctetLength { 160 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 161 | let input = args_1::(span, args, None)?; 162 | Ok(C::Constant(input.len().into())) 163 | } 164 | } 165 | 166 | //------------------------------------------------------------------------------ 167 | 168 | /// The `overlay` SQL function. 169 | #[derive(Debug)] 170 | pub struct Overlay( 171 | /// The string unit used by the function. 172 | pub Unit, 173 | ); 174 | 175 | impl Function for Overlay { 176 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 177 | let (mut input, placing, start, length) = args_4(span, args, None, None, None, Some(None))?; 178 | // length should never > isize::MAX. 
179 | let length = length.unwrap_or_else(|| self.0.length_of(&placing).try_into().unwrap()); 180 | let range = self.0.parse_sql_range(&input, start, length); 181 | input.splice(range, placing); 182 | Ok(C::Constant(input.into())) 183 | } 184 | } 185 | 186 | //------------------------------------------------------------------------------ 187 | 188 | /// The string concatenation (`||`) SQL function. 189 | #[derive(Debug)] 190 | pub struct Concat; 191 | 192 | impl Function for Concat { 193 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 194 | match Value::sql_concat(args.iter().map(|arg| &arg.inner)) { 195 | Ok(result) => Ok(C::Constant(result)), 196 | Err(e) => Err(e.span(span)), 197 | } 198 | } 199 | } 200 | -------------------------------------------------------------------------------- /src/functions/time.rs: -------------------------------------------------------------------------------- 1 | //! Time functions. 2 | 3 | use super::{Arguments, Function, args_1}; 4 | use crate::{ 5 | error::Error, 6 | eval::{C, CompileContext}, 7 | span::{ResultExt, S, Span}, 8 | value::{TIMESTAMP_FORMAT, Value}, 9 | }; 10 | 11 | use chrono::NaiveDateTime; 12 | 13 | /// The `timestamp` SQL function 14 | #[derive(Debug)] 15 | pub struct Timestamp; 16 | 17 | impl Function for Timestamp { 18 | fn compile(&self, _: &CompileContext, span: Span, args: Arguments) -> Result> { 19 | let input = args_1::(span, args, None)?; 20 | let timestamp = NaiveDateTime::parse_from_str(&input, TIMESTAMP_FORMAT).span_err(span)?; 21 | Ok(C::Constant(Value::Timestamp(timestamp))) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/lexctr.rs: -------------------------------------------------------------------------------- 1 | //! Lexicographical counter. 2 | 3 | use std::fmt; 4 | 5 | /// A counter which prints numbers in lexicographic order and smaller numbers 6 | /// are also shorter. 
7 | /// 8 | /// The output sequence is like: 9 | /// * 000, 001, …, 099, 10 | /// * 10000, 10001, …, 19999, 11 | /// * 2000000, …, 2999999, 12 | /// * … 13 | /// * 900000000000000000000, …, 999999999999999999999. 14 | /// 15 | /// It can count up to 10^(20) distinct numbers. 16 | #[derive(Debug, Copy, Clone)] 17 | pub struct LexCtr { 18 | prefix: usize, 19 | count: u64, 20 | limit: u64, 21 | } 22 | 23 | impl Default for LexCtr { 24 | fn default() -> Self { 25 | Self { 26 | prefix: 0, 27 | count: 0, 28 | limit: 100, 29 | } 30 | } 31 | } 32 | 33 | impl LexCtr { 34 | /// Increases the counter by 1. 35 | /// 36 | /// # Panics 37 | /// 38 | /// Panics if the count exceeds 10^(20). 39 | pub fn inc(&mut self) { 40 | self.count += 1; 41 | if self.count >= self.limit { 42 | self.limit *= 100; 43 | self.prefix += 1; 44 | self.count = 0; 45 | } 46 | } 47 | } 48 | 49 | impl fmt::Display for LexCtr { 50 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 51 | write!(f, "{0}{2:01$}", self.prefix, self.prefix * 2 + 2, self.count) 52 | } 53 | } 54 | 55 | #[test] 56 | fn test_lexctr() { 57 | let mut lexctr = LexCtr::default(); 58 | assert_eq!(lexctr.to_string(), "000"); 59 | lexctr.inc(); 60 | assert_eq!(lexctr.to_string(), "001"); 61 | for _ in 1..99 { 62 | lexctr.inc(); 63 | } 64 | assert_eq!(lexctr.to_string(), "099"); 65 | lexctr.inc(); 66 | assert_eq!(lexctr.to_string(), "10000"); 67 | for _ in 10000..19999 { 68 | lexctr.inc(); 69 | } 70 | assert_eq!(lexctr.to_string(), "19999"); 71 | lexctr.inc(); 72 | assert_eq!(lexctr.to_string(), "2000000"); 73 | } 74 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![warn( 2 | clippy::pedantic, 3 | missing_debug_implementations, 4 | trivial_casts, 5 | trivial_numeric_casts, 6 | unreachable_pub, 7 | variant_size_differences, 8 | missing_docs, 9 | rust_2024_compatibility, 10 | deprecated_in_future, 11 
| future_incompatible, 12 | let_underscore, 13 | clippy::undocumented_unsafe_blocks, 14 | clippy::as_underscore, 15 | clippy::assertions_on_result_states, 16 | clippy::branches_sharing_code, 17 | clippy::cognitive_complexity, 18 | clippy::collection_is_never_read, 19 | clippy::dbg_macro, 20 | clippy::debug_assert_with_mut_call, 21 | clippy::derive_partial_eq_without_eq, 22 | clippy::format_push_string, 23 | clippy::if_then_some_else_none, 24 | clippy::imprecise_flops, 25 | clippy::infinite_loop, 26 | clippy::iter_on_empty_collections, 27 | clippy::iter_on_single_items, 28 | clippy::iter_with_drain, 29 | clippy::large_stack_frames, 30 | clippy::let_underscore_must_use, 31 | clippy::let_underscore_untyped, 32 | clippy::lossy_float_literal, 33 | clippy::mixed_read_write_in_expression, 34 | clippy::multiple_unsafe_ops_per_block, 35 | clippy::mutex_atomic, 36 | clippy::mutex_integer, 37 | clippy::needless_collect, 38 | clippy::needless_pass_by_ref_mut, 39 | clippy::or_fun_call, 40 | clippy::rc_buffer, 41 | clippy::read_zero_byte_vec, 42 | clippy::redundant_clone, 43 | clippy::redundant_pub_crate, 44 | clippy::redundant_type_annotations, 45 | clippy::rest_pat_in_fully_bound_structs, 46 | clippy::same_name_method, 47 | clippy::str_to_string, 48 | clippy::string_lit_as_bytes, 49 | clippy::string_to_string, 50 | clippy::suspicious_operation_groupings, 51 | clippy::todo, 52 | clippy::trivial_regex, 53 | clippy::try_err, 54 | clippy::tuple_array_conversions, 55 | clippy::unimplemented, 56 | clippy::use_self, 57 | clippy::useless_let_if_seq, 58 | clippy::verbose_file_reads 59 | )] 60 | #![allow( 61 | edition_2024_expr_fragment_specifier, 62 | if_let_rescope, 63 | tail_expr_drop_order, 64 | clippy::module_name_repetitions, 65 | clippy::must_use_candidate, 66 | clippy::missing_errors_doc, 67 | clippy::option_if_let_else, 68 | clippy::missing_panics_doc 69 | )] 70 | 71 | //! The reusable library powering `dbgen`. 
72 | 73 | /// The full version of this library, for use in the CLI 74 | pub const FULL_VERSION: &str = concat!( 75 | "\nVersion: v", 76 | env!("CARGO_PKG_VERSION"), 77 | "\nCommit: ", 78 | env!("VERGEN_GIT_SHA"), 79 | "\nTarget: ", 80 | env!("VERGEN_CARGO_TARGET_TRIPLE"), 81 | ); 82 | 83 | pub mod array; 84 | pub mod bytes; 85 | #[cfg(feature = "cli")] 86 | pub mod cli; 87 | pub mod error; 88 | pub mod eval; 89 | pub mod format; 90 | pub mod functions; 91 | pub mod lexctr; 92 | pub mod number; 93 | pub mod parser; 94 | #[cfg(feature = "cli")] 95 | pub mod schemagen_cli; 96 | pub mod span; 97 | pub mod value; 98 | pub mod writer; 99 | -------------------------------------------------------------------------------- /src/parser.pest: -------------------------------------------------------------------------------- 1 | WHITESPACE = _{ " "|"\t"|"\r"|"\n"|"\x0b"|"\x0c" } 2 | COMMENT = _{ 3 | "--" ~ (!NEWLINE ~ ANY)* ~ NEWLINE | 4 | "/*" ~ !"{{" ~ (!"*/" ~ ANY)* ~ "*/" | 5 | "/*{{" ~ (!"*/" ~ !"}}*/" ~ ANY)* ~ "*/" 6 | } 7 | 8 | back_quoted = @{ "`" ~ (!"`" ~ ANY | "``")* ~ "`" } 9 | single_quoted = @{ "'" ~ (!"'" ~ ANY | "''")* ~ "'" } 10 | double_quoted = @{ "\"" ~ (!"\"" ~ ANY | "\"\"")* ~ "\"" } 11 | 12 | ident = @{ 13 | back_quoted | 14 | double_quoted | 15 | "[" ~ (!"]" ~ ANY)* ~ "]" | 16 | (ASCII_ALPHA|"_") ~ (ASCII_ALPHANUMERIC|"_")* 17 | } 18 | number = @{ 19 | ^"0x" ~ ASCII_HEX_DIGIT+ | 20 | (ASCII_DIGIT+ ~ ("." ~ ASCII_DIGIT*)? | "." ~ ASCII_DIGIT+) ~ (^"e" ~ ("+"|"-")? ~ ASCII_DIGIT+)? 
21 | } 22 | 23 | b = _{ !(ASCII_ALPHANUMERIC|"_") } 24 | kw_create = @{ ^"create" ~ b } 25 | kw_table = @{ ^"table" ~ b } 26 | kw_or = @{ ^"or" ~ b } 27 | kw_and = @{ ^"and" ~ b } 28 | kw_not = @{ ^"not" ~ b } 29 | kw_is = @{ ^"is" ~ b } 30 | kw_rownum = @{ ^"rownum" ~ b } 31 | kw_subrownum = @{ ^"subrownum" ~ b } 32 | kw_null = @{ ^"null" ~ b } 33 | kw_true = @{ ^"true" ~ b } 34 | kw_false = @{ ^"false" ~ b } 35 | kw_case = @{ ^"case" ~ b } 36 | kw_when = @{ ^"when" ~ b } 37 | kw_then = @{ ^"then" ~ b } 38 | kw_else = @{ ^"else" ~ b } 39 | kw_end = @{ ^"end" ~ b } 40 | kw_timestamp = @{ ^"timestamp" ~ b } 41 | kw_interval = @{ ^"interval" ~ b } 42 | kw_week = @{ ^"week" ~ b } 43 | kw_day = @{ ^"day" ~ b } 44 | kw_hour = @{ ^"hour" ~ b } 45 | kw_minute = @{ ^"minute" ~ b } 46 | kw_second = @{ ^"second" ~ b } 47 | kw_millisecond = @{ ^"millisecond" ~ b } 48 | kw_microsecond = @{ ^"microsecond" ~ b } 49 | kw_substring = @{ ^"substring" ~ b } 50 | kw_from = @{ ^"from" ~ b } 51 | kw_for = @{ ^"for" ~ b } 52 | kw_using = @{ ^"using" ~ b } 53 | kw_characters = @{ ^"characters" ~ b } 54 | kw_octets = @{ ^"octets" ~ b } 55 | kw_overlay = @{ ^"overlay" ~ b } 56 | kw_placing = @{ ^"placing" ~ b } 57 | kw_current_timestamp = @{ ^"current_timestamp" ~ b } 58 | kw_array = @{ ^"array" ~ b } 59 | kw_each = @{ ^"each" ~ b } 60 | kw_rows = @{ ^"row" ~ ^"s"? 
~ b } 61 | kw_of = @{ ^"of" ~ b } 62 | kw_generate = @{ ^"generate" ~ b } 63 | kw_x = @{ ^"x" ~ b } 64 | 65 | open_paren = @{ "(" } 66 | close_paren = @{ ")" } 67 | 68 | op_le = @{ "<=" } 69 | op_lt = @{ "<" } 70 | op_ge = @{ ">=" } 71 | op_gt = @{ ">" } 72 | op_eq = @{ "=" } 73 | op_ne = @{ "<>" } 74 | op_add = @{ "+" } 75 | op_sub = @{ "-" } 76 | op_concat = @{ "||" } 77 | op_mul = @{ "*" } 78 | op_float_div = @{ "/" } 79 | op_semicolon = @{ ";" } 80 | op_bit_and = @{ "&" } 81 | op_bit_or = @{ "|" } 82 | op_bit_xor = @{ "^" } 83 | op_bit_not = @{ "~" } 84 | op_comma = @{ "," } 85 | 86 | balanced = _{ 87 | "(" ~ balanced* ~ ")" | 88 | "[" ~ balanced* ~ "]" | 89 | "{" ~ balanced* ~ "}" | 90 | back_quoted | 91 | single_quoted | 92 | double_quoted | 93 | !("("|"["|"{"|"`"|"'"|"\""|")"|"]"|"}") ~ ANY 94 | } 95 | 96 | qname = { 97 | ident ~ ("." ~ ident){0,2} 98 | } 99 | 100 | any_text = { 101 | (!"{{" ~ !"/*{{" ~ balanced)+ 102 | } 103 | content = _{ 104 | "{{" ~ stmt ~ "}}" | "/*{{" ~ stmt ~ "}}*/" 105 | } 106 | 107 | dependency_directive_content = _{ 108 | "{{" ~ dependency_directive ~ "}}" | "/*{{" ~ dependency_directive ~ "}}*/" 109 | } 110 | dependency_directive = { 111 | kw_for ~ kw_each ~ kw_rows ~ kw_of ~ qname ~ 112 | kw_generate ~ expr ~ kw_rows ~ kw_of ~ qname 113 | } 114 | single_table = { 115 | kw_create ~ kw_table ~ qname ~ 116 | open_paren ~ (ident | op_comma | any_text | content)* ~ close_paren ~ any_text? 
117 | } 118 | create_table = _{ 119 | SOI ~ content* ~ single_table ~ (dependency_directive_content ~ single_table)* ~ EOI 120 | } 121 | 122 | stmt = { 123 | expr ~ (op_semicolon ~ expr)* 124 | } 125 | expr = { 126 | ("@" ~ ident ~ ":=")* ~ expr_or 127 | } 128 | expr_or = { 129 | expr_and ~ (kw_or ~ expr_and)* 130 | } 131 | expr_and = { 132 | expr_not ~ (kw_and ~ expr_not)* 133 | } 134 | expr_not = { 135 | kw_not* ~ expr_cmp 136 | } 137 | is_not = { kw_is ~ kw_not } 138 | expr_cmp = { 139 | expr_bit_or ~ ((is_not | kw_is | op_le | op_ge | op_ne | op_lt | op_gt | op_eq) ~ expr_bit_or)? 140 | } 141 | expr_bit_or = { 142 | expr_bit_and ~ ((op_bit_or | op_bit_xor) ~ expr_bit_and)* 143 | } 144 | expr_bit_and = { 145 | expr_add ~ (op_bit_and ~ expr_add)* 146 | } 147 | expr_add = { 148 | expr_mul ~ ((op_add | op_sub | op_concat) ~ expr_mul)* 149 | } 150 | expr_mul = { 151 | expr_unary ~ ((op_mul | op_float_div) ~ expr_unary)* 152 | } 153 | expr_unary = { 154 | (op_add | op_sub | op_bit_not)* ~ expr_primary ~ ("[" ~ expr ~ "]")* 155 | } 156 | 157 | expr_primary = { 158 | kw_rownum | kw_subrownum | kw_null | kw_true | kw_false | kw_current_timestamp | 159 | expr_group | 160 | single_quoted | // string 161 | number | 162 | expr_case_value_when | 163 | expr_timestamp | 164 | expr_interval | 165 | expr_hex | 166 | expr_get_variable | 167 | expr_array | 168 | expr_substring_function | 169 | expr_overlay_function | 170 | expr_function 171 | } 172 | 173 | expr_group = { 174 | "(" ~ expr ~ ")" 175 | } 176 | 177 | expr_case_value_when = { 178 | kw_case ~ case_value_when_value? ~ 179 | (kw_when ~ case_value_when_pattern ~ kw_then ~ case_value_when_result)+ ~ 180 | (kw_else ~ case_value_when_else)? 
~ 181 | kw_end 182 | } 183 | case_value_when_value = { expr } 184 | case_value_when_pattern = { expr } 185 | case_value_when_result = { stmt } 186 | case_value_when_else = { stmt } 187 | 188 | expr_timestamp = { 189 | kw_timestamp ~ expr_primary 190 | } 191 | expr_interval = { 192 | kw_interval ~ expr ~ (kw_week | kw_day | kw_hour | kw_minute | kw_second | kw_millisecond | kw_microsecond) 193 | } 194 | expr_hex = { 195 | kw_x ~ expr_primary 196 | } 197 | 198 | expr_get_variable = { 199 | "@" ~ ident ~ !":=" 200 | } 201 | 202 | expr_array = { 203 | kw_array ~ "[" ~ (expr ~ ("," ~ expr)*)? ~ "]" 204 | } 205 | 206 | expr_substring_function = { 207 | kw_substring ~ "(" ~ substring_input ~ 208 | (kw_from ~ substring_from)? ~ 209 | (kw_for ~ substring_for)? ~ 210 | (kw_using ~ (kw_octets | kw_characters))? ~ 211 | ")" 212 | } 213 | substring_input = { expr } 214 | substring_from = { expr } 215 | substring_for = { expr } 216 | 217 | expr_overlay_function = { 218 | kw_overlay ~ "(" ~ substring_input ~ 219 | kw_placing ~ overlay_placing ~ 220 | kw_from ~ substring_from ~ 221 | (kw_for ~ substring_for)? ~ 222 | (kw_using ~ (kw_octets | kw_characters))? ~ 223 | ")" 224 | } 225 | overlay_placing = { expr } 226 | 227 | expr_function = { 228 | qname ~ "(" ~ (expr ~ ("," ~ expr)*)? ~ ")" 229 | } 230 | -------------------------------------------------------------------------------- /src/span.rs: -------------------------------------------------------------------------------- 1 | //! Span of substrings from the template file, for error reporting. 2 | 3 | use crate::parser::Rule; 4 | use pest::error::{Error, ErrorVariant}; 5 | 6 | /// The span of an object, indicating the start and end offsets where the 7 | /// object was parsed from the template file. 8 | #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] 9 | pub struct Span(usize); 10 | 11 | impl Default for Span { 12 | fn default() -> Self { 13 | Self(usize::MAX) 14 | } 15 | } 16 | 17 | /// Registry of spans. 
18 | #[derive(Default, Debug, Clone)] 19 | pub struct Registry(Vec>); 20 | 21 | impl Registry { 22 | /// Registers a span represented by a Pest span. 23 | pub fn register(&mut self, span: pest::Span<'_>) -> Span { 24 | let res = Span(self.0.len()); 25 | self.0.push(Error::new_from_span( 26 | ErrorVariant::CustomError { message: String::new() }, 27 | span, 28 | )); 29 | res 30 | } 31 | 32 | /// Describes a spanned error as a human-readable string. 33 | pub fn describe(&self, err: &S) -> String { 34 | use std::fmt::Write; 35 | let mut buf = format!("Error: {}\n", err.inner); 36 | 37 | if let Some(e) = self.0.get(err.span.0) { 38 | writeln!(&mut buf, "{e}\n").unwrap(); 39 | } 40 | 41 | let mut err: &(dyn std::error::Error + 'static) = &err.inner; 42 | while let Some(source) = err.source() { 43 | writeln!(&mut buf, "Cause: {source}").unwrap(); 44 | err = source; 45 | } 46 | 47 | buf 48 | } 49 | } 50 | 51 | /// A wrapper around an object, annotating it with a span. 52 | #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] 53 | pub struct S { 54 | /// The object itself. 55 | pub inner: T, 56 | /// The span associated with the object. 57 | pub span: Span, 58 | } 59 | 60 | /// Extension trait for all values, for associating them with a span. 61 | pub trait SpanExt: Sized { 62 | /// Associates this value with a span. 63 | fn span(self, span: Span) -> S; 64 | 65 | /// Associates this value with the default (null) span. 66 | fn no_span(self) -> S; 67 | } 68 | 69 | impl SpanExt for T { 70 | fn span(self, span: Span) -> S { 71 | S { span, inner: self } 72 | } 73 | 74 | fn no_span(self) -> S { 75 | self.span(Span::default()) 76 | } 77 | } 78 | 79 | /// Extension trait for `Result` for associating part of it with a span. 80 | pub trait ResultExt { 81 | /// The ok type of the result. 82 | type Ok; 83 | /// The error type of the result. 84 | type Err; 85 | 86 | /// Associates the same span to both the ok and error part of the result. 
87 | fn span_ok_err>(self, span: Span) -> Result, S>; 88 | /// Associates the span to the error part of the result. 89 | fn span_err>(self, span: Span) -> Result>; 90 | 91 | /// Associates the default (null) span to the error part of the result. 92 | fn no_span_err>(self) -> Result>; 93 | } 94 | 95 | impl ResultExt for Result { 96 | type Ok = T; 97 | type Err = E; 98 | 99 | fn span_ok_err>(self, span: Span) -> Result, S> { 100 | match self { 101 | Ok(t) => Ok(S { span, inner: t }), 102 | Err(e) => Err(S { span, inner: e.into() }), 103 | } 104 | } 105 | 106 | fn span_err>(self, span: Span) -> Result> { 107 | self.map_err(|e| S { span, inner: e.into() }) 108 | } 109 | 110 | fn no_span_err>(self) -> Result> { 111 | self.span_err(Span::default()) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/writer.rs: -------------------------------------------------------------------------------- 1 | //! Helpers for writing out table rows. 2 | 3 | use crate::{ 4 | error::Error, 5 | eval::{Schema, State, Table}, 6 | span::{ResultExt, S}, 7 | value::Value, 8 | }; 9 | use std::{convert::TryInto, mem}; 10 | 11 | /// A generic writer which could accept rows of values. 12 | pub trait Writer { 13 | /// Writes a single value. 14 | fn write_value(&mut self, value: &Value) -> Result<(), S>; 15 | 16 | /// Writes the content at the beginning of each file. 17 | fn write_file_header(&mut self, schema: &Schema<'_>) -> Result<(), S>; 18 | 19 | /// Writes the content of an INSERT statement before all rows. 20 | fn write_header(&mut self, schema: &Schema<'_>) -> Result<(), S>; 21 | 22 | /// Writes the column name before a value. 23 | fn write_value_header(&mut self, column: &str) -> Result<(), S>; 24 | 25 | /// Writes the separator between every value. 26 | fn write_value_separator(&mut self) -> Result<(), S>; 27 | 28 | /// Writes the separator between every row. 
29 | fn write_row_separator(&mut self) -> Result<(), S>; 30 | 31 | /// Writes the content of an INSERT statement after all rows. 32 | fn write_trailer(&mut self) -> Result<(), S>; 33 | } 34 | 35 | /// The state of a table within [`Env`]. 36 | #[derive(Debug)] 37 | struct TableState<'a, W: Writer> { 38 | /// The parsed table. 39 | table: &'a Table, 40 | /// The table's schema. 41 | schema: Schema<'a>, 42 | /// Writer associated with the table. 43 | writer: W, 44 | /// Records that, within an [`Env::write_row()`] call, whether this table has not been visited 45 | /// yet (either as a root or derived tables). This member will be reset to `true` at the start 46 | /// of every `Env::write_row()` call. 47 | fresh: bool, 48 | /// Records if any rows have been written out. This determines whether an INSERT statement is 49 | /// needed to be written or not. This member will be reset to `true` after calling 50 | /// [`Env::write_trailer()`]. 51 | empty: bool, 52 | } 53 | 54 | /// An environment for writing rows from multiple tables generated from a single template. 55 | #[derive(Debug)] 56 | pub struct Env<'a, W: Writer> { 57 | state: &'a mut State, 58 | tables: Vec>, 59 | } 60 | 61 | impl<'a, W: Writer> Env<'a, W> { 62 | /// Constructs a new row-writing environment. 63 | pub fn new( 64 | tables: &'a [Table], 65 | state: &'a mut State, 66 | qualified: bool, 67 | mut new_writer: impl FnMut(&Table) -> Result>, 68 | ) -> Result> { 69 | Ok(Self { 70 | tables: tables 71 | .iter() 72 | .map(|table| { 73 | let mut writer = new_writer(table)?; 74 | let schema = table.schema(qualified); 75 | writer.write_file_header(&schema)?; 76 | Ok::<_, S>(TableState { 77 | table, 78 | schema, 79 | writer, 80 | fresh: true, 81 | empty: true, 82 | }) 83 | }) 84 | .collect::>()?, 85 | state, 86 | }) 87 | } 88 | 89 | /// Returns an iterator of tables and writers associated with this environment. 
90 | pub fn tables(&mut self) -> impl Iterator + '_ { 91 | self.tables.iter_mut().map(|table| (table.table, &mut table.writer)) 92 | } 93 | 94 | fn write_one_row(&mut self, table_index: usize) -> Result<(), S> { 95 | let table = &mut self.tables[table_index]; 96 | 97 | if mem::take(&mut table.empty) { 98 | table.writer.write_header(&table.schema) 99 | } else { 100 | table.writer.write_row_separator() 101 | }?; 102 | 103 | let values = table.table.row.eval(self.state)?; 104 | 105 | for (col_index, (column, value)) in table.schema.column_names().zip(&values).enumerate() { 106 | if col_index != 0 { 107 | table.writer.write_value_separator()?; 108 | } 109 | table.writer.write_value_header(column)?; 110 | table.writer.write_value(value)?; 111 | } 112 | 113 | for (child, count) in &table.table.derived { 114 | let count = count.eval(self.state)?.try_into().span_err(count.0.span)?; 115 | 116 | for r in 1..=count { 117 | self.state.sub_row_num = r; 118 | self.write_one_row(*child)?; 119 | } 120 | } 121 | 122 | Ok(()) 123 | } 124 | 125 | fn mark_descendant_visited(&mut self, root: usize) { 126 | let mut ids = vec![root]; 127 | while let Some(id) = ids.pop() { 128 | let table = &mut self.tables[id]; 129 | table.fresh = false; 130 | ids.extend(table.table.derived.iter().map(|child| child.0)); 131 | } 132 | } 133 | 134 | /// Writes one row from each root table 135 | pub fn write_row(&mut self) -> Result<(), S> { 136 | for table in &mut self.tables { 137 | table.fresh = true; 138 | } 139 | for i in 0..self.tables.len() { 140 | if self.tables[i].fresh { 141 | self.mark_descendant_visited(i); 142 | self.state.sub_row_num = 1; 143 | self.write_one_row(i)?; 144 | } 145 | } 146 | self.state.increase_row_num(); 147 | Ok(()) 148 | } 149 | 150 | /// Concludes an INSERT statement after writing multiple rows. 151 | /// 152 | /// This method delegates to [`Writer::write_trailer()`] if any rows have been written out 153 | /// previously for a table. 
Otherwise, if no rows have been written, this method does nothing. 154 | pub fn write_trailer(&mut self) -> Result<(), S> { 155 | for table in &mut self.tables { 156 | if !mem::replace(&mut table.empty, true) { 157 | table.writer.write_trailer()?; 158 | } 159 | } 160 | Ok(()) 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /tests/check.rs: -------------------------------------------------------------------------------- 1 | use dbgen::{ 2 | cli::{Args, run}, 3 | span::Registry, 4 | }; 5 | use diff::{Result as DiffResult, lines}; 6 | use serde_json::from_reader; 7 | use std::{ 8 | env, 9 | error::Error, 10 | ffi::OsStr, 11 | fs::{File, read, read_dir, remove_file}, 12 | path::Path, 13 | str::from_utf8, 14 | }; 15 | use tempfile::tempdir; 16 | 17 | #[test] 18 | fn run_test() { 19 | main().unwrap(); 20 | } 21 | 22 | fn main() -> Result<(), Box> { 23 | let out_dir = tempdir()?; 24 | 25 | let no_print_diff = env::var_os("DIFF").as_deref() == Some(OsStr::new("0")); 26 | 27 | let data_dir = Path::new(file!()).with_file_name("data"); 28 | let mut content_differed = false; 29 | 30 | for child_dir in read_dir(data_dir)? { 31 | let child_dir = child_dir?; 32 | if !child_dir.file_type()?.is_dir() { 33 | continue; 34 | } 35 | 36 | let child_path = child_dir.path(); 37 | eprintln!("Running {}...", child_path.display()); 38 | let mut args: Args = from_reader(File::open(child_path.join("flags.json"))?)?; 39 | args.template = Some(child_path.join("template.sql")); 40 | args.out_dir = out_dir.path().to_owned(); 41 | args.quiet = true; 42 | 43 | let mut registry = Registry::default(); 44 | run(args, &mut registry).map_err(|e| { 45 | eprintln!("{}", registry.describe(&e)); 46 | e 47 | })?; 48 | 49 | for result_entry in read_dir(out_dir.path())? 
{ 50 | let result_entry = result_entry?; 51 | let expected_path = child_path.join(result_entry.file_name()); 52 | let actual_path = result_entry.path(); 53 | eprintln!("Comparing {} vs {} ...", expected_path.display(), actual_path.display()); 54 | let expected_content = read(expected_path)?; 55 | let actual_content = read(&actual_path)?; 56 | if expected_content != actual_content { 57 | content_differed = true; 58 | let expected_string = from_utf8(&expected_content)?; 59 | let actual_string = from_utf8(&actual_content)?; 60 | if no_print_diff { 61 | eprintln!("\x1b[32m{}\x1b[0m", actual_string); 62 | } else { 63 | for diff in lines(&expected_string, &actual_string) { 64 | match diff { 65 | DiffResult::Left(missing) => { 66 | eprintln!("\x1b[31m- {}\x1b[0m", missing); 67 | } 68 | DiffResult::Right(unexpected) => { 69 | eprintln!("\x1b[32m+ {}\x1b[0m", unexpected); 70 | } 71 | DiffResult::Both(same, _) => { 72 | eprintln!(" {}", same); 73 | } 74 | } 75 | } 76 | } 77 | } 78 | remove_file(actual_path)?; 79 | } 80 | } 81 | 82 | assert!(!content_differed); 83 | 84 | Ok(()) 85 | } 86 | -------------------------------------------------------------------------------- /tests/data/array/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 1, 3 | "format": "sql-insert-set", 4 | "components": ["data"] 5 | } 6 | -------------------------------------------------------------------------------- /tests/data/array/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result SET 2 | empty = ARRAY[], 3 | one = ARRAY[1], 4 | two = ARRAY[1, 2], 5 | nested = ARRAY[ARRAY[], ARRAY[3], ARRAY[4, 5]], 6 | compare_1st_elem = 1, 7 | compare_2nd_elem = 0, 8 | compare_longer = 0, 9 | compare_equal = 0, 10 | compare_shorter = 1, 11 | elem_1 = 10, 12 | elem_3 = 30, 13 | elem_0 = NULL, 14 | elem_999 = NULL, 15 | nested_elem = 13, 16 | array_var = -1, 17 | gs_pos_step = ARRAY[11, 
16, 21], 18 | gs_pos_step_empty = ARRAY[], 19 | gs_neg_step_empty = ARRAY[], 20 | gs_neg_step = ARRAY[21, 16, 11], 21 | gs_pos_float = ARRAY[1.1, 1.6, 2.1], 22 | gs_neg_float = ARRAY[2.25, 1.75, 1.25], 23 | gs_implicit = ARRAY[1, 2, 3, 4], 24 | gs_implicit_single = ARRAY[3], 25 | gs_step_too_large = ARRAY[4], 26 | gs_timestamp = ARRAY['2019-01-01 13:00:00', '2019-01-01 13:20:00', '2019-01-01 13:40:00', '2019-01-01 14:00:00'], 27 | gs_elem_3 = 21, 28 | gs_elem_999 = NULL, 29 | shuffle_empty = ARRAY[], 30 | shuffle_one = ARRAY['one'], 31 | shuffle_elem_1 = 1, 32 | shuffle_elem_999 = NULL, 33 | gs_compare_concrete = 1; 34 | 35 | -------------------------------------------------------------------------------- /tests/data/array/template.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result ( 2 | empty {{ array[] }} 3 | one {{ @a := array[1] }} 4 | two {{ array[1, 2] }} 5 | nested {{ array[array[], array[3], array[4, 5]] }} 6 | 7 | compare_1st_elem {{ array[3, 6] < array[4, 1] }} 8 | compare_2nd_elem {{ array[3, 6] < array[3, 1] }} 9 | compare_longer {{ array[3, 6] < array[3] }} 10 | compare_equal {{ array[3, 6] < array[3, 6] }} 11 | compare_shorter {{ array[3, 6] < array[3, 6, 9] }} 12 | 13 | elem_1 {{ array[10, 20, 30][1] }} 14 | elem_3 {{ array[10, 20, 30][3] }} 15 | elem_0 {{ array[10, 20, 30][0] }} 16 | elem_999 {{ array[10, 20, 30][999] }} 17 | nested_elem {{ array[array[13]][1][1] }} 18 | 19 | array_var {{ -@a[1] }} 20 | 21 | gs_pos_step {{ generate_series(11, 21, 5) }} 22 | gs_pos_step_empty {{ generate_series(21, 11, 5) }} 23 | gs_neg_step_empty {{ generate_series(11, 21, -5) }} 24 | gs_neg_step {{ generate_series(21, 11, -5) }} 25 | gs_pos_float {{ generate_series(1.1, 2.25, 0.5) }} 26 | gs_neg_float {{ generate_series(2.25, 1.1, -0.5) }} 27 | gs_implicit {{ generate_series(1, 4) }} 28 | gs_implicit_single {{ generate_series(3, 3) }} 29 | gs_step_too_large {{ generate_series(4, 5, 7) }} 30 | gs_timestamp 
{{ generate_series(TIMESTAMP '2019-01-01 13:00:00', TIMESTAMP '2019-01-01 14:00:00', INTERVAL 20 MINUTE) }} 31 | gs_elem_3 {{ generate_series(11, 21, 5)[3] }} 32 | gs_elem_999 {{ generate_series(11, 21, 5)[999] }} 33 | 34 | shuffle_empty {{ rand.shuffle(array[]) }} 35 | shuffle_one {{ rand.shuffle(array['one']) }} 36 | shuffle_elem_1 {{ rand.shuffle(@a)[1] }} 37 | shuffle_elem_999 {{ rand.shuffle(@a)[999] }} 38 | 39 | gs_compare_concrete {{ generate_series(11, 21, 5) = array[11, 16, 21] }} 40 | ); 41 | -------------------------------------------------------------------------------- /tests/data/check_eval_result/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 1, 3 | "format": "sql-insert-set", 4 | "components": ["data"], 5 | "format_true": "TRUE", 6 | "format_false": "FALSE" 7 | } 8 | -------------------------------------------------------------------------------- /tests/data/check_eval_result/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result SET 2 | number = 1, 3 | add = 7, 4 | sub = -5, 5 | mul = 42, 6 | div = 0.5, 7 | neg_4 = 3, 8 | neg_1 = -3, 9 | and_11 = TRUE, 10 | and_10 = FALSE, 11 | and_1n = NULL, 12 | and_01 = FALSE, 13 | and_00 = FALSE, 14 | and_0n = FALSE, 15 | and_n1 = NULL, 16 | and_n0 = FALSE, 17 | and_nn = NULL, 18 | or_11 = TRUE, 19 | or_10 = TRUE, 20 | or_1n = TRUE, 21 | or_01 = TRUE, 22 | or_00 = FALSE, 23 | or_0n = NULL, 24 | or_n1 = TRUE, 25 | or_n0 = NULL, 26 | or_nn = NULL, 27 | is_11 = TRUE, 28 | is_10 = FALSE, 29 | is_1n = FALSE, 30 | is_01 = FALSE, 31 | is_00 = TRUE, 32 | is_0n = FALSE, 33 | is_n1 = FALSE, 34 | is_n0 = FALSE, 35 | is_nn = TRUE, 36 | is_not_11 = FALSE, 37 | is_not_10 = TRUE, 38 | is_not_1n = TRUE, 39 | is_not_01 = TRUE, 40 | is_not_00 = FALSE, 41 | is_not_0n = TRUE, 42 | is_not_n1 = TRUE, 43 | is_not_n0 = TRUE, 44 | is_not_nn = FALSE, 45 | not_true = FALSE, 46 | not_false = TRUE, 47 | 
not_null = NULL, 48 | u64_max = 18446744073709551615, 49 | neg_i64_min = -9223372036854775809, 50 | float_normal = 1.5, 51 | float_e300 = 1.5e300, 52 | float_no_dot = 1e300, 53 | float_no_zero = 5e299, 54 | float_dot_e = 5e-250, 55 | float_e_plus = 60000000000.0, 56 | string = 'hello world', 57 | string_concat = 'hello, world!!111', 58 | string_emoji = '👋🌍', 59 | greatest = 9, 60 | least = 0, 61 | case_6 = 'six', 62 | case_5 = NULL, 63 | case_4 = 'otherwise', 64 | case_3 = 'three', 65 | case_cond = 'minus three', 66 | parethensis = 7, 67 | chain_add_sub = -24, 68 | div_by_0 = NULL, 69 | gt = TRUE, 70 | lt = FALSE, 71 | ge = TRUE, 72 | le = FALSE, 73 | eq = FALSE, 74 | ne = TRUE, 75 | var_def = 18, 76 | var_use = 18, 77 | chain_def = 'e', 78 | chain_use = '18eee', 79 | ts_normal = '2010-01-01 00:00:00', 80 | ts_frac = '2010-01-01 00:00:00.000001', 81 | ts_add = '2010-01-01 00:00:00.000001', 82 | ts_compare = TRUE, 83 | ts_sub = '2009-12-04 00:00:00', 84 | ts_mul_iv = '2009-11-20 00:00:00', 85 | ts_add_iv = '2010-01-01 16:10:47', 86 | ts_sub_ts = '168 22:01:35', 87 | backslash = '\', 88 | round_0 = 123.0, 89 | round_1 = 123.5, 90 | round_2 = -123.98, 91 | round_9 = 123.456, 92 | round_neg_1 = 120.0, 93 | round_neg_9 = 0.0, 94 | interval_0 = '00:00:00', 95 | interval_pos = '00:20:34.567890', 96 | interval_neg = '-00:20:34.567890', 97 | interval_big = '14288 23:31:30', 98 | interval_big_neg = '-14288 23:31:30', 99 | interval_add = '00:06:25', 100 | interval_sub = '00:05:35', 101 | interval_mul_r = '02:30:00', 102 | interval_mul_l = '01:51:00', 103 | interval_fdiv_r = '00:00:14.400000', 104 | interval_fdiv_r0 = NULL, 105 | interval_fdiv = 14.4, 106 | interval_fdiv_0 = NULL, 107 | interval_div = 14, 108 | interval_div_0 = NULL, 109 | interval_mod = '00:00:10', 110 | interval_mod_0 = NULL, 111 | interval_neg_l = '-00:06:00', 112 | chain_and = FALSE, 113 | chain_or = TRUE, 114 | chain_add = 18, 115 | chain_mul = 210, 116 | chain_sub = -8, 117 | chain_div = 0.875, 118 | 
char_length = 45, 119 | character_length = 45, 120 | octet_length = 93, 121 | coalesce_12 = 1, 122 | coalesce_1n = 1, 123 | coalesce_n2 = 2, 124 | coalesce_nn = NULL, 125 | semicolon = 600, 126 | bit_and = 64, 127 | bit_or = 95, 128 | bit_xor = 79, 129 | bit_not = -68, 130 | bool_false = FALSE, 131 | bool_true = TRUE, 132 | bool_concat = '10', 133 | bool_is = TRUE, 134 | bool_eq = TRUE, 135 | bool_arith = 2, 136 | decode_hex_lower = X'ABCDEF', 137 | decode_hex_upper = X'ABCDEF', 138 | decode_hex_empty = '', 139 | decode_hex_unicode = '¿?', 140 | decode_hex_function = X'ABCD', 141 | encode_hex = 'C2BF3F', 142 | encode_base64 = 'ULWytOExmcWkO37y5xVWI6+Si8DCrhO/Fgkj28POZBrmxnFnNkpu6lfZVae3DvZJD1Av20JdMzyW/PekA7vkTA==', 143 | decode_base64 = X'50B5B2B4E13199C5A43B7EF2E7155623AF928BC0C2AE13BF160923DBC3CE641AE6C67167364A6EEA57D955A7B70EF6490F502FDB425D333C96FCF7A403BBE44C', 144 | encode_base64url = 'ULWytOExmcWkO37y5xVWI6-Si8DCrhO_Fgkj28POZBrmxnFnNkpu6lfZVae3DvZJD1Av20JdMzyW_PekA7vkTA', 145 | decode_base64url = X'50B5B2B4E13199C5A43B7EF2E7155623AF928BC0C2AE13BF160923DBC3CE641AE6C67167364A6EEA57D955A7B70EF6490F502FDB425D333C96FCF7A403BBE44C'; 146 | 147 | -------------------------------------------------------------------------------- /tests/data/check_eval_result/template.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result ( 2 | number {{ 1 }} 3 | add {{ 2 + 5 }} 4 | sub {{ 3 - 8 }} 5 | mul {{ 7 * 6 }} 6 | div {{ 12 / 24 }} 7 | neg_4 {{ - - - - 3 }} 8 | neg_1 {{ - 3 }} 9 | 10 | and_11 {{ TRUE AND TRUE }} 11 | and_10 {{ TRUE AND FALSE }} 12 | and_1n {{ TRUE AND NULL }} 13 | and_01 {{ FALSE AND TRUE }} 14 | and_00 {{ FALSE AND FALSE }} 15 | and_0n {{ FALSE AND NULL }} 16 | and_n1 {{ NULL AND TRUE }} 17 | and_n0 {{ NULL AND FALSE }} 18 | and_nn {{ NULL AND NULL }} 19 | 20 | or_11 {{ TRUE OR TRUE }} 21 | or_10 {{ TRUE OR FALSE }} 22 | or_1n {{ TRUE OR NULL }} 23 | or_01 {{ FALSE OR TRUE }} 24 | or_00 {{ FALSE OR FALSE 
}} 25 | or_0n {{ FALSE OR NULL }} 26 | or_n1 {{ NULL OR TRUE }} 27 | or_n0 {{ NULL OR FALSE }} 28 | or_nn {{ NULL OR NULL }} 29 | 30 | is_11 {{ TRUE IS TRUE }} 31 | is_10 {{ TRUE IS FALSE }} 32 | is_1n {{ TRUE IS NULL }} 33 | is_01 {{ FALSE IS TRUE }} 34 | is_00 {{ FALSE IS FALSE }} 35 | is_0n {{ FALSE IS NULL }} 36 | is_n1 {{ NULL IS TRUE }} 37 | is_n0 {{ NULL IS FALSE }} 38 | is_nn {{ NULL IS NULL }} 39 | 40 | is_not_11 {{ TRUE IS NOT TRUE }} 41 | is_not_10 {{ TRUE IS NOT FALSE }} 42 | is_not_1n {{ TRUE IS NOT NULL }} 43 | is_not_01 {{ FALSE IS NOT TRUE }} 44 | is_not_00 {{ FALSE IS NOT FALSE }} 45 | is_not_0n {{ FALSE IS NOT NULL }} 46 | is_not_n1 {{ NULL IS NOT TRUE }} 47 | is_not_n0 {{ NULL IS NOT FALSE }} 48 | is_not_nn {{ NULL IS NOT NULL }} 49 | 50 | not_true {{ NOT TRUE }} 51 | not_false {{ NOT FALSE }} 52 | not_null {{ NOT NULL }} 53 | 54 | u64_max {{ 0xffffffffffffffff }} 55 | neg_i64_min {{ -0x8000000000000001 }} 56 | 57 | float_normal {{ 1.5 }} 58 | float_e300 {{ 1.5e300 }} 59 | float_no_dot {{ 1e300 }} 60 | float_no_zero {{ .5e300 }} 61 | float_dot_e {{ 5.e-250 }} 62 | float_e_plus {{ 6e+10 }} 63 | 64 | string {{ 'hello world' }} 65 | string_concat {{ 'hello' || ', ' || 'world!!' 
|| 111 }} 66 | string_emoji {{ '👋' || '🌍' }} 67 | 68 | greatest {{ greatest(1, 3, 2, 9, 6, 0, 5) }} 69 | least {{ least(1, 3, 2, 9, 6, 0, 5) }} 70 | 71 | case_6 {{ case 6 72 | when 1 then 'one' 73 | when 3 then 'three' 74 | when 6 then 'six' 75 | when 10 then 'ten' 76 | end }} 77 | case_5 {{ case 5 78 | when 1 then 'one' 79 | when 3 then 'three' 80 | when 6 then 'six' 81 | when 10 then 'ten' 82 | end }} 83 | case_4 {{ case 4 84 | when 1 then 'one' 85 | when 3 then 'three' 86 | when 6 then 'six' 87 | when 10 then 'ten' 88 | else 'otherwise' 89 | end }} 90 | case_3 {{ case 3 91 | when 1 then 'one' 92 | when 3 then 'three' 93 | when 6 then 'six' 94 | when 10 then 'ten' 95 | else 'otherwise' 96 | end }} 97 | case_cond {{ case 98 | when null then 'null' 99 | when false then 'false' 100 | when -3 then 'minus three' 101 | when true then 'true' 102 | end }} 103 | 104 | parethensis {{ ((((((((((((((((((((((((7)))))))))))))))))))))))) }} 105 | chain_add_sub {{ 1 + 2 - 3 - 4 + 5 + 6 + 7 - 8 - 9 - 10 - 11 }} 106 | div_by_0 {{ 1 / 0 }} 107 | 108 | gt {{ 60 > 3 }} 109 | lt {{ 60 < 3 }} 110 | ge {{ 60 >= 3 }} 111 | le {{ 60 <= 3 }} 112 | eq {{ 60 = 3 }} 113 | ne {{ 60 <> 3 }} 114 | 115 | var_def {{ @a := 18 }} 116 | var_use {{ @a }} 117 | 118 | chain_def {{ @b := @c := @d := 'e' }} 119 | chain_use {{ @a || @b || @c || @d }} 120 | 121 | ts_normal {{ timestamp '2010-01-01 00:00:00' }} 122 | ts_frac {{ timestamp '2010-01-01 00:00:00.000001' }} 123 | ts_add {{ timestamp '2010-01-01 00:00:00' + interval 1 microsecond }} 124 | ts_compare {{ timestamp '2010-01-01 00:00:00' + interval 1 microsecond = timestamp '2010-01-01 00:00:00.000001' }} 125 | ts_sub {{ timestamp '2010-01-01 00:00:00' - interval 4 week }} 126 | ts_mul_iv {{ timestamp '2010-01-01 00:00:00' - interval 3.5 day * 12 }} 127 | ts_add_iv {{ timestamp '2010-01-01 00:00:00' + interval 15 hour + interval 71 minute - interval 13 second }} 128 | ts_sub_ts {{ timestamp '2010-01-23 04:56:07' - timestamp '2009-08-07 06:54:32' }} 
129 | 130 | backslash {{ '\' }} 131 | 132 | round_0 {{ round(123.45) }} 133 | round_1 {{ round(123.45, 1) }} 134 | round_2 {{ round(-123.975, 2) }} 135 | round_9 {{ round(123.456, 9) }} 136 | round_neg_1 {{ round(123.456, -1) }} 137 | round_neg_9 {{ round(123.456, -9) }} 138 | 139 | interval_0 {{ interval 0 microsecond }} 140 | interval_pos {{ interval 1234567890 microsecond }} 141 | interval_neg {{ interval -1234567890 microsecond }} 142 | interval_big {{ interval 1234567890 second }} 143 | interval_big_neg{{ interval -1234567890 second }} 144 | 145 | interval_add {{ interval 6 minute + interval 25 second }} 146 | interval_sub {{ interval 6 minute - interval 25 second }} 147 | interval_mul_r {{ interval 6 minute * 25 }} 148 | interval_mul_l {{ 18.5 * interval 6 minute }} 149 | interval_fdiv_r {{ interval 6 minute / 25 }} 150 | interval_fdiv_r0{{ interval 6 minute / 0 }} 151 | interval_fdiv {{ interval 6 minute / interval 25 second }} 152 | interval_fdiv_0 {{ interval 6 minute / interval 0 second }} 153 | interval_div {{ div(interval 6 minute, interval 25 second) }} 154 | interval_div_0 {{ div(interval 6 minute, interval 0 second) }} 155 | interval_mod {{ mod(interval 6 minute, interval 25 second) }} 156 | interval_mod_0 {{ mod(interval 6 minute, interval 0 second) }} 157 | interval_neg_l {{ -interval 6 minute }} 158 | 159 | chain_and {{ true and true and false }} 160 | chain_or {{ false or false or true }} 161 | chain_add {{ 5 + 6 + 7 }} 162 | chain_mul {{ 5 * 6 * 7 }} 163 | chain_sub {{ 5 - 6 - 7 }} 164 | chain_div {{ 7 / 4 / 2 }} 165 | 166 | char_length {{ char_length('Unicodeの文字集合の符号空間は0–10FFFF₁₆で111万4112符号位置がある。') }} 167 | character_length{{ character_length('Unicodeの文字集合の符号空間は0–10FFFF₁₆で111万4112符号位置がある。') }} 168 | octet_length {{ octet_length('Unicodeの文字集合の符号空間は0–10FFFF₁₆で111万4112符号位置がある。') }} 169 | 170 | coalesce_12 {{ coalesce(1, 2) }} 171 | coalesce_1n {{ coalesce(1, null) }} 172 | coalesce_n2 {{ coalesce(null, 2) }} 173 | coalesce_nn {{ coalesce(null, 
null) }} 174 | 175 | semicolon {{ @e := 567; @f := @e - 7; @f + 40 }} 176 | 177 | bit_and {{ 80 & 91 & 68 }} 178 | bit_or {{ 80 | 91 | 68 }} 179 | bit_xor {{ 80 ^ 91 ^ 68 }} 180 | bit_not {{ ~ ~ - ~ - 69 }} 181 | 182 | bool_false {{ false }} 183 | bool_true {{ true }} 184 | bool_concat {{ true || false }} 185 | bool_is {{ 1 is true }} 186 | bool_eq {{ false = 0 }} 187 | bool_arith {{ true + true }} 188 | 189 | decode_hex_lower {{ x'abcdef' }} 190 | decode_hex_upper {{ X'AB CD EF' }} 191 | decode_hex_empty {{ x'' }} 192 | decode_hex_unicode {{ x'c2bf 3f' }} 193 | decode_hex_function {{ from_hex('ab' || 'cd') }} 194 | encode_hex {{ to_hex('¿?') }} 195 | encode_base64 {{ to_base64(x'50B5B2B4E13199C5A43B7EF2E7155623AF928BC0C2AE13BF160923DBC3CE641AE6C67167364A6EEA57D955A7B70EF6490F502FDB425D333C96FCF7A403BBE44C') }} 196 | decode_base64 {{ from_base64('ULWytOExmcWkO37y5xVWI6+Si8DCrhO/' || x'0d0a' || 'Fgkj28POZBrmxnFnNkpu6lfZVae3DvZJD1Av20JdMzyW/PekA7vkTA=') }} 197 | encode_base64url {{ to_base64url(x'50B5B2B4E13199C5A43B7EF2E7155623AF928BC0C2AE13BF160923DBC3CE641AE6C67167364A6EEA57D955A7B70EF6490F502FDB425D333C96FCF7A403BBE44C') }} 198 | decode_base64url {{ from_base64url('ULWytOExmcWkO37y5xVWI6-Si8DCrhO_Fgkj28POZBrmxnFnNkpu6lfZVae3DvZJD1Av20JdMzyW_PekA7vkTA') }} 199 | ); 200 | -------------------------------------------------------------------------------- /tests/data/comment-expr/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 1, 3 | "components": ["data"] 4 | } 5 | -------------------------------------------------------------------------------- /tests/data/comment-expr/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | (1, 2, '{{6}}', '/*{{7}}'); 3 | -------------------------------------------------------------------------------- /tests/data/comment-expr/template.sql: 
-------------------------------------------------------------------------------- 1 | CREATE TABLE result ( 2 | {{ 1 }} 3 | /*{{ 2 }}*/ 4 | /*{{ 3 */ 5 | /* 4 }}*/ 6 | /*{ 5 }*/ 7 | /*{{ '{{6}}' }}*/ 8 | /*{{ '/*{{7}}' }}*/ 9 | ); 10 | -------------------------------------------------------------------------------- /tests/data/compress/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 1, 3 | "rows_count": 100, 4 | "compression": "xz", 5 | "compress_level": 9 6 | } 7 | -------------------------------------------------------------------------------- /tests/data/compress/result-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result (); -------------------------------------------------------------------------------- /tests/data/compress/result.1.sql.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kennytm/dbgen/0996fe5f0a1cac116bd3cfc22ce379f92f26c4a5/tests/data/compress/result.1.sql.xz -------------------------------------------------------------------------------- /tests/data/compress/template.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result ({{ rownum }} {{ rownum * 7 }}); -------------------------------------------------------------------------------- /tests/data/csv/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 1, 3 | "format": "csv" 4 | } 5 | -------------------------------------------------------------------------------- /tests/data/csv/result-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result ( 2 | a INT, 3 | b TEXT, 4 | c TIMESTAMP, 5 | d INTERVAL, 6 | e INT); 7 | -------------------------------------------------------------------------------- 
/tests/data/csv/result.1.csv: -------------------------------------------------------------------------------- 1 | 1,"""hello""",2001-04-19 18:08:51,00:08:20,\N 2 | -------------------------------------------------------------------------------- /tests/data/csv/template.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result ( 2 | a INT /*{{ 1 }}*/, 3 | b TEXT /*{{ '"hello"' }}*/, 4 | c TIMESTAMP /*{{ TIMESTAMP '2001-04-19 18:08:51' }}*/, 5 | d INTERVAL /*{{ INTERVAL 500 SECOND }}*/, 6 | e INT /*{{ NULL }}*/ 7 | ); 8 | -------------------------------------------------------------------------------- /tests/data/derived-tables/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "files_count": 2, 3 | "inserts_count": 3, 4 | "schema_name": "test", 5 | "rng": "hc128", 6 | "seed": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 7 | "components": ["schema", "table", "data"] 8 | } 9 | -------------------------------------------------------------------------------- /tests/data/derived-tables/template.sql: -------------------------------------------------------------------------------- 1 | create table animal( 2 | {{ rownum }} 3 | {{ subrownum }} 4 | {{ @a := rand.range(0, 100) }} 5 | ); 6 | 7 | {{ for each row of animal generate 4 rows of limb }} 8 | create table limb( 9 | {{ rownum }} 10 | {{ @l := subrownum }} 11 | {{ @a }} 12 | {{ @toes := least(rand.range_inclusive(0, 5), rownum-1) }} 13 | ); 14 | 15 | {{ for each row of limb generate @toes rows of toe }} 16 | create table toe( 17 | {{ rownum }} 18 | {{ @l }} 19 | {{ subrownum }} 20 | {{ @a }} 21 | ); 22 | 23 | {{ for each row of ANIMAL generate 1 row of HEAD }} 24 | create table head( 25 | {{ rownum }} 26 | {{ subrownum }} 27 | {{ @a * 100 }} 28 | ); 29 | -------------------------------------------------------------------------------- /tests/data/derived-tables/test-schema-create.sql: 
-------------------------------------------------------------------------------- 1 | CREATE SCHEMA test; 2 | -------------------------------------------------------------------------------- /tests/data/derived-tables/test.animal-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE animal (); 2 | 3 | -------------------------------------------------------------------------------- /tests/data/derived-tables/test.animal.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO animal VALUES 2 | (1, 1, 45); 3 | INSERT INTO animal VALUES 4 | (2, 1, 11); 5 | INSERT INTO animal VALUES 6 | (3, 1, 88); 7 | -------------------------------------------------------------------------------- /tests/data/derived-tables/test.animal.2.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO animal VALUES 2 | (4, 1, 7); 3 | INSERT INTO animal VALUES 4 | (5, 1, 99); 5 | INSERT INTO animal VALUES 6 | (6, 1, 97); 7 | -------------------------------------------------------------------------------- /tests/data/derived-tables/test.head-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE head (); 2 | -------------------------------------------------------------------------------- /tests/data/derived-tables/test.head.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO head VALUES 2 | (1, 1, 4500); 3 | INSERT INTO head VALUES 4 | (2, 1, 1100); 5 | INSERT INTO head VALUES 6 | (3, 1, 8800); 7 | -------------------------------------------------------------------------------- /tests/data/derived-tables/test.head.2.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO head VALUES 2 | (4, 1, 700); 3 | INSERT INTO head VALUES 4 | (5, 1, 9900); 5 | INSERT INTO head VALUES 6 | (6, 1, 9700); 7 | 
-------------------------------------------------------------------------------- /tests/data/derived-tables/test.limb-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE limb (); 2 | 3 | -------------------------------------------------------------------------------- /tests/data/derived-tables/test.limb.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO limb VALUES 2 | (1, 1, 45, 0), 3 | (1, 2, 45, 0), 4 | (1, 3, 45, 0), 5 | (1, 4, 45, 0); 6 | INSERT INTO limb VALUES 7 | (2, 1, 11, 1), 8 | (2, 2, 11, 1), 9 | (2, 3, 11, 1), 10 | (2, 4, 11, 1); 11 | INSERT INTO limb VALUES 12 | (3, 1, 88, 2), 13 | (3, 2, 88, 2), 14 | (3, 3, 88, 1), 15 | (3, 4, 88, 1); 16 | -------------------------------------------------------------------------------- /tests/data/derived-tables/test.limb.2.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO limb VALUES 2 | (4, 1, 7, 3), 3 | (4, 2, 7, 3), 4 | (4, 3, 7, 3), 5 | (4, 4, 7, 3); 6 | INSERT INTO limb VALUES 7 | (5, 1, 99, 1), 8 | (5, 2, 99, 1), 9 | (5, 3, 99, 3), 10 | (5, 4, 99, 2); 11 | INSERT INTO limb VALUES 12 | (6, 1, 97, 5), 13 | (6, 2, 97, 2), 14 | (6, 3, 97, 5), 15 | (6, 4, 97, 1); 16 | -------------------------------------------------------------------------------- /tests/data/derived-tables/test.toe-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE toe (); 2 | 3 | -------------------------------------------------------------------------------- /tests/data/derived-tables/test.toe.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO toe VALUES 2 | (2, 1, 1, 11), 3 | (2, 2, 1, 11), 4 | (2, 3, 1, 11), 5 | (2, 4, 1, 11); 6 | INSERT INTO toe VALUES 7 | (3, 1, 1, 88), 8 | (3, 1, 2, 88), 9 | (3, 2, 1, 88), 10 | (3, 2, 2, 88), 11 | (3, 3, 1, 88), 12 | (3, 4, 1, 88); 13 | 
-------------------------------------------------------------------------------- /tests/data/derived-tables/test.toe.2.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO toe VALUES 2 | (4, 1, 1, 7), 3 | (4, 1, 2, 7), 4 | (4, 1, 3, 7), 5 | (4, 2, 1, 7), 6 | (4, 2, 2, 7), 7 | (4, 2, 3, 7), 8 | (4, 3, 1, 7), 9 | (4, 3, 2, 7), 10 | (4, 3, 3, 7), 11 | (4, 4, 1, 7), 12 | (4, 4, 2, 7), 13 | (4, 4, 3, 7); 14 | INSERT INTO toe VALUES 15 | (5, 1, 1, 99), 16 | (5, 2, 1, 99), 17 | (5, 3, 1, 99), 18 | (5, 3, 2, 99), 19 | (5, 3, 3, 99), 20 | (5, 4, 1, 99), 21 | (5, 4, 2, 99); 22 | INSERT INTO toe VALUES 23 | (6, 1, 1, 97), 24 | (6, 1, 2, 97), 25 | (6, 1, 3, 97), 26 | (6, 1, 4, 97), 27 | (6, 1, 5, 97), 28 | (6, 2, 1, 97), 29 | (6, 2, 2, 97), 30 | (6, 3, 1, 97), 31 | (6, 3, 2, 97), 32 | (6, 3, 3, 97), 33 | (6, 3, 4, 97), 34 | (6, 3, 5, 97), 35 | (6, 4, 1, 97); 36 | -------------------------------------------------------------------------------- /tests/data/div-mod/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 3, 3 | "format": "sql-insert-set", 4 | "components": ["data"] 5 | } 6 | -------------------------------------------------------------------------------- /tests/data/div-mod/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result SET 2 | div_int_pos = -2, 3 | mod_int_pos = 1, 4 | div_int_neg = 2, 5 | mod_int_neg = -1, 6 | div_float_pos = -2.0, 7 | mod_float_pos = 1.5, 8 | div_float_neg = 2.0, 9 | mod_float_neg = -1.5, 10 | float_div_int = -2.25, 11 | float_div_float = -2.694444444444444; 12 | 13 | INSERT INTO result SET 14 | div_int_pos = NULL, 15 | mod_int_pos = NULL, 16 | div_int_neg = NULL, 17 | mod_int_neg = NULL, 18 | div_float_pos = NULL, 19 | mod_float_pos = NULL, 20 | div_float_neg = NULL, 21 | mod_float_neg = NULL, 22 | float_div_int = NULL, 23 | float_div_float = NULL; 24 | 25 | 
INSERT INTO result SET 26 | div_int_pos = 2, 27 | mod_int_pos = 1, 28 | div_int_neg = -2, 29 | mod_int_neg = -1, 30 | div_float_pos = 2.0, 31 | mod_float_pos = 1.5, 32 | div_float_neg = -2.0, 33 | mod_float_neg = -1.5, 34 | float_div_int = 2.25, 35 | float_div_float = 2.694444444444444; 36 | 37 | -------------------------------------------------------------------------------- /tests/data/div-mod/template.sql: -------------------------------------------------------------------------------- 1 | create table result ( 2 | div_int_pos {{ div(9, 4*(rownum - 2)) }} 3 | mod_int_pos {{ mod(9, 4*(rownum - 2)) }} 4 | div_int_neg {{ div(-9, 4*(rownum - 2)) }} 5 | mod_int_neg {{ mod(-9, 4*(rownum - 2)) }} 6 | div_float_pos {{ div(9.7, 4.1*(rownum - 2)) }} 7 | mod_float_pos {{ mod(9.7, 4.1*(rownum - 2)) }} 8 | div_float_neg {{ div(-9.7, 4.1*(rownum - 2)) }} 9 | mod_float_neg {{ mod(-9.7, 4.1*(rownum - 2)) }} 10 | float_div_int {{ 9 / (4*(rownum - 2)) }} 11 | float_div_float {{ 9.7 / (3.6*(rownum - 2)) }} 12 | ); 13 | -------------------------------------------------------------------------------- /tests/data/escape-backslash/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 1, 3 | "escape_backslash": true 4 | } 5 | -------------------------------------------------------------------------------- /tests/data/escape-backslash/result-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result (); 2 | -------------------------------------------------------------------------------- /tests/data/escape-backslash/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | ('\\'); 3 | -------------------------------------------------------------------------------- /tests/data/escape-backslash/template.sql: -------------------------------------------------------------------------------- 1 | CREATE 
TABLE result ( 2 | {{ '\' }} 3 | ); 4 | -------------------------------------------------------------------------------- /tests/data/expr-in-middle/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 1 3 | } 4 | -------------------------------------------------------------------------------- /tests/data/expr-in-middle/result-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result ( 2 | a INT DEFAULT 0, 3 | b INT, 4 | c INT); 5 | -------------------------------------------------------------------------------- /tests/data/expr-in-middle/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | (1, 1, 1); 3 | -------------------------------------------------------------------------------- /tests/data/expr-in-middle/template.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result ( 2 | a INT {{ rownum }} DEFAULT 0, 3 | b INT, 4 | /*{{ rownum }}*/ 5 | c INT 6 | /*{{ rownum }}*/ 7 | ); 8 | -------------------------------------------------------------------------------- /tests/data/file-size/a.1000.csv: -------------------------------------------------------------------------------- 1 | "c1" 2 | "1aaaaaaaaaa" 3 | "2aaaaaaaaaa" 4 | "3aaaaaaaaaa" 5 | "4aaaaaaaaaa" 6 | "5aaaaaaaaaa" 7 | "6aaaaaaaaaa" 8 | "7aaaaaaaaaa" 9 | "8aaaaaaaaaa" 10 | -------------------------------------------------------------------------------- /tests/data/file-size/a.1001.csv: -------------------------------------------------------------------------------- 1 | "c1" 2 | "9aaaaaaaaaa" 3 | "10aaaaaaaaaa" 4 | -------------------------------------------------------------------------------- /tests/data/file-size/a.2000.csv: -------------------------------------------------------------------------------- 1 | "c1" 2 | "11aaaaaaaaaa" 3 | "12aaaaaaaaaa" 4 | 
"13aaaaaaaaaa" 5 | "14aaaaaaaaaa" 6 | "15aaaaaaaaaa" 7 | "16aaaaaaaaaa" 8 | "17aaaaaaaaaa" 9 | "18aaaaaaaaaa" 10 | -------------------------------------------------------------------------------- /tests/data/file-size/a.2001.csv: -------------------------------------------------------------------------------- 1 | "c1" 2 | "19aaaaaaaaaa" 3 | "20aaaaaaaaaa" 4 | -------------------------------------------------------------------------------- /tests/data/file-size/b.1000.csv: -------------------------------------------------------------------------------- 1 | "c1","c2" 2 | "1bbbbbbbbbb","1ccc" 3 | "1bbbbbbbbbb","2ccc" 4 | "2bbbbbbbbbb","1ccc" 5 | "2bbbbbbbbbb","2ccc" 6 | "3bbbbbbbbbb","1ccc" 7 | "3bbbbbbbbbb","2ccc" 8 | "4bbbbbbbbbb","1ccc" 9 | "4bbbbbbbbbb","2ccc" 10 | -------------------------------------------------------------------------------- /tests/data/file-size/b.1001.csv: -------------------------------------------------------------------------------- 1 | "c1","c2" 2 | "5bbbbbbbbbb","1ccc" 3 | "5bbbbbbbbbb","2ccc" 4 | "6bbbbbbbbbb","1ccc" 5 | "6bbbbbbbbbb","2ccc" 6 | "7bbbbbbbbbb","1ccc" 7 | "7bbbbbbbbbb","2ccc" 8 | "8bbbbbbbbbb","1ccc" 9 | "8bbbbbbbbbb","2ccc" 10 | -------------------------------------------------------------------------------- /tests/data/file-size/b.1002.csv: -------------------------------------------------------------------------------- 1 | "c1","c2" 2 | "9bbbbbbbbbb","1ccc" 3 | "9bbbbbbbbbb","2ccc" 4 | "10bbbbbbbbbb","1ccc" 5 | "10bbbbbbbbbb","2ccc" 6 | -------------------------------------------------------------------------------- /tests/data/file-size/b.2000.csv: -------------------------------------------------------------------------------- 1 | "c1","c2" 2 | "11bbbbbbbbbb","1ccc" 3 | "11bbbbbbbbbb","2ccc" 4 | "12bbbbbbbbbb","1ccc" 5 | "12bbbbbbbbbb","2ccc" 6 | -------------------------------------------------------------------------------- /tests/data/file-size/b.2001.csv: 
-------------------------------------------------------------------------------- 1 | "c1","c2" 2 | "13bbbbbbbbbb","1ccc" 3 | "13bbbbbbbbbb","2ccc" 4 | "14bbbbbbbbbb","1ccc" 5 | "14bbbbbbbbbb","2ccc" 6 | -------------------------------------------------------------------------------- /tests/data/file-size/b.2002.csv: -------------------------------------------------------------------------------- 1 | "c1","c2" 2 | "15bbbbbbbbbb","1ccc" 3 | "15bbbbbbbbbb","2ccc" 4 | "16bbbbbbbbbb","1ccc" 5 | "16bbbbbbbbbb","2ccc" 6 | -------------------------------------------------------------------------------- /tests/data/file-size/b.2003.csv: -------------------------------------------------------------------------------- 1 | "c1","c2" 2 | "17bbbbbbbbbb","1ccc" 3 | "17bbbbbbbbbb","2ccc" 4 | "18bbbbbbbbbb","1ccc" 5 | "18bbbbbbbbbb","2ccc" 6 | -------------------------------------------------------------------------------- /tests/data/file-size/b.2004.csv: -------------------------------------------------------------------------------- 1 | "c1","c2" 2 | "19bbbbbbbbbb","1ccc" 3 | "19bbbbbbbbbb","2ccc" 4 | "20bbbbbbbbbb","1ccc" 5 | "20bbbbbbbbbb","2ccc" 6 | -------------------------------------------------------------------------------- /tests/data/file-size/b.2005.csv: -------------------------------------------------------------------------------- 1 | "c1","c2" 2 | -------------------------------------------------------------------------------- /tests/data/file-size/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "total_count": 20, 3 | "rows_per_file": 10, 4 | "rows_count": 2, 5 | "format": "csv", 6 | "headers": true, 7 | "components": ["data"], 8 | "size": 98 9 | } -------------------------------------------------------------------------------- /tests/data/file-size/template.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE a ( 2 | c1 text {{ rownum || 'aaaaaaaaaa' }} 3 | ); 
4 | 5 | /*{{ for each row of a generate 2 row of b }}*/ 6 | CREATE TABLE b ( 7 | c1 text {{ rownum || 'bbbbbbbbbb' }}, 8 | c2 text {{ subrownum || 'ccc' }} 9 | ); 10 | -------------------------------------------------------------------------------- /tests/data/global-exprs/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "rows_count": 5, 3 | "initialize": [ 4 | "@d := 100" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /tests/data/global-exprs/result-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result (); 2 | -------------------------------------------------------------------------------- /tests/data/global-exprs/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | (19, 119), 3 | (20, 120), 4 | (21, 121), 5 | (22, 122), 6 | (23, 123); 7 | -------------------------------------------------------------------------------- /tests/data/global-exprs/template.sql: -------------------------------------------------------------------------------- 1 | {{ @a := 1 + 2 + 3 }} 2 | {{ @b := @a * 2 }} 3 | CREATE TABLE result ( 4 | {{ @c := rownum + @a + @b }} 5 | {{ @c + @d }} 6 | ); 7 | -------------------------------------------------------------------------------- /tests/data/headers-csv/a-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE a ( 2 | col1 int, 3 | col2 int, 4 | `col3` int, 5 | `col``4` int, 6 | "col5" int, 7 | "col""6" int, 8 | [col7] int, 9 | [col "8"] int); 10 | 11 | -------------------------------------------------------------------------------- /tests/data/headers-csv/a.1.csv: -------------------------------------------------------------------------------- 1 | "col1","col2","col3","col`4","col5","col""6","col7","col ""8""","","" 2 | 1,1,1,1,1,1,1,1,1,1 3 | 
2,2,2,2,2,2,2,2,2,2 4 | 3,3,3,3,3,3,3,3,3,3 5 | 4,4,4,4,4,4,4,4,4,4 6 | -------------------------------------------------------------------------------- /tests/data/headers-csv/a.2.csv: -------------------------------------------------------------------------------- 1 | "col1","col2","col3","col`4","col5","col""6","col7","col ""8""","","" 2 | 5,5,5,5,5,5,5,5,5,5 3 | 6,6,6,6,6,6,6,6,6,6 4 | 7,7,7,7,7,7,7,7,7,7 5 | 8,8,8,8,8,8,8,8,8,8 6 | -------------------------------------------------------------------------------- /tests/data/headers-csv/b-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE b ( 2 | id serial primary key, 3 | foo numeric(40, 20) unique); 4 | -------------------------------------------------------------------------------- /tests/data/headers-csv/b.1.csv: -------------------------------------------------------------------------------- 1 | "foo" 2 | 1 3 | 2 4 | 3 5 | 4 6 | -------------------------------------------------------------------------------- /tests/data/headers-csv/b.2.csv: -------------------------------------------------------------------------------- 1 | "foo" 2 | 5 3 | 6 4 | 7 5 | 8 6 | -------------------------------------------------------------------------------- /tests/data/headers-csv/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "files_count": 2, 3 | "inserts_count": 2, 4 | "rows_count": 2, 5 | "format": "csv", 6 | "headers": true 7 | } 8 | -------------------------------------------------------------------------------- /tests/data/headers-csv/template.sql: -------------------------------------------------------------------------------- 1 | create table a( 2 | col1 int {{rownum}}, 3 | col2 int, {{rownum}} 4 | `col3` int {{rownum}}, 5 | `col``4` int {{rownum}}, 6 | "col5" int {{rownum}}, 7 | "col""6" int {{rownum}}, 8 | [col7] int {{rownum}}, 9 | [col "8"] int {{rownum}} 10 | /* (anonymous column) */ {{rownum}} 11 | 
/* (anonymous column) */ {{rownum}} 12 | ); 13 | 14 | {{ for each row of a generate 1 row of b }} 15 | create table b( 16 | id serial primary key, 17 | foo numeric(40, 20) unique {{ rownum }} 18 | ); 19 | -------------------------------------------------------------------------------- /tests/data/headers-sql/a-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE a ( 2 | col1 int, 3 | col2 int, 4 | `col3` int, 5 | `col``4` int, 6 | "col5" int, 7 | "col""6" int, 8 | [col7] int, 9 | [col "8"] int); 10 | 11 | -------------------------------------------------------------------------------- /tests/data/headers-sql/a.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO a (col1, col2, `col3`, `col``4`, "col5", "col""6", [col7], [col "8"], , ) VALUES 2 | (1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 3 | (2, 2, 2, 2, 2, 2, 2, 2, 2, 2); 4 | INSERT INTO a (col1, col2, `col3`, `col``4`, "col5", "col""6", [col7], [col "8"], , ) VALUES 5 | (3, 3, 3, 3, 3, 3, 3, 3, 3, 3), 6 | (4, 4, 4, 4, 4, 4, 4, 4, 4, 4); 7 | -------------------------------------------------------------------------------- /tests/data/headers-sql/a.2.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO a (col1, col2, `col3`, `col``4`, "col5", "col""6", [col7], [col "8"], , ) VALUES 2 | (5, 5, 5, 5, 5, 5, 5, 5, 5, 5), 3 | (6, 6, 6, 6, 6, 6, 6, 6, 6, 6); 4 | INSERT INTO a (col1, col2, `col3`, `col``4`, "col5", "col""6", [col7], [col "8"], , ) VALUES 5 | (7, 7, 7, 7, 7, 7, 7, 7, 7, 7), 6 | (8, 8, 8, 8, 8, 8, 8, 8, 8, 8); 7 | -------------------------------------------------------------------------------- /tests/data/headers-sql/b-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE b ( 2 | id serial primary key, 3 | foo numeric(40, 20) unique); 4 | 
-------------------------------------------------------------------------------- /tests/data/headers-sql/b.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO b (foo) VALUES 2 | (1), 3 | (2); 4 | INSERT INTO b (foo) VALUES 5 | (3), 6 | (4); 7 | -------------------------------------------------------------------------------- /tests/data/headers-sql/b.2.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO b (foo) VALUES 2 | (5), 3 | (6); 4 | INSERT INTO b (foo) VALUES 5 | (7), 6 | (8); 7 | -------------------------------------------------------------------------------- /tests/data/headers-sql/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "files_count": 2, 3 | "inserts_count": 2, 4 | "rows_count": 2, 5 | "format": "sql", 6 | "headers": true 7 | } 8 | -------------------------------------------------------------------------------- /tests/data/headers-sql/template.sql: -------------------------------------------------------------------------------- 1 | create table a( 2 | col1 int {{rownum}}, 3 | col2 int, {{rownum}} 4 | `col3` int {{rownum}}, 5 | `col``4` int {{rownum}}, 6 | "col5" int {{rownum}}, 7 | "col""6" int {{rownum}}, 8 | [col7] int {{rownum}}, 9 | [col "8"] int {{rownum}} 10 | /* (anonymous column) */ {{rownum}} 11 | /* (anonymous column) */ {{rownum}} 12 | ); 13 | 14 | {{ for each row of a generate 1 row of b }} 15 | create table b( 16 | id serial primary key, 17 | foo numeric(40, 20) unique {{ rownum }} 18 | ); 19 | -------------------------------------------------------------------------------- /tests/data/jagged-output/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "total_count": 92, 3 | "rows_per_file": 35, 4 | "rows_count": 7 5 | } 6 | -------------------------------------------------------------------------------- 
/tests/data/jagged-output/result-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result (); 2 | -------------------------------------------------------------------------------- /tests/data/jagged-output/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | (1), 3 | (2), 4 | (3), 5 | (4), 6 | (5), 7 | (6), 8 | (7); 9 | INSERT INTO result VALUES 10 | (8), 11 | (9), 12 | (10), 13 | (11), 14 | (12), 15 | (13), 16 | (14); 17 | INSERT INTO result VALUES 18 | (15), 19 | (16), 20 | (17), 21 | (18), 22 | (19), 23 | (20), 24 | (21); 25 | INSERT INTO result VALUES 26 | (22), 27 | (23), 28 | (24), 29 | (25), 30 | (26), 31 | (27), 32 | (28); 33 | INSERT INTO result VALUES 34 | (29), 35 | (30), 36 | (31), 37 | (32), 38 | (33), 39 | (34), 40 | (35); 41 | -------------------------------------------------------------------------------- /tests/data/jagged-output/result.2.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | (36), 3 | (37), 4 | (38), 5 | (39), 6 | (40), 7 | (41), 8 | (42); 9 | INSERT INTO result VALUES 10 | (43), 11 | (44), 12 | (45), 13 | (46), 14 | (47), 15 | (48), 16 | (49); 17 | INSERT INTO result VALUES 18 | (50), 19 | (51), 20 | (52), 21 | (53), 22 | (54), 23 | (55), 24 | (56); 25 | INSERT INTO result VALUES 26 | (57), 27 | (58), 28 | (59), 29 | (60), 30 | (61), 31 | (62), 32 | (63); 33 | INSERT INTO result VALUES 34 | (64), 35 | (65), 36 | (66), 37 | (67), 38 | (68), 39 | (69), 40 | (70); 41 | -------------------------------------------------------------------------------- /tests/data/jagged-output/result.3.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | (71), 3 | (72), 4 | (73), 5 | (74), 6 | (75), 7 | (76), 8 | (77); 9 | INSERT INTO result VALUES 10 | (78), 11 | (79), 12 | (80), 13 | 
(81), 14 | (82), 15 | (83), 16 | (84); 17 | INSERT INTO result VALUES 18 | (85), 19 | (86), 20 | (87), 21 | (88), 22 | (89), 23 | (90), 24 | (91); 25 | INSERT INTO result VALUES 26 | (92); 27 | -------------------------------------------------------------------------------- /tests/data/jagged-output/template.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result ({{ rownum }}); 2 | -------------------------------------------------------------------------------- /tests/data/lazy-array/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 500, 3 | "format": "csv", 4 | "components": ["data"], 5 | "rng": "hc128", 6 | "seed": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 7 | } -------------------------------------------------------------------------------- /tests/data/lazy-array/template.sql: -------------------------------------------------------------------------------- 1 | {{ @short_array := generate_series(1001, 1500) }} 2 | {{ @shuffled_short_array := rand.shuffle(@short_array) }} 3 | {{ @long_array := generate_series(1, 10000000000000000000) }} 4 | {{ @shuffled_long_array := rand.shuffle(@long_array) }} 5 | 6 | CREATE TABLE result ( 7 | {{ rownum }} 8 | {{ @short_array[rownum] }} 9 | {{ @shuffled_short_array[rownum] }} 10 | {{ @long_array[20000000000000000 * rownum] }} 11 | {{ @shuffled_long_array[20000000000000000 * rownum] }} 12 | ) -------------------------------------------------------------------------------- /tests/data/rand-finite-float/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 1, 3 | "rows_count": 100, 4 | "rng": "hc128", 5 | "seed": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 6 | } 7 | -------------------------------------------------------------------------------- /tests/data/rand-finite-float/result-schema.sql: 
-------------------------------------------------------------------------------- 1 | CREATE TABLE result (); 2 | -------------------------------------------------------------------------------- /tests/data/rand-finite-float/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | (1, 90672.484375, 1.1339103227112086e22), 3 | (2, -3.9885693578263373e-16, 7.833575980360231e-174), 4 | (3, 7.240773990175674e22, 1.4197898344540396e280), 5 | (4, 59127382016.0, 6.616223591746987e-236), 6 | (5, -4.2604917653137e17, 4.362379751853048e133), 7 | (6, -271989.78125, 3.251462614608522e-186), 8 | (7, -2234509426688.0, 2.724018576451632e-22), 9 | (8, 4.575370547937472e29, 3.748573372495495e154), 10 | (9, -0.0014323251089081168, 5.4988323481225707e45), 11 | (10, -3.28562618062905e-23, 2.206077893973367e-168), 12 | (11, -13047437459456.0, 4.166804169107403e-100), 13 | (12, 2.5795822958546562e36, 1.632393225116654e-87), 14 | (13, 5.483646869919312e-9, 21249691.171849478), 15 | (14, -3.853022526155832e-35, 5.947947743978342e-182), 16 | (15, 3.1382959907484334e31, -7.795565200938266e130), 17 | (16, -1.0497456421507568e-27, -1.0592991141145154e17), 18 | (17, 1.5472405742531271e38, -3.1290732865309286e174), 19 | (18, 184289.1875, -3.896294566591252e-73), 20 | (19, -9.706033661098786e36, -5.763447382303127e147), 21 | (20, -1.3068177182745666e-12, -1.5322762544893857e184), 22 | (21, 20397.857421875, 8.851270437374964e-244), 23 | (22, -8.264244104892828e-24, -4.628369298342482e282), 24 | (23, -0.0017546163871884346, 714606769936.4602), 25 | (24, -4.840370326775201e-9, -5.433999502097366e211), 26 | (25, -9.99207236423123e17, -2.310721571800414e-258), 27 | (26, 3.712962536102852e29, 4.136223942330634e-183), 28 | (27, -1.2318039616064832e24, 1.324698983551158e-265), 29 | (28, -0.0039032120257616043, 1.148385179919965e164), 30 | (29, -11832683134976.0, 1.0086204532409517e-46), 31 | (30, 1.839200552239864e23, 
-1.8628866409044736e17), 32 | (31, -3.7525185490257055e-19, 1.4242891264403062e-136), 33 | (32, -2.478781159400999e-31, -6.393287095280547e-9), 34 | (33, -118.38725280761719, 8039257107.044941), 35 | (34, 2.9621754525204186e-37, 8.568219555719027e-28), 36 | (35, -6.4958161666976e25, 4.696497143717539e-150), 37 | (36, -3.534039077491801e-27, 2.828365085092237e-63), 38 | (37, 0.46201908588409424, 2.4086920863229868e221), 39 | (38, -1730867200.0, 1.7040597490515657e169), 40 | (39, -2.3207548497923827e20, 3.195767411252744e189), 41 | (40, 8.96008259527905e-22, -7.261114640705759e-237), 42 | (41, 9.793616827353538e-19, 2.611466464958359e-238), 43 | (42, 7.116422136891493e-34, -1.248617608609914e-101), 44 | (43, -32209061888.0, -9.517551565433224e-94), 45 | (44, 2.890690087110959e-21, 2.5302639567197774e179), 46 | (45, -7.360597908956379e-9, 1.2453982176648428e186), 47 | (46, 4.07431450307689e-16, 2.0173561844416902e155), 48 | (47, 1.3118930086067658e25, -9.347434650324013e-157), 49 | (48, -3.099216575121671e23, 0.17191740799896224), 50 | (49, 513.27734375, 6.169315103399442e138), 51 | (50, 1.6981207957288199e31, 1.553235595276201e-266), 52 | (51, -1.1796304631116595e17, 2.059609972927675e-184), 53 | (52, -986501056.0, -3.1945070427158674e-282), 54 | (53, 58292476.0, -1.06972082130298e141), 55 | (54, -1.4203994960976022e-18, 8.749596682028774e288), 56 | (55, -7.828941306797788e-6, 1.5344428407500372e-54), 57 | (56, -1.0219106066133419e-31, 3.874525697186665e-104), 58 | (57, -0.00022248682216741145, -2.0199254934565108e-291), 59 | (58, 3.966689173309779e-15, -3.9935644990683073e-134), 60 | (59, 7.429580667168139e25, 2.896614989721052e185), 61 | (60, -2.0374230838154363e18, 1.403122282667301e243), 62 | (61, 1.2594012847190817e36, -6.4432442789482024e-27), 63 | (62, -3.597980348451771e21, 1.27588639295836e-29), 64 | (63, 9.675402378062053e-20, 5.729772407943651e-204), 65 | (64, -2.6915499302243056e32, 1.3923459825706752e-286), 66 | (65, 1.1558364427076795e-36, 
-1.6745067879870783e130), 67 | (66, 1.9405875963038245e-25, 3.512491676602063e136), 68 | (67, -2678.02294921875, 1.845593182341988e168), 69 | (68, 83333.359375, -2.1158726438057665e-113), 70 | (69, 1.1352855317412242e-27, -1.1752556245355497e180), 71 | (70, -1.3565529244930025e-31, 7.10625218667836e169), 72 | (71, -6.568974099110029e-35, 9.128038292326728e78), 73 | (72, 749965632.0, 26.100409017090264), 74 | (73, -3.192422940408775e-34, -1.8827271106731403e-283), 75 | (74, -1.6730553001546006e-18, 8.646951925107533e-237), 76 | (75, -131585040384.0, -2.900246076083441e201), 77 | (76, 8.313116688543911e-20, -3.583701803049338e110), 78 | (77, 3.0312861599103097e27, -1.033105464477069e-228), 79 | (78, -2.8692661580009826e-9, -3.1608025039353693e-114), 80 | (79, 1.6661872450640658e-6, -5.246588704751609e-101), 81 | (80, 3.429970765775033e-24, -3.853122356887104e-66), 82 | (81, -8.590454569912254e-9, -1603157.5510638105), 83 | (82, 1.2052242153515585e-22, 1.762190988544563e259), 84 | (83, -2.4019966571309226e-10, 3.442827619789515e305), 85 | (84, -7.90335033212135e26, 3.688915544853158e-9), 86 | (85, 1.1506715519726964e20, 1.856185825363751e183), 87 | (86, -1.582902679030045e19, 4.567456213728061e282), 88 | (87, 1.706448342340631e25, 2.994565385602456e93), 89 | (88, 16.967838287353516, 3.441059180697452e299), 90 | (89, -1.7456716591147254e18, 1.6129647092582622e-91), 91 | (90, -1.2552967573911634e18, 1.4062936734520358e200), 92 | (91, -7.6454928587424115e-34, -7.442819471086665e-292), 93 | (92, -5.590406067755571e-13, 5.062064109209187e-302), 94 | (93, 3.327976025148621e17, 1.0298670777918947e99), 95 | (94, -41.087646484375, -2.4032856755403114e-280), 96 | (95, -3.036375000994206e21, -1.1995223344396541e-207), 97 | (96, -2.194237901724742e24, -1.3484774165468417e299), 98 | (97, 0.9569990634918213, 2.521787143855416e-197), 99 | (98, 1.374498209931269e-18, -3.0250956456896433e279), 100 | (99, -1.031601547534211e31, -2.0761968006586557e-255), 101 | (100, 
1.9719611301790482e19, 5.050829248467284e266); 102 | -------------------------------------------------------------------------------- /tests/data/rand-finite-float/template.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result ( 2 | {{ rownum }} 3 | {{ rand.finite_f32() }} 4 | {{ rand.finite_f64() }} 5 | ); 6 | -------------------------------------------------------------------------------- /tests/data/rand-weighted/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 1, 3 | "rows_count": 100, 4 | "rng": "hc128", 5 | "seed": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 6 | } 7 | -------------------------------------------------------------------------------- /tests/data/rand-weighted/result-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result (); 2 | -------------------------------------------------------------------------------- /tests/data/rand-weighted/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | (1, 2), 3 | (2, 1), 4 | (3, 3), 5 | (4, 2), 6 | (5, 3), 7 | (6, 2), 8 | (7, 2), 9 | (8, 3), 10 | (9, 2), 11 | (10, 1), 12 | (11, 3), 13 | (12, 3), 14 | (13, 2), 15 | (14, 1), 16 | (15, 3), 17 | (16, 1), 18 | (17, 3), 19 | (18, 2), 20 | (19, 3), 21 | (20, 2), 22 | (21, 2), 23 | (22, 3), 24 | (23, 2), 25 | (24, 2), 26 | (25, 3), 27 | (26, 3), 28 | (27, 3), 29 | (28, 2), 30 | (29, 3), 31 | (30, 3), 32 | (31, 1), 33 | (32, 1), 34 | (33, 2), 35 | (34, 1), 36 | (35, 3), 37 | (36, 1), 38 | (37, 2), 39 | (38, 2), 40 | (39, 3), 41 | (40, 1), 42 | (41, 1), 43 | (42, 1), 44 | (43, 2), 45 | (44, 3), 46 | (45, 2), 47 | (46, 3), 48 | (47, 3), 49 | (48, 3), 50 | (49, 2), 51 | (50, 3), 52 | (51, 3), 53 | (52, 2), 54 | (53, 2), 55 | (54, 3), 56 | (55, 2), 57 | (56, 1), 58 | (57, 2), 59 | (58, 1), 60 
| (59, 3), 61 | (60, 3), 62 | (61, 3), 63 | (62, 3), 64 | (63, 1), 65 | (64, 3), 66 | (65, 3), 67 | (66, 3), 68 | (67, 2), 69 | (68, 2), 70 | (69, 3), 71 | (70, 3), 72 | (71, 3), 73 | (72, 2), 74 | (73, 1), 75 | (74, 1), 76 | (75, 2), 77 | (76, 3), 78 | (77, 3), 79 | (78, 2), 80 | (79, 2), 81 | (80, 1), 82 | (81, 2), 83 | (82, 3), 84 | (83, 3), 85 | (84, 3), 86 | (85, 3), 87 | (86, 3), 88 | (87, 3), 89 | (88, 3), 90 | (89, 3), 91 | (90, 3), 92 | (91, 1), 93 | (92, 2), 94 | (93, 3), 95 | (94, 2), 96 | (95, 3), 97 | (96, 3), 98 | (97, 2), 99 | (98, 3), 100 | (99, 3), 101 | (100, 3); 102 | -------------------------------------------------------------------------------- /tests/data/rand-weighted/template.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result ( 2 | {{ rownum }} 3 | {{ rand.weighted(array[2, 3, 5]) }} 4 | ); 5 | -------------------------------------------------------------------------------- /tests/data/seeded-hc128/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 1, 3 | "rows_count": 100, 4 | "rng": "hc128", 5 | "seed": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 6 | } 7 | -------------------------------------------------------------------------------- /tests/data/seeded-hc128/result-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result (); 2 | -------------------------------------------------------------------------------- /tests/data/seeded-hc128/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | (1, 4, 2), 3 | (2, 8, 4), 4 | (3, 4, 4), 5 | (4, 4, 8), 6 | (5, 3, 1), 7 | (6, 9, 10), 8 | (7, 2, 1), 9 | (8, 5, 1), 10 | (9, 9, 4), 11 | (10, 4, 3), 12 | (11, 9, 10), 13 | (12, 8, 1), 14 | (13, 4, 6), 15 | (14, 8, 3), 16 | (15, 0, 3), 17 | (16, 0, 3), 18 | (17, 1, 2), 19 | 
(18, 6, 6), 20 | (19, 2, 1), 21 | (20, 0, 2), 22 | (21, 2, 6), 23 | (22, 2, 6), 24 | (23, 8, 8), 25 | (24, 6, 4), 26 | (25, 1, 8), 27 | (26, 3, 9), 28 | (27, 2, 8), 29 | (28, 8, 7), 30 | (29, 4, 1), 31 | (30, 0, 3), 32 | (31, 7, 7), 33 | (32, 3, 9), 34 | (33, 7, 3), 35 | (34, 5, 1), 36 | (35, 1, 5), 37 | (36, 6, 3), 38 | (37, 3, 2), 39 | (38, 3, 5), 40 | (39, 9, 7), 41 | (40, 4, 2), 42 | (41, 9, 2), 43 | (42, 9, 1), 44 | (43, 8, 2), 45 | (44, 0, 1), 46 | (45, 0, 10), 47 | (46, 2, 9), 48 | (47, 7, 9), 49 | (48, 8, 1), 50 | (49, 8, 1), 51 | (50, 7, 7), 52 | (51, 6, 4), 53 | (52, 0, 2), 54 | (53, 6, 1), 55 | (54, 4, 8), 56 | (55, 1, 2), 57 | (56, 5, 5), 58 | (57, 3, 6), 59 | (58, 8, 7), 60 | (59, 0, 3), 61 | (60, 1, 7), 62 | (61, 0, 2), 63 | (62, 6, 3), 64 | (63, 3, 1), 65 | (64, 4, 10), 66 | (65, 8, 3), 67 | (66, 0, 2), 68 | (67, 7, 7), 69 | (68, 1, 4), 70 | (69, 8, 9), 71 | (70, 6, 4), 72 | (71, 2, 1), 73 | (72, 9, 9), 74 | (73, 5, 7), 75 | (74, 2, 7), 76 | (75, 5, 9), 77 | (76, 8, 3), 78 | (77, 5, 2), 79 | (78, 9, 5), 80 | (79, 3, 1), 81 | (80, 4, 1), 82 | (81, 9, 1), 83 | (82, 5, 2), 84 | (83, 7, 5), 85 | (84, 7, 1), 86 | (85, 3, 4), 87 | (86, 1, 2), 88 | (87, 1, 4), 89 | (88, 5, 1), 90 | (89, 3, 9), 91 | (90, 6, 3), 92 | (91, 4, 9), 93 | (92, 7, 3), 94 | (93, 2, 6), 95 | (94, 9, 1), 96 | (95, 5, 2), 97 | (96, 2, 2), 98 | (97, 2, 2), 99 | (98, 8, 8), 100 | (99, 4, 1), 101 | (100, 3, 2); 102 | -------------------------------------------------------------------------------- /tests/data/seeded-hc128/template.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result ( 2 | {{ rownum }} 3 | {{ rand.range(0, 10) }} 4 | {{ rand.zipf(10, 0.75) }} 5 | ); 6 | -------------------------------------------------------------------------------- /tests/data/shuffle-with-restarts/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "files_count": 2, 3 | "inserts_count": 2, 4 | 
"rows_count": 10, 5 | "rng": "hc128", 6 | "seed": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 7 | } 8 | -------------------------------------------------------------------------------- /tests/data/shuffle-with-restarts/result-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result (); 2 | -------------------------------------------------------------------------------- /tests/data/shuffle-with-restarts/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | ('Three'), 3 | ('Five'), 4 | ('Four'), 5 | ('One'), 6 | ('Two'), 7 | ('Three'), 8 | ('Two'), 9 | ('Five'), 10 | ('One'), 11 | ('Four'); 12 | INSERT INTO result VALUES 13 | ('Four'), 14 | ('Five'), 15 | ('Three'), 16 | ('One'), 17 | ('Two'), 18 | ('Three'), 19 | ('Two'), 20 | ('Four'), 21 | ('Five'), 22 | ('One'); 23 | -------------------------------------------------------------------------------- /tests/data/shuffle-with-restarts/result.2.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | ('Five'), 3 | ('Two'), 4 | ('One'), 5 | ('Four'), 6 | ('Three'), 7 | ('Three'), 8 | ('Two'), 9 | ('One'), 10 | ('Five'), 11 | ('Four'); 12 | INSERT INTO result VALUES 13 | ('Four'), 14 | ('Five'), 15 | ('Two'), 16 | ('Three'), 17 | ('One'), 18 | ('Three'), 19 | ('One'), 20 | ('Four'), 21 | ('Two'), 22 | ('Five'); 23 | -------------------------------------------------------------------------------- /tests/data/shuffle-with-restarts/template.sql: -------------------------------------------------------------------------------- 1 | {{ @data := array['One', 'Two', 'Three', 'Four', 'Five'] }} 2 | CREATE TABLE result ( 3 | {{ 4 | @i := mod(rownum-1, 5); 5 | case when @i = 0 then @data := rand.shuffle(@data) end; 6 | @data[@i + 1] 7 | }} 8 | ); 9 | 
-------------------------------------------------------------------------------- /tests/data/substring/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "inserts_count": 1, 3 | "format": "sql-insert-set", 4 | "components": ["data"] 5 | } 6 | -------------------------------------------------------------------------------- /tests/data/substring/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result SET 2 | ss_a = '🥰😘😍', 3 | ss_b = '😘😍', 4 | ss_c = '😘', 5 | ss_d = '🥰😘😍', 6 | ss_e = '', 7 | ss_f = '😘😍', 8 | ss_g = '🥰😘😍', 9 | ss_h = '🥰', 10 | ss_i = '', 11 | ss_j = X'9FA5B0', 12 | ss_k = X'9FA5', 13 | ss_l = X'F0', 14 | ss_m = '', 15 | ss_n = '', 16 | ss_o = '🥰😘', 17 | ss_p = X'F09F', 18 | ov_a = 'A🥰CDEF', 19 | ov_b = 'A🥰F', 20 | ov_c = 'A😘😍', 21 | ov_d = X'419FA5B0', 22 | ov_e = 'XYabc', 23 | ov_f = 'XYabc', 24 | ov_g = 'A_F', 25 | ov_h = 'A_BCDEF'; 26 | 27 | -------------------------------------------------------------------------------- /tests/data/substring/template.sql: -------------------------------------------------------------------------------- 1 | create table result ( 2 | ss_a {{ substring('🥰😘😍' from 1) }} 3 | ss_b {{ substring('🥰😘😍' from 2) }} 4 | ss_c {{ substring('🥰😘😍' from 2 for 1) }} 5 | ss_d {{ substring('🥰😘😍' from -99) }} 6 | ss_e {{ substring('🥰😘😍' from 99) }} 7 | ss_f {{ substring('🥰😘😍' from 2 for 99) }} 8 | ss_g {{ substring('🥰😘😍' from -2 for 99) }} 9 | ss_h {{ substring('🥰😘😍' from -1 for 3) }} 10 | ss_i {{ substring('🥰😘😍' from 2 for -1) }} 11 | ss_j {{ substring('🥰' from 2 using octets) }} 12 | ss_k {{ substring('🥰' from 2 for 2 using octets) }} 13 | ss_l {{ substring('🥰' from -1 for 3 using octets) }} 14 | ss_m {{ substring('🥰' from 99 using octets) }} 15 | ss_n {{ substring('🥰' from 99 for 99 using octets) }} 16 | ss_o {{ substring('🥰😘😍' for 2) }} 17 | ss_p {{ substring('🥰😘😍' for 2 using octets) }} 18 | 19 | ov_a {{ 
overlay('ABCDEF' placing '🥰' from 2) }} 20 | ov_b {{ overlay('ABCDEF' placing '🥰' from 2 using octets) }} 21 | ov_c {{ overlay('🥰😘😍' placing 'A' from 1) }} 22 | ov_d {{ overlay('🥰' placing 'A' from 1 using octets) }} 23 | ov_e {{ overlay('XYZ' placing 'abc' from 3) }} 24 | ov_f {{ overlay('XYZ' placing 'abc' from 3 using octets) }} 25 | ov_g {{ overlay('ABCDEF' placing '_' from 2 for 4) }} 26 | ov_h {{ overlay('ABCDEF' placing '_' from 2 for 0) }} 27 | ); 28 | -------------------------------------------------------------------------------- /tests/data/uuid/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "rows_count": 5, 3 | "rng": "hc128", 4 | "seed": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" 5 | } 6 | -------------------------------------------------------------------------------- /tests/data/uuid/result-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result (); 2 | -------------------------------------------------------------------------------- /tests/data/uuid/result.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO result VALUES 2 | ('229f1c7a-8e0c-41bd-abe8-00795dc61fea'), 3 | ('990ee46d-c4bf-41ca-9cfc-a90a640c38ac'), 4 | ('4d30f2c9-ce62-4119-890a-ec5f7be4c03f'), 5 | ('671f17c7-02e0-4662-932b-968be1dfbc80'), 6 | ('28c4b8f3-063b-40e7-b209-3b82e6f05fa1'); 7 | -------------------------------------------------------------------------------- /tests/data/uuid/template.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE result ( 2 | {{ rand.uuid() }} 3 | ); 4 | -------------------------------------------------------------------------------- /tests/data/zero-children/flags.json: -------------------------------------------------------------------------------- 1 | { 2 | "rows_count": 1, 3 | "schema_name": "test" 4 | } 
-------------------------------------------------------------------------------- /tests/data/zero-children/template.sql: -------------------------------------------------------------------------------- 1 | create table a ({{ @a := 1 }}); 2 | 3 | /*{{ for each row of a generate 0 rows of b }}*/ 4 | create table b ({{ @b := 2 }}); 5 | 6 | /*{{ for each row of b generate 0 rows of c }}*/ 7 | create table c ({{ @a * @b }}); 8 | -------------------------------------------------------------------------------- /tests/data/zero-children/test.a-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE a (); 2 | 3 | -------------------------------------------------------------------------------- /tests/data/zero-children/test.a.1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO a VALUES 2 | (1); 3 | -------------------------------------------------------------------------------- /tests/data/zero-children/test.b-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE b (); 2 | 3 | -------------------------------------------------------------------------------- /tests/data/zero-children/test.b.1.sql: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kennytm/dbgen/0996fe5f0a1cac116bd3cfc22ce379f92f26c4a5/tests/data/zero-children/test.b.1.sql -------------------------------------------------------------------------------- /tests/data/zero-children/test.c-schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE c (); 2 | -------------------------------------------------------------------------------- /tests/data/zero-children/test.c.1.sql: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kennytm/dbgen/0996fe5f0a1cac116bd3cfc22ce379f92f26c4a5/tests/data/zero-children/test.c.1.sql --------------------------------------------------------------------------------