├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── assets ├── cover.png └── logo.png ├── examples ├── create.sql ├── script.sql └── types.txt ├── go.mod ├── go.sum ├── internal └── pkg │ ├── parse │ ├── blocks.go │ └── blocks_test.go │ ├── random │ ├── random.go │ ├── random_test.go │ ├── stringf.go │ └── weight.go │ ├── runner │ ├── builder.go │ ├── common_test.go │ ├── data.go │ ├── helpers.go │ ├── option.go │ ├── option_test.go │ ├── runner.go │ ├── runner_test.go │ ├── store.go │ └── store_test.go │ └── test │ └── helpers.go └── main.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, build with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool, specifically when used with LiteIDE 12 | *.out 13 | 14 | # Output of Makefile builds. 15 | datagen_* 16 | 17 | # Output of runtime. 18 | query_err.sql -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.10.x 5 | - 1.11.x 6 | - 1.12.x 7 | - master 8 | 9 | before_install: 10 | - go get -u -d gopkg.in/cheggaaa/pb.v1 11 | - go get -u -d github.com/go-sql-driver/mysql 12 | - go get -u -d github.com/lib/pq 13 | - go get -u -d github.com/pkg/errors 14 | - go get -u -d github.com/google/uuid 15 | - go get -u -d github.com/DATA-DOG/go-sqlmock 16 | 17 | script: 18 | make test -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Coding Concepts 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in 
the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: test 2 | 3 | cockroachdb: 4 | docker run -d -p 26257:26257 -p 8080:8080 cockroachdb/cockroach:v2.1.6 start --insecure 5 | 6 | example: 7 | go run main.go -script ./examples/script.sql --driver postgres --conn postgres://root@localhost:26257/sandbox?sslmode=disable 8 | 9 | test: 10 | go test ./... -v ;\ 11 | go test ./... -cover 12 | 13 | bench: 14 | go test ./... -bench=. 15 | 16 | cover: 17 | go test ./... 
-coverprofile=coverage.out -coverpkg=\ 18 | github.com/codingconcepts/datagen/internal/pkg/parse,\ 19 | github.com/codingconcepts/datagen/internal/pkg/random,\ 20 | github.com/codingconcepts/datagen/internal/pkg/runner;\ 21 | go tool cover -html=coverage.out 22 | 23 | release: 24 | # linux 25 | GOOS=linux go build -ldflags "-X main.semver=${VERSION}" -o datagen ;\ 26 | tar -zcvf datagen_${VERSION}_linux.tar.gz ./datagen ;\ 27 | 28 | # macos 29 | GOOS=darwin go build -ldflags "-X main.semver=${VERSION}" -o datagen ;\ 30 | tar -zcvf datagen_${VERSION}_macOS.tar.gz ./datagen ;\ 31 | 32 | # windows 33 | GOOS=windows go build -ldflags "-X main.semver=${VERSION}" -o datagen ;\ 34 | tar -zcvf datagen_${VERSION}_windows.tar.gz ./datagen ;\ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This project is archived, as [dg](https://github.com/codingconcepts/dg) offers better functionality whilst being simpler to use. 2 | 3 | # ![datagen logo](assets/cover.png) 4 | 5 | [![Build Status](https://travis-ci.org/codingconcepts/datagen.svg?branch=master)](https://travis-ci.org/codingconcepts/datagen) [![Go Report Card](https://goreportcard.com/badge/github.com/codingconcepts/datagen)](https://goreportcard.com/report/github.com/codingconcepts/datagen) 6 | 7 | If you need to generate a lot of random data for your database tables but don't want to spend hours configuring a custom tool for the job, then `datagen` could work for you. 8 | 9 | `datagen` takes its instructions from a configuration file. These configuration files can execute any number of SQL queries, taking advantage of multi-row DML for fast inserts and Go's [text/template](https://golang.org/pkg/text/template/) language is used to acheive this. 10 | 11 | This project is still very early days, so some of it is still first-pass. If you don't like something, PRs are very welcome! 
12 | 13 | > "[modelgen](https://github.com/LUSHDigital/modelgen) saves sooo much time. coupled with datagen it just gives you a crazy head start" 14 | 15 | > "right now datagen and [modelgen](https://github.com/LUSHDigital/modelgen) are god sends to me" 16 | 17 | ## Credit 18 | 19 | * [go-randomdata](https://github.com/Pallinder/go-randomdata) for the following generators: 20 | 21 | title, namef, namel, name, email, phone, postcode, address, street, city, county, state, currency, locale, country, country2, country3, ip4, ip6, and user-agent. 22 | 23 | ## Installation 24 | 25 | To build from source, either clone the repo, or use `go get` as follows (datagen will automatically be built by Go): 26 | 27 | ```bash 28 | $ go get -u github.com/codingconcepts/datagen 29 | ``` 30 | 31 | ## Building 32 | 33 | To build a version of datagen for Linux, MacOS, and Windows, call the `release` Makefile target with a version argument as follows: 34 | 35 | ``` 36 | $ make release VERSION=1.0.0-beta 37 | ``` 38 | 39 | If you'd prefer to download a binary, the releases page has all built versions and can be found [here](https://github.com/codingconcepts/datagen/releases). 40 | 41 | ## Usage 42 | 43 | See the [examples](https://github.com/codingconcepts/datagen/tree/master/examples) directory for a CockroachDB example that works using the `make example` command.
When running the executable, use the following syntax: 44 | 45 | ``` 46 | datagen -script script.sql --driver postgres --conn postgres://root@localhost:26257/sandbox?sslmode=disable 47 | ``` 48 | 49 | `datagen` accepts the following arguments: 50 | 51 | | Flag | Description | 52 | | ---------- | ----------- | 53 | | `-conn` | The full database connection string (enclosed in quotes) | 54 | | `-driver` | The name of the database driver to use [postgres, mysql] | 55 | | `-script` | The full path to the script file to use (enclosed in quotes) | 56 | | `-datefmt` | _(optional)_ `time.Time` format string that determines the format of all database and template dates. Defaults to "2006-01-02" | 57 | | `-debug` | _(optional)_ If set, the SQL generated will be written to stdout. Note that `ref`, `row`, and `each` won't work. | 58 | 59 | ## Concepts 60 | 61 | | Object | Description | 62 | | ------ | ---------- | 63 | | Block | A block of text within a configuration file that performs a series of operations against a database. | 64 | | Script | A script is a text file that contains a number of blocks. | 65 | 66 | ### Comments 67 | 68 | `datagen` uses Go's [text/template](https://golang.org/pkg/text/template/) engine where possible but where it's not possible to use that, it parses and makes use of comments. The following comments provide instructions to `datagen` during block parsing. 69 | 70 | | Comment | Description | 71 | | ------------- | ----------- | 72 | | `-- REPEAT N` | Repeat the block that directly follows the comment N times. If this comment isn't provided, a block will be executed once. Consider this when using the `ntimes` function to insert a large amount of data. For example `-- REPEAT 100` when used in conjunction with `ntimes 1000` will result in 100,000 rows being inserted using multi-row DML syntax as per the examples.
| 73 | | `-- NAME` | Assigns a given name to the block that directly follows the comment, allowing specific rows from blocks to be referenced and not muddled with others. If this comment isn't provided, no distinction will be made between same-name columns from different tables, so issues will likely arise (e.g. `owner.id` and `pet.id` in the examples). Only omit this for single-block configurations. | 74 | | `-- EOF` | Causing block parsing to stop, essentially simulating the natural end-of-file. If this comment isn't provided, the parse will parse all blocks in the script. | 75 | 76 | #### Helper functions 77 | 78 | ##### ntimes 79 | 80 | Expresses the number of multi-row DML statements that will be generated: 81 | 82 | ``` 83 | {{range $i, $e := ntimes 1 10 }} 84 | {{if $i}},{{end}} 85 | ( 86 | ...something 87 | ) 88 | {{end}} 89 | ``` 90 | 91 | `ntimes` the name of the function.
92 | `1` the minimum value.
93 | `10` _(optional)_ the maximum value. If omitted, the number will be exactly equal to the minimum value.
94 | 95 | The following script generates 5 entries into the `one` table and between 5 and 10 entries into the `two` table as a result of the combination of the `-- REPEAT` and `ntimes` configured: 96 | 97 | ``` sql 98 | -- REPEAT 1 99 | -- NAME one 100 | insert into "one" ( 101 | "id", 102 | "name") values 103 | {{range $i, $e := ntimes 5 }} 104 | {{if $i}},{{end}} 105 | ( 106 | {{int 1 10000}}, 107 | '{{name}}' 108 | ) 109 | {{end}} 110 | returning "id"; 111 | 112 | -- REPEAT 1 113 | -- NAME two 114 | insert into "two" ( 115 | "one_id") values 116 | {{range $i, $e := ntimes 5 10 }} 117 | {{if $i}},{{end}} 118 | ( 119 | '{{each "one" "id" $i}}' 120 | ) 121 | {{end}}; 122 | ``` 123 | 124 | The `ntimes` and `REPEAT` values for table one's insert totalled 5, so you'll see 5 rows in table one: 125 | 126 | | id | 127 | | -- | 128 | | 1977 | 129 | | 2875 | 130 | | 6518 | 131 | | 6877 | 132 | | 9425 | 133 | 134 | The `ntimes` and `REPEAT` values for table two's insert totalled 7 (`ntimes` generated 7 and we `REPEATE` once): 135 | 136 | | one_id | count | 137 | | ------ | ----- | 138 | | 1977 | 2 | 139 | | 2875 | 1 | 140 | | 6518 | 2 | 141 | | 6877 | 1 | 142 | | 9425 | 1 | 143 | 144 | By increasing the `REPEAT` value to 2, we'll generate a total of 14 (`ntimes` is 7 multiplied by two this time): 145 | 146 | | one_id | count | 147 | | ------ | ----- | 148 | | 1977 | 3 | 149 | | 2875 | 2 | 150 | | 6518 | 3 | 151 | | 6877 | 3 | 152 | | 9425 | 3 | 153 | 154 | ##### string 155 | 156 | Generates a random string between a given minimum and maximum length: 157 | 158 | ``` 159 | '{{string 5 10 "abcABC"}}' 160 | ``` 161 | 162 | `string` the name of the function.
163 | `5` the minimum string length including any prefix.
164 | `10` the maximum string length including any prefix.
165 | `"abcABC"` _(optional)_ the set of characters to select from.
166 | 167 | Note that the apostrophes will wrap the string, turning it into a database string. 168 | 169 | ##### stringf 170 | 171 | Generates a formatted string using placeholder syntax: 172 | 173 | ``` 174 | '{{stringf "%s.%d@acme.com" 5 10 "abc" 10000 20000}}' 175 | ``` 176 | 177 | `stringf` the name of the function.
178 | `"%s.%d@acme.com"` the format string.
179 | `5` the minimum string length for the first string placeholder.
180 | `10` the maximum string length for the first string placeholder.
181 | `"abc"` the characters to use for the first string placeholder (leave blank to use defaults).
182 | `10000` the minimum value for the integer placeholder.
183 | `20000` the maximum value for the integer placeholder.
184 | 185 | Note that at present only the following verbs are supported: 186 | 187 | - %s - a string 188 | - %d - an integer 189 | 190 | ##### int 191 | 192 | Generates a random 64 bit integer between a minimum and maximum value. 193 | 194 | ``` 195 | {{int 5 10}} 196 | ``` 197 | 198 | `int` the name of the function.
199 | `5` the minimum number to generate.
200 | `10` the maximum number to generate.
201 | 202 | ##### date 203 | 204 | Generates a random date between two dates. 205 | 206 | ``` 207 | '{{date "2018-01-02" "now" "" }}' 208 | ``` 209 | 210 | `date` the name of the function.
211 | `"2018-01-02"` the minimum date to generate.
212 | `"now"` the maximum date to generate.
213 | `""` the format to use for input dates, left blank to use the value specified by the `-datefmt` flag. If overridden, both the minimum and maximum date arguments should be in the overridden format. 214 | 215 | Note that `"now"` can be passed to both the minimum and maximum dates if required. 216 | 217 | ``` 218 | '{{date "now" "now" "2006-01-02 15:04:05" }}' 219 | ``` 220 | 221 | `"2006-01-02 15:04:05"` the date format you wish to be generated 222 | 223 | ##### float 224 | 225 | Generates a random 64 bit float between a minimum and maximum value. 226 | 227 | ``` 228 | {{float 1.2345678901 2.3456789012}} 229 | ``` 230 | 231 | `float` the name of the function.
232 | `1.2345678901` the minimum number to generate.
233 | `2.3456789012` the maximum number to generate.
234 | 235 | ##### uuid 236 | 237 | Generates a random V4 UUID using Google's [uuid](https://github.com/google/uuid) package. 238 | 239 | ``` 240 | {{uuid}} 241 | ``` 242 | 243 | `uuid` the name of the function. 244 | 245 | ##### set 246 | 247 | Selects a random value from a set of possible values. 248 | 249 | ``` 250 | '{{set "alice" 1 2.3}}' 251 | ``` 252 | 253 | `set` the name of the function.
254 | `"alice"`|`"bob"` etc. the available options to generate from.
255 | 256 | ##### wset 257 | 258 | Selects a random value from a set of possible values using weighting. 259 | 260 | ``` 261 | '{{wset "a" 60 "b" 30 "c" 10}}' 262 | ``` 263 | 264 | `wset` the name of the function.
265 | `"a"` the first option.
266 | `60` a weight of 60 for the first option.
267 | `"b"` the second option.
268 | `30` a weight of 30 for the second option.
269 | `"c"` the third option.
270 | `10` a weight of 10 for the third option.
271 | 272 | Weights can be any number. 273 | 274 | ##### fset 275 | 276 | Selects a random value from a set of possible values contained within a file and caches the enumeration so it's not reloaded from disk on every invocation. 277 | 278 | ``` 279 | '{{fset "./examples/types.txt"}}' 280 | ``` 281 | 282 | `fset` the name of the function.
283 | `"./examples/types.txt"` the path to the file containing the options.
284 | 285 | ##### ref 286 | 287 | References a random value from a previous block's returned values (cached in memory). For example, if you have two blocks, one named "owner" and another named "pet" and you insert a number of owners into the database, returning their IDs, then wish to assign pets to them, you can use the following syntax (assuming you've provided the value "owner" for the first block's `-- NAME` comment): 288 | 289 | ``` 290 | '{{ref "owner" "id"}}', 291 | ``` 292 | 293 | `ref` the name of the function.
294 | 295 | ##### row 296 | 297 | References a random row from a previous block's returned values and caches it so that values from the same row can be used for other column insert values. For example, if you have two blocks, one named "owner" and another named "pet" and you insert a number of owners into the database, returning their IDs and names, you can use the following syntax to get the ID and name of a random row (assuming you've provided the value "owner" for the first block's `-- NAME` comment): 298 | 299 | ``` 300 | '{{row "owner" "id" $i}}', 301 | '{{row "owner" "name" $i}}' 302 | ``` 303 | 304 | `row` the name of the function.
305 | `owner` the name of the block whose data we're referencing.
306 | `id` the name of the owner column we'd like.
307 | `$i` the group identifier for this insert statement (ensures columns get taken from the same row).
308 | 309 | ##### each 310 | 311 | Works in a similar way to `row` but references _sequential_ rows from a previous block's returned values, allowing all of a previous block's rows to have associated rows in a related table, provided the product of `-- REPEAT` and `ntimes` is the same as the previous block's. 312 | 313 | ``` 314 | '{{each "owner" "id" $i}}', 315 | '{{each "owner" "name" $i}}', 316 | ``` 317 | 318 | `each` the name of the function.
319 | `owner` the name of the block whose data we're referencing.
320 | `id` the name of the owner column we'd like.
321 | `$i` the group identifier for this insert statement (ensures columns get taken from the same row).
322 | 323 | ``` 324 | {{range $i, $e := ntimes 1}} 325 | ...something 326 | {{end}} 327 | ``` 328 | 329 | ##### adj 330 | 331 | Generates a random adjective. 332 | 333 | ``` 334 | {{adj}} 335 | ``` 336 | 337 | ##### noun 338 | 339 | Generates a random noun. 340 | 341 | ``` 342 | {{noun}} 343 | ``` 344 | 345 | ##### title 346 | 347 | Generates a random title for a random gender. 348 | 349 | ``` 350 | {{title}} 351 | ``` 352 | 353 | ##### namef 354 | 355 | Generates a random first name for a random gender. 356 | 357 | ``` 358 | {{namef}} 359 | ``` 360 | 361 | ##### namel 362 | 363 | Generates a random last name. 364 | 365 | ``` 366 | {{namel}} 367 | ``` 368 | 369 | ##### name 370 | 371 | Generates a random full name for a random gender. 372 | 373 | ``` 374 | {{name}} 375 | ``` 376 | 377 | ##### email 378 | 379 | Generates a random email address. 380 | 381 | ``` 382 | {{email}} 383 | ``` 384 | 385 | ##### phone 386 | 387 | Generates a random phone number in E164 format. 388 | 389 | ``` 390 | {{phone}} 391 | ``` 392 | 393 | ##### postcode 394 | 395 | Generates a random postcode, taking a 2-letter country code. 396 | 397 | ``` 398 | {{postcode "GB"}} 399 | ``` 400 | 401 | ##### address 402 | 403 | Generates a random American address. It's possible to create addresses for other countries using the various other functions available. 404 | 405 | ``` 406 | {{address}} 407 | ``` 408 | 409 | ##### street 410 | 411 | Generates a random street name, taking a 2-letter country code. 412 | 413 | ``` 414 | {{street "GB"}} 415 | ``` 416 | 417 | ##### city 418 | 419 | Generates a random American city name. 420 | 421 | ``` 422 | {{city}} 423 | ``` 424 | 425 | ##### county 426 | 427 | Generates a random county/state name, taking a 2-letter country code. To generate a random state name without having to pass "US" as an argument, use the `state` function. 428 | 429 | ``` 430 | {{county "GB"}} 431 | ``` 432 | 433 | ##### state 434 | 435 | Generates a random American state name. 
436 | 437 | ``` 438 | {{state}} 439 | ``` 440 | 441 | ##### state2 442 | 443 | Generates a random American state name in its 2-letter format. 444 | 445 | ``` 446 | {{state2}} 447 | ``` 448 | 449 | ##### currency 450 | 451 | Generates a random currency in ISO 4217 format. 452 | 453 | ``` 454 | {{currency}} 455 | ``` 456 | 457 | ##### locale 458 | 459 | Generates a random locale in BCP 47 format. 460 | 461 | ``` 462 | {{locale}} 463 | ``` 464 | 465 | ##### country 466 | 467 | Generates a random country name. 468 | 469 | ``` 470 | {{country}} 471 | ``` 472 | 473 | ##### country2 474 | 475 | Generates a random country name in ISO 3166-1 alpha-2 format. 476 | 477 | ``` 478 | {{country2}} 479 | ``` 480 | 481 | ##### country3 482 | 483 | Generates a random country name in ISO 3166-1 alpha-3 format. 484 | 485 | ``` 486 | {{country3}} 487 | ``` 488 | 489 | ##### ip4 490 | 491 | Generates a random v4 IP address. 492 | 493 | ``` 494 | {{ip4}} 495 | ``` 496 | 497 | ##### ip6 498 | 499 | Generates a random v6 IP address. 500 | 501 | ``` 502 | {{ip6}} 503 | ``` 504 | 505 | ##### user-agent 506 | 507 | Generates a random user agent to simulate an API client. 508 | 509 | ``` 510 | {{agent}} 511 | ``` 512 | 513 | ## Other database types: 514 | 515 | ### MySQL 516 | 517 | ``` 518 | datagen -script mysql.sql --driver mysql --conn root@/sandbox 519 | ``` 520 | 521 | With MySQL's lack of a `returning` clause, we instead select a random record from the `person` table when inserting pet records, which is less efficient but provides a workaround. 522 | 523 | ```sql 524 | -- REPEAT 10 525 | -- NAME pet 526 | insert into `pet` (`pid`, `name`) values 527 | {{range $i, $e := ntimes 100 }} 528 | {{if $i}},{{end}} 529 | ( 530 | (select `id` from `person` order by rand() limit 1), 531 | '{{name}}' 532 | ) 533 | {{end}}; 534 | ``` 535 | 536 | ## Todos 537 | 538 | * Refactor: 539 | * `runner.ResetEach` and remove that state. Create block runner and keep state within there.
(Will require a change to the store as well). 540 | 541 | * Allow generated columns to be referenced by rows in the same insert. 542 | 543 | * Consider using ($1, $2) with arguments 544 | 545 | * Better handling of connection issues during run. 546 | 547 | * Integration tests. 548 | 549 | * Migrate to travis-ci.com and add coveralls support back in. 550 | -------------------------------------------------------------------------------- /assets/cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codingconcepts/datagen/c3b67b6bb7d96e58661486c1de982ef7cb00cd65/assets/cover.png -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codingconcepts/datagen/c3b67b6bb7d96e58661486c1de982ef7cb00cd65/assets/logo.png -------------------------------------------------------------------------------- /examples/create.sql: -------------------------------------------------------------------------------- 1 | CREATE DATABASE "sandbox"; 2 | 3 | USE "sandbox"; 4 | 5 | CREATE TABLE "owner" ( 6 | "id" UUID PRIMARY KEY DEFAULT gen_random_uuid(), 7 | "email" STRING NOT NULL, 8 | "date_of_birth" TIMESTAMP NOT NULL 9 | ); 10 | 11 | CREATE TABLE "pet" ( 12 | "id" UUID DEFAULT gen_random_uuid(), 13 | "pid" UUID NOT NULL, 14 | "name" STRING NOT NULL, 15 | "type" STRING NOT NULL, 16 | PRIMARY KEY ("pid", "id"), 17 | CONSTRAINT type_v1 CHECK ("type" IN ('cat', 'dog')) 18 | ) INTERLEAVE IN PARENT "owner" ("pid"); -------------------------------------------------------------------------------- /examples/script.sql: -------------------------------------------------------------------------------- 1 | -- REPEAT 10 2 | -- NAME owner 3 | insert into "owner" ("email", "date_of_birth") values 4 | {{range $i, $e := ntimes 5 }} 5 | {{if $i}},{{end}} 6 | ( 7 | '{{email}}', 8 | 
'{{date "1900-01-01" "now" ""}}' 9 | ) 10 | {{end}} 11 | returning "id"; 12 | 13 | -- REPEAT 20 14 | -- NAME pet 15 | insert into "pet" ("pid", "name", "type") values 16 | {{range $i, $e := ntimes 5 }} 17 | {{if $i}},{{end}} 18 | ( 19 | '{{ref "owner" "id"}}', 20 | '{{adj}} {{noun}}', 21 | '{{wset "dog" 60 "cat" 40}}' 22 | ) 23 | {{end}}; 24 | -------------------------------------------------------------------------------- /examples/types.txt: -------------------------------------------------------------------------------- 1 | cat 2 | dog -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/codingconcepts/datagen 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/DATA-DOG/go-sqlmock v1.5.0 7 | github.com/Pallinder/go-randomdata v1.2.0 8 | github.com/go-sql-driver/mysql v1.7.0 9 | github.com/google/uuid v1.3.0 10 | github.com/lib/pq v1.10.7 11 | github.com/pkg/errors v0.9.1 12 | gopkg.in/cheggaaa/pb.v1 v1.0.28 13 | ) 14 | 15 | require ( 16 | github.com/fatih/color v1.14.1 // indirect 17 | github.com/mattn/go-colorable v0.1.13 // indirect 18 | github.com/mattn/go-runewidth v0.0.14 // indirect 19 | github.com/rivo/uniseg v0.2.0 // indirect 20 | golang.org/x/sys v0.5.0 // indirect 21 | golang.org/x/text v0.7.0 // indirect 22 | ) 23 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/DATA-DOG/go-sqlmock v1.5.0 h1:Shsta01QNfFxHCfpW6YH2STWB0MudeXXEWMr20OEh60= 2 | github.com/DATA-DOG/go-sqlmock v1.5.0/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM= 3 | github.com/Pallinder/go-randomdata v1.2.0 h1:DZ41wBchNRb/0GfsePLiSwb0PHZmT67XY00lCDlaYPg= 4 | github.com/Pallinder/go-randomdata v1.2.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y= 5 | github.com/fatih/color v1.14.1 
h1:qfhVLaG5s+nCROl1zJsZRxFeYrHLqWroPOQ8BWiNb4w= 6 | github.com/fatih/color v1.14.1/go.mod h1:2oHN61fhTpgcxD3TSWCgKDiH1+x4OiDVVGH8WlgGZGg= 7 | github.com/go-sql-driver/mysql v1.7.0 h1:ueSltNNllEqE3qcWBTD0iQd3IpL/6U+mJxLkazJ7YPc= 8 | github.com/go-sql-driver/mysql v1.7.0/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= 9 | github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= 10 | github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 11 | github.com/lib/pq v1.10.7 h1:p7ZhMD+KsSRozJr34udlUrhboJwWAgCg34+/ZZNvZZw= 12 | github.com/lib/pq v1.10.7/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= 13 | github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= 14 | github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= 15 | github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= 16 | github.com/mattn/go-isatty v0.0.17 h1:BTarxUcIeDqL27Mc+vyvdWYSL28zpIhv3RoTdsLMPng= 17 | github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU= 18 | github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 19 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 20 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 21 | github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= 22 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 23 | golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 24 | golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= 25 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 26 | golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= 27 | golang.org/x/text v0.7.0/go.mod 
h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= 28 | gopkg.in/cheggaaa/pb.v1 v1.0.28 h1:n1tBJnnK2r7g9OW2btFH91V92STTUevLXYFb8gy9EMk= 29 | gopkg.in/cheggaaa/pb.v1 v1.0.28/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= 30 | -------------------------------------------------------------------------------- /internal/pkg/parse/blocks.go: -------------------------------------------------------------------------------- 1 | package parse 2 | 3 | import ( 4 | "bufio" 5 | "io" 6 | "strconv" 7 | "strings" 8 | 9 | "github.com/pkg/errors" 10 | ) 11 | 12 | const ( 13 | commentEOF = "-- EOF" 14 | commentRepeat = "-- REPEAT" 15 | commentName = "-- NAME" 16 | comment = "-- " 17 | ) 18 | 19 | // Block represents an instruction block in a script file. 20 | type Block struct { 21 | // Repeat tells the application how many times to run the body. 22 | Repeat int 23 | 24 | // The name of the block can be used to identify the return values 25 | // from one block execution from another. 26 | Name string 27 | 28 | // The body of the template. 29 | Body string 30 | } 31 | 32 | // Blocks reads an input reader line by line, parsing blocks than 33 | // can be executed by the Runner. If a block does not have an 34 | // explicit REPEAT value, a default of 1 will be used. 
35 | func Blocks(r io.Reader) ([]Block, error) { 36 | scanner := bufio.NewScanner(r) 37 | output := []Block{} 38 | 39 | for { 40 | ok, block, err := parseBlock(scanner) 41 | if err != nil { 42 | return nil, err 43 | } 44 | if block.Body != "" { 45 | output = append(output, block) 46 | } 47 | if !ok { 48 | return output, nil 49 | } 50 | } 51 | } 52 | 53 | func parseBlock(scanner *bufio.Scanner) (ok bool, block Block, err error) { 54 | b := strings.Builder{} 55 | block.Repeat = 1 56 | for scanner.Scan() { 57 | t := strings.Trim(scanner.Text(), " \t") 58 | 59 | if strings.HasPrefix(t, commentName) { 60 | block.Name = parseName(t) 61 | continue 62 | } 63 | 64 | if strings.HasPrefix(t, commentRepeat) { 65 | var err error 66 | if block.Repeat, err = parseRepeat(t); err != nil { 67 | return false, Block{}, errors.Wrap(err, "parsing repeat") 68 | } 69 | continue 70 | } 71 | 72 | // We've hit the gap between statements,break out and 73 | // signal that there could be more blocks to come. 74 | if t == "" { 75 | block.Body = b.String() 76 | return true, block, nil 77 | } 78 | 79 | // We've git the user-defined EOF, break out and signal 80 | // that there are no more blocks to come. 81 | if strings.HasPrefix(t, commentEOF) { 82 | block.Body = b.String() 83 | return false, block, nil 84 | } 85 | 86 | // Catch all for all other types of comments. 
87 | if strings.HasPrefix(t, comment) { 88 | continue 89 | } 90 | 91 | b.WriteString(t) 92 | } 93 | 94 | block.Body = b.String() 95 | return false, block, scanner.Err() 96 | } 97 | 98 | func parseRepeat(input string) (int, error) { 99 | clean := strings.Trim(strings.TrimPrefix(input, commentRepeat), " \t") 100 | return strconv.Atoi(clean) 101 | } 102 | 103 | func parseName(input string) string { 104 | return strings.Trim(strings.TrimPrefix(input, commentName), " \t") 105 | } 106 | -------------------------------------------------------------------------------- /internal/pkg/parse/blocks_test.go: -------------------------------------------------------------------------------- 1 | package parse 2 | 3 | import ( 4 | "errors" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/codingconcepts/datagen/internal/pkg/test" 9 | ) 10 | 11 | func TestBlocks(t *testing.T) { 12 | input := `-- NAME a 13 | -- REPEAT 1 14 | A 15 | 16 | -- NAME b 17 | -- REPEAT 2 18 | B 19 | 20 | -- NAME e 21 | -- REPEAT 4 22 | 23 | -- NAME c 24 | -- REPEAT 3 25 | C 26 | 27 | -- NAME e 28 | -- REPEAT 5` 29 | 30 | blocks, err := Blocks(strings.NewReader(input)) 31 | if err != nil { 32 | t.Fatalf("error parsing blocks: %v", err) 33 | } 34 | 35 | test.Equals(t, "a", blocks[0].Name) 36 | test.Equals(t, 1, blocks[0].Repeat) 37 | test.Equals(t, "A", blocks[0].Body) 38 | 39 | test.Equals(t, "b", blocks[1].Name) 40 | test.Equals(t, 2, blocks[1].Repeat) 41 | test.Equals(t, "B", blocks[1].Body) 42 | 43 | test.Equals(t, "c", blocks[2].Name) 44 | test.Equals(t, 3, blocks[2].Repeat) 45 | test.Equals(t, "C", blocks[2].Body) 46 | 47 | // Note that nothing is parsed for block d. 
48 | test.Equals(t, 3, len(blocks)) 49 | } 50 | 51 | func BenchmarkBlock(b *testing.B) { 52 | cases := []struct { 53 | name string 54 | input string 55 | }{ 56 | { 57 | name: "one block", 58 | input: "insert into table (one, two) values (1, 2);", 59 | }, 60 | { 61 | name: "ten blocks", 62 | input: "-- NAME 0\n-- REPEAT 1\ninsert into table (one, two) values (1, 2);\n\n" + 63 | "-- NAME 1\n-- REPEAT 1\ninsert into table (one, two) values (1, 2);\n\n" + 64 | "-- NAME 2\n-- REPEAT 1\ninsert into table (one, two) values (1, 2);\n\n" + 65 | "-- NAME 3\n-- REPEAT 1\ninsert into table (one, two) values (1, 2);\n\n" + 66 | "-- NAME 4\n-- REPEAT 1\ninsert into table (one, two) values (1, 2);\n\n" + 67 | "-- NAME 5\n-- REPEAT 1\ninsert into table (one, two) values (1, 2);\n\n" + 68 | "-- NAME 6\n-- REPEAT 1\ninsert into table (one, two) values (1, 2);\n\n" + 69 | "-- NAME 7\n-- REPEAT 1\ninsert into table (one, two) values (1, 2);\n\n" + 70 | "-- NAME 8\n-- REPEAT 1\ninsert into table (one, two) values (1, 2);\n\n" + 71 | "-- NAME 9\n-- REPEAT 1\ninsert into table (one, two) values (1, 2);\n\n", 72 | }, 73 | } 74 | 75 | for _, c := range cases { 76 | b.Run(c.name, func(b *testing.B) { 77 | r := strings.NewReader(c.input) 78 | 79 | for i := 0; i < b.N; i++ { 80 | Blocks(r) 81 | } 82 | }) 83 | } 84 | } 85 | 86 | func TestBlocksRepeat(t *testing.T) { 87 | cases := []struct { 88 | name string 89 | input string 90 | expCount int 91 | exp int 92 | expError bool 93 | }{ 94 | { 95 | name: "defaults to 1", 96 | input: `insert into "t" ("a", "b") values ('a', 'b');`, 97 | expCount: 1, 98 | exp: 1, 99 | expError: false, 100 | }, 101 | { 102 | name: "defaults to 1 with blank line", 103 | input: ` 104 | 105 | insert into "t" ("a", "b") values ('a', 'b');`, 106 | expCount: 1, 107 | exp: 1, 108 | expError: false, 109 | }, 110 | { 111 | name: "sets to 2", 112 | input: `-- REPEAT 2 113 | insert into "t" ("a", "b") values ('a', 'b');`, 114 | expCount: 1, 115 | exp: 2, 116 | expError: false, 
117 | }, 118 | { 119 | name: "sets to 2 with blank line", 120 | input: ` 121 | 122 | -- REPEAT 2 123 | insert into "t" ("a", "b") values ('a', 'b');`, 124 | expCount: 1, 125 | exp: 2, 126 | expError: false, 127 | }, 128 | { 129 | name: "returns error for invalid repeat", 130 | input: `-- REPEAT a 131 | insert into "t" ("a", "b") values ('a', 'b');`, 132 | expCount: 0, 133 | exp: 0, 134 | expError: true, 135 | }, 136 | { 137 | name: "returns error for invalid repeat with blank line", 138 | input: ` 139 | 140 | -- REPEAT a 141 | insert into "t" ("a", "b") values ('a', 'b');`, 142 | expCount: 0, 143 | exp: 0, 144 | expError: true, 145 | }, 146 | } 147 | 148 | for _, c := range cases { 149 | t.Run(c.name, func(t *testing.T) { 150 | blocks, err := Blocks(strings.NewReader(c.input)) 151 | test.ErrorExists(t, c.expError, err) 152 | if err != nil { 153 | return 154 | } 155 | 156 | test.Equals(t, c.expCount, len(blocks)) 157 | for _, block := range blocks { 158 | test.Equals(t, c.exp, block.Repeat) 159 | } 160 | }) 161 | } 162 | } 163 | 164 | func TestBlocksName(t *testing.T) { 165 | cases := []struct { 166 | name string 167 | input string 168 | expCount int 169 | exp string 170 | expError bool 171 | }{ 172 | { 173 | name: "defaults to empty", 174 | input: `insert into "t" ("a", "b") values ('a', 'b');`, 175 | expCount: 1, 176 | exp: "", 177 | expError: false, 178 | }, 179 | { 180 | name: "defaults to empty with blank line", 181 | input: ` 182 | 183 | insert into "t" ("a", "b") values ('a', 'b');`, 184 | expCount: 1, 185 | exp: "", 186 | expError: false, 187 | }, 188 | { 189 | name: "sets to hello", 190 | input: `-- NAME hello 191 | insert into "t" ("a", "b") values ('a', 'b');`, 192 | expCount: 1, 193 | exp: "hello", 194 | expError: false, 195 | }, 196 | { 197 | name: "sets to hello with blank line", 198 | input: ` 199 | 200 | -- NAME hello 201 | insert into "t" ("a", "b") values ('a', 'b');`, 202 | expCount: 1, 203 | exp: "hello", 204 | expError: false, 205 | }, 206 | } 
// TestBlocksScanError feeds Blocks a reader that always fails and expects
// the error that comes back to equal the reader's own error.
func TestBlocksScanError(t *testing.T) {
	r := &errReader{err: errors.New("oh noes!")}
	_, err := Blocks(r)
	test.Equals(t, r.err, err)
}

// errReader is a reader stub whose Read never yields data and always
// reports the configured error.
type errReader struct {
	err error
}

// Read ignores the destination buffer and returns zero bytes read along
// with the configured error.
func (r *errReader) Read(_ []byte) (int, error) {
	return 0, r.err
}
| "github.com/pkg/errors" 11 | ) 12 | 13 | var ( 14 | utcNow = func() time.Time { return time.Now().UTC() } 15 | 16 | ascii = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 17 | 18 | verbPattern = regexp.MustCompile("%[sd]{1}") 19 | ) 20 | 21 | // String returns a random string between two lengths. 22 | func String(min, max int64, set string) string { 23 | var length int64 24 | if min == max { 25 | length = min 26 | } else { 27 | length = between64(min, max) 28 | } 29 | 30 | runes := ascii 31 | if set != "" { 32 | runes = []rune(set) 33 | } 34 | 35 | output := []rune{} 36 | for i := 0; i < int(length); i++ { 37 | output = append(output, runes[rand.Intn(len(runes))]) 38 | } 39 | 40 | return string(output) 41 | } 42 | 43 | // StringF returns a random string built around a format string. 44 | func StringF(d StringFDefaults) func(format string, args ...interface{}) (string, error) { 45 | return func(format string, args ...interface{}) (string, error) { 46 | fargs := []interface{}{} 47 | 48 | verbs := verbPattern.FindAllString(format, -1) 49 | 50 | var min int64 51 | var max int64 52 | var pattern string 53 | var argIndex int 54 | var err error 55 | 56 | for _, v := range verbs { 57 | switch v[1:] { 58 | case "d": 59 | min, max, argIndex, err = intArgs(argIndex, d, args...) 60 | if err != nil { 61 | return "", errors.Wrap(err, "generating integer placeholder") 62 | } 63 | fargs = append(fargs, Int(min, max)) 64 | case "s": 65 | min, max, pattern, argIndex, err = stringArgs(argIndex, d, args...) 66 | if err != nil { 67 | return "", errors.Wrap(err, "generating string placeholder") 68 | } 69 | fargs = append(fargs, String(min, max, pattern)) 70 | } 71 | } 72 | 73 | return fmt.Sprintf(format, fargs...), nil 74 | } 75 | } 76 | 77 | // intArgs returns the minimum and maximum values to generate between, 78 | // the next index to use from the arguments provided by the user, and 79 | // any error that occurred parsing the parameters. 
80 | func intArgs(i int, d StringFDefaults, args ...interface{}) (int64, int64, int, error) { 81 | if len(args) <= i { 82 | return d.IntMinDefault, d.IntMaxDefault, i, nil 83 | } 84 | 85 | // The next 2 args should be integers. 86 | min, ok := args[i].(int) 87 | if !ok { 88 | return 0, 0, 0, fmt.Errorf("argument for min: %v is not an integer", args[i]) 89 | } 90 | max, ok := args[i+1].(int) 91 | if !ok { 92 | return 0, 0, 0, fmt.Errorf("argument for max: %v is not an integer", args[i]) 93 | } 94 | 95 | return int64(min), int64(max), i + 2, nil 96 | } 97 | 98 | // stringArgs returns the minimum and maximum length values to generate 99 | // between, the character set to use when generating the random string, 100 | // the next index to use from the arguments provided by the user, and 101 | // any error that occurred parsing the parameters. 102 | func stringArgs(i int, d StringFDefaults, args ...interface{}) (int64, int64, string, int, error) { 103 | if len(args) <= i { 104 | return d.StringMinDefault, d.StringMaxDefault, "", i, nil 105 | } 106 | 107 | // The next 2 args should be integers. 108 | min, ok := args[i].(int) 109 | if !ok { 110 | return 0, 0, "", 0, fmt.Errorf("argument for min: %v is not an integer", args[i]) 111 | } 112 | 113 | max, ok := args[i+1].(int) 114 | if !ok { 115 | return 0, 0, "", 0, fmt.Errorf("argument for max: %v is not an integer", args[i]) 116 | } 117 | 118 | // If there's a next argument, it might be a pattern, or for the next verb. 119 | if len(args) <= i+2 { 120 | return int64(min), int64(max), "", i + 2, nil 121 | } 122 | 123 | if s, ok := args[i+2].(string); ok { 124 | return int64(min), int64(max), s, i + 3, nil 125 | } 126 | 127 | return int64(min), int64(max), "", i + 2, nil 128 | } 129 | 130 | // Int returns a random 64 integer between a minimum and maximum. 
131 | func Int(min, max int64) int64 { 132 | return between64(min, max) 133 | } 134 | 135 | // Date returns a random date between two dates and formats it 136 | // as a string provided by Runner. It can optionally accept a 137 | // format string to override the Runner's format. Leave empty 138 | // to use the default. 139 | func Date(dateFormat string) func(minStr, maxStr, format string) (string, error) { 140 | return func(minStr, maxStr, format string) (string, error) { 141 | if format == "" { 142 | format = dateFormat 143 | } 144 | 145 | min, err := parseDate(format, minStr) 146 | if err != nil { 147 | return "", errors.Wrap(err, "parsing min date") 148 | } 149 | 150 | max, err := parseDate(format, maxStr) 151 | if err != nil { 152 | return "", errors.Wrap(err, "parsing max date") 153 | } 154 | 155 | if min == max { 156 | return min.UTC().Format(format), nil 157 | } 158 | 159 | diff := between64(min.Unix(), max.Unix()) 160 | 161 | return time.Unix(diff, 0).UTC().Format(format), nil 162 | } 163 | } 164 | 165 | // Float returns a random 64 bit float between a minimum and maximum. 166 | func Float(min, max float64) float64 { 167 | if min == max { 168 | return min 169 | } 170 | if min > max { 171 | min, max = max, min 172 | } 173 | 174 | return min + rand.Float64()*(max-min) 175 | } 176 | 177 | // Set returns a random item from a set 178 | func Set(set ...interface{}) interface{} { 179 | i := between64(0, int64(len(set))) 180 | return set[i] 181 | } 182 | 183 | // NTimes returns a slice of empty structs of random length. If you know 184 | // the size of the slice you'd like, just pass the min argument, if you'd 185 | // like a slice between a minimum and maximum size, pass a value for extra. 
186 | func NTimes(min int64, extra ...int64) []struct{} { 187 | max := min 188 | if len(extra) > 0 { 189 | max = extra[0] 190 | } 191 | return make([]struct{}, Int(min, max)) 192 | } 193 | 194 | func between64(min, max int64) int64 { 195 | if min == max { 196 | return min 197 | } 198 | if min > max { 199 | min, max = max, min 200 | } 201 | return rand.Int63n(max-min) + min 202 | } 203 | 204 | func parseDate(format, input string) (time.Time, error) { 205 | if strings.EqualFold(input, "now") { 206 | return utcNow(), nil 207 | } 208 | 209 | return time.Parse(format, input) 210 | } 211 | -------------------------------------------------------------------------------- /internal/pkg/random/random_test.go: -------------------------------------------------------------------------------- 1 | package random 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "testing" 7 | "time" 8 | 9 | "github.com/codingconcepts/datagen/internal/pkg/test" 10 | ) 11 | 12 | func TestString(t *testing.T) { 13 | cases := []struct { 14 | name string 15 | min int64 16 | max int64 17 | set string 18 | }{ 19 | {name: "length 1", min: 1, max: 1}, 20 | {name: "length 2", min: 2, max: 2}, 21 | {name: "different lengths", min: 1, max: 10}, 22 | {name: "min > max", min: 10, max: 1}, 23 | {name: "custom set", min: 10, max: 10, set: "ab"}, 24 | } 25 | 26 | for _, c := range cases { 27 | t.Run(c.name, func(t *testing.T) { 28 | s := String(c.min, c.max, c.set) 29 | 30 | if c.min > c.max { 31 | c.min, c.max = c.max, c.min 32 | } 33 | 34 | test.Assert(t, int64(len(s)) >= c.min) 35 | test.Assert(t, int64(len(s)) <= c.max) 36 | 37 | if c.set != "" { 38 | runesInSet(t, []rune(c.set), []rune(s)) 39 | } 40 | }) 41 | } 42 | } 43 | 44 | func BenchmarkString(b *testing.B) { 45 | cases := []struct { 46 | name string 47 | min int64 48 | max int64 49 | set string 50 | }{ 51 | {name: "1 1", min: 1, max: 1}, 52 | {name: "10 10", min: 10, max: 10}, 53 | {name: "1 10", min: 1, max: 10}, 54 | {name: "1 10 set", min: 1, max: 10, 
set: "abcABC"}, 55 | } 56 | 57 | for _, c := range cases { 58 | b.Run(c.name, func(b *testing.B) { 59 | for i := 0; i < b.N; i++ { 60 | String(c.min, c.max, c.set) 61 | } 62 | }) 63 | } 64 | } 65 | 66 | func TestInt(t *testing.T) { 67 | cases := []struct { 68 | name string 69 | min int64 70 | max int64 71 | }{ 72 | {name: "min eq max", min: 1, max: 1}, 73 | {name: "min lt max", min: 1, max: 10}, 74 | {name: "min gt max", min: 10, max: 1}, 75 | } 76 | 77 | for _, c := range cases { 78 | t.Run(c.name, func(t *testing.T) { 79 | i := Int(c.min, c.max) 80 | 81 | if c.min > c.max { 82 | c.min, c.max = c.max, c.min 83 | } 84 | 85 | test.Assert(t, i >= c.min) 86 | test.Assert(t, i <= c.max) 87 | }) 88 | } 89 | } 90 | 91 | func TestStringF(t *testing.T) { 92 | defaults := StringFDefaults{ 93 | StringMinDefault: 10, 94 | StringMaxDefault: 10, 95 | IntMinDefault: 1000, 96 | IntMaxDefault: 1000, 97 | } 98 | 99 | cases := []struct { 100 | name string 101 | format string 102 | args []interface{} 103 | assert func(string) error 104 | expError bool 105 | }{ 106 | { 107 | name: "string without arguments", 108 | format: "%s", 109 | assert: func(s string) error { 110 | if int64(len(s)) != defaults.StringMaxDefault { 111 | return fmt.Errorf("%q length is not equal to %d", s, defaults.StringMaxDefault) 112 | } 113 | return nil 114 | }, 115 | }, 116 | { 117 | name: "string with length arguments", 118 | format: "%s", 119 | args: []interface{}{20, 20}, 120 | assert: func(s string) error { 121 | if int64(len(s)) != 20 { 122 | return fmt.Errorf("%q length is not equal to %d", s, 20) 123 | } 124 | return nil 125 | }, 126 | }, 127 | { 128 | name: "string with all arguments", 129 | format: "%s", 130 | args: []interface{}{20, 20, "abc"}, 131 | assert: func(s string) error { 132 | if int64(len(s)) != 20 { 133 | return fmt.Errorf("%q length is not equal to %d", s, 20) 134 | } 135 | return nil 136 | }, 137 | }, 138 | { 139 | name: "string with another placeholder's arguments", 140 | format: "%s", 
141 | args: []interface{}{20, 20, 30, 30}, 142 | assert: func(s string) error { 143 | if int64(len(s)) != 20 { 144 | return fmt.Errorf("%q length is not equal to %d", s, 20) 145 | } 146 | return nil 147 | }, 148 | }, 149 | { 150 | name: "string with invalid min argument", 151 | format: "%s", 152 | args: []interface{}{"hello", 20}, 153 | expError: true, 154 | }, 155 | { 156 | name: "string with invalid max argument", 157 | format: "%s", 158 | args: []interface{}{20, "hello"}, 159 | expError: true, 160 | }, 161 | { 162 | name: "int without arguments", 163 | format: "%d", 164 | assert: func(s string) error { 165 | i, err := strconv.ParseInt(s, 10, 64) 166 | if err != nil { 167 | return err 168 | } 169 | 170 | if i != defaults.IntMaxDefault { 171 | return fmt.Errorf("%q is not equal to %d", s, defaults.IntMaxDefault) 172 | } 173 | return nil 174 | }, 175 | }, 176 | { 177 | name: "int with arguments", 178 | format: "%d", 179 | args: []interface{}{2000, 2000}, 180 | assert: func(s string) error { 181 | i, err := strconv.ParseInt(s, 10, 64) 182 | if err != nil { 183 | return err 184 | } 185 | 186 | if i != 2000 { 187 | return fmt.Errorf("%q is not equal to %d", s, 2000) 188 | } 189 | return nil 190 | }, 191 | }, 192 | { 193 | name: "int with invalid min argument", 194 | format: "%d", 195 | args: []interface{}{"hello", 2000}, 196 | expError: true, 197 | }, 198 | { 199 | name: "int with invalid max argument", 200 | format: "%d", 201 | args: []interface{}{2000, "hello"}, 202 | expError: true, 203 | }, 204 | } 205 | 206 | for _, c := range cases { 207 | t.Run(c.name, func(t *testing.T) { 208 | s := StringF(defaults) 209 | act, err := s(c.format, c.args...) 
210 | 211 | test.ErrorExists(t, c.expError, err) 212 | if c.expError { 213 | return 214 | } 215 | 216 | test.ErrorExists(t, false, c.assert(act)) 217 | }) 218 | } 219 | } 220 | 221 | func BenchmarkStringF(b *testing.B) { 222 | defaults := StringFDefaults{ 223 | StringMinDefault: 10, 224 | StringMaxDefault: 10, 225 | IntMinDefault: 1000, 226 | IntMaxDefault: 1000, 227 | } 228 | 229 | cases := []struct { 230 | name string 231 | format string 232 | args []interface{} 233 | }{ 234 | { 235 | name: "string without arguments", 236 | format: "%s", 237 | }, 238 | { 239 | name: "string with length arguments", 240 | format: "%s", 241 | args: []interface{}{20, 20}, 242 | }, 243 | { 244 | name: "string with all arguments", 245 | format: "%s", 246 | args: []interface{}{20, 20, "abc"}, 247 | }, 248 | { 249 | name: "string with another placeholder's arguments", 250 | format: "%s", 251 | args: []interface{}{20, 20, 30, 30}, 252 | }, 253 | { 254 | name: "int without arguments", 255 | format: "%d", 256 | }, 257 | { 258 | name: "int with arguments", 259 | format: "%d", 260 | args: []interface{}{2000, 2000}, 261 | }, 262 | } 263 | 264 | for _, c := range cases { 265 | b.Run(c.name, func(b *testing.B) { 266 | s := StringF(defaults) 267 | 268 | for i := 0; i < b.N; i++ { 269 | s(c.format, c.args...) 
270 | } 271 | }) 272 | } 273 | } 274 | 275 | func BenchmarkInt(b *testing.B) { 276 | cases := []struct { 277 | name string 278 | min int64 279 | max int64 280 | }{ 281 | {name: "1 1", min: 1, max: 1}, 282 | {name: "10 10", min: 10, max: 10}, 283 | {name: "1 10", min: 1, max: 10}, 284 | } 285 | 286 | for _, c := range cases { 287 | b.Run(c.name, func(b *testing.B) { 288 | for i := 0; i < b.N; i++ { 289 | Int(c.min, c.max) 290 | } 291 | }) 292 | } 293 | } 294 | 295 | func TestDate(t *testing.T) { 296 | date := time.Date(2000, time.January, 2, 3, 4, 5, 6, time.UTC) 297 | 298 | origUTCNow := utcNow 299 | utcNow = func() time.Time { return date } 300 | defer func() { utcNow = origUTCNow }() 301 | 302 | cases := []struct { 303 | name string 304 | min string 305 | max string 306 | format string 307 | overrideFormat string 308 | expError bool 309 | }{ 310 | {name: "min eq max", min: "2019-04-23", max: "2019-04-23", format: "2006-01-02"}, 311 | {name: "min lt max", min: "2018-04-23", max: "2019-04-23", format: "2006-01-02"}, 312 | {name: "min gt max", min: "2019-04-23", max: "2018-04-23", format: "2006-01-02"}, 313 | 314 | {name: "min parse failure", min: "2019-13-32", expError: true}, 315 | {name: "max parse failure", min: "2019-04-23", max: "2019-13-32", format: "2006-01-02", expError: true}, 316 | {name: "max parse failure", min: "2019-04-23", max: "2019-04-23", format: "1006-01-02", expError: true}, 317 | 318 | {name: "override format", min: "20190423", max: "20180423", format: "2006-01-02", overrideFormat: "20060102"}, 319 | {name: "override format failure", min: "20190423", max: "20180423", format: "2006-01-02", overrideFormat: "20170102", expError: true}, 320 | 321 | {name: "override format failure", min: "20190423", max: "20180423", format: "2006-01-02", overrideFormat: "20170102", expError: true}, 322 | } 323 | 324 | for _, c := range cases { 325 | t.Run(c.name, func(t *testing.T) { 326 | df := Date(c.format) 327 | d, err := df(c.min, c.max, c.overrideFormat) 328 | 
test.ErrorExists(t, c.expError, err) 329 | 330 | // Don't continue if we expect an error. 331 | if c.expError { 332 | return 333 | } 334 | 335 | format := c.format 336 | if c.overrideFormat != "" { 337 | format = c.overrideFormat 338 | } 339 | 340 | minD, err := time.Parse(format, c.min) 341 | if err != nil { 342 | t.Fatalf("invalid min format: %v", err) 343 | } 344 | 345 | maxD, err := time.Parse(format, c.max) 346 | if err != nil { 347 | t.Fatalf("invalid max format: %v", err) 348 | } 349 | 350 | if minD.Unix() > maxD.Unix() { 351 | minD, maxD = maxD, minD 352 | } 353 | 354 | dD, err := time.Parse(format, d) 355 | if err != nil { 356 | t.Fatalf("invalid d format: %v", err) 357 | } 358 | 359 | test.Assert(t, dD.Unix() >= minD.Unix()) 360 | test.Assert(t, dD.Unix() <= maxD.Unix()) 361 | }) 362 | } 363 | } 364 | 365 | func BenchmarkDate(b *testing.B) { 366 | cases := []struct { 367 | name string 368 | min string 369 | max string 370 | format string 371 | overrideFormat string 372 | }{ 373 | {name: "min eq max date", min: "2019-01-02", max: "2019-01-02", format: "2006-01-02"}, 374 | {name: "min eq max date and time", min: "2019-01-02 03:04:05", max: "2019-01-02 03:04:05", format: "2006-01-02 15:04:05"}, 375 | {name: "min lt max date", min: "2018-01-02", max: "2019-01-02", format: "2006-01-02"}, 376 | {name: "min lt max date and time", min: "2018-01-02 03:04:05", max: "2019-01-02 03:04:05", format: "2006-01-02 15:04:05"}, 377 | {name: "min gt max date", min: "2019-01-02", max: "2018-01-02", format: "2006-01-02"}, 378 | {name: "min gt max date and time", min: "2019-01-02 03:04:05", max: "2018-01-02 03:04:05", format: "2006-01-02 15:04:05"}, 379 | 380 | {name: "override format", min: "2019-01-02 03:04:05", max: "2018-01-02 03:04:05", format: "2006-01-02 15:04:05", overrideFormat: "20060102"}, 381 | {name: "override format failure", min: "2019-01-02 03:04:05", max: "2018-01-02 03:04:05", format: "2006-01-02 15:04:05", overrideFormat: "20170102"}, 382 | } 383 | 384 | for 
_, c := range cases { 385 | b.Run(c.name, func(b *testing.B) { 386 | d := Date(c.format) 387 | for i := 0; i < b.N; i++ { 388 | d(c.min, c.max, c.overrideFormat) 389 | } 390 | }) 391 | } 392 | } 393 | 394 | func TestFloat(t *testing.T) { 395 | cases := []struct { 396 | name string 397 | min float64 398 | max float64 399 | }{ 400 | {name: "min eq max", min: 1, max: 1}, 401 | {name: "min lt max", min: 1, max: 10}, 402 | {name: "min gt max", min: 10, max: 1}, 403 | } 404 | 405 | for _, c := range cases { 406 | t.Run(c.name, func(t *testing.T) { 407 | i := Float(c.min, c.max) 408 | 409 | if c.min > c.max { 410 | c.min, c.max = c.max, c.min 411 | } 412 | 413 | test.Assert(t, i >= c.min) 414 | test.Assert(t, i <= c.max) 415 | }) 416 | } 417 | } 418 | 419 | func BenchmarkFloat(b *testing.B) { 420 | cases := []struct { 421 | name string 422 | min float64 423 | max float64 424 | }{ 425 | {name: "1.23 1.23", min: 1.23, max: 1.23}, 426 | {name: "10.23 10.23", min: 10.23, max: 10.23}, 427 | {name: "1.23 10.23", min: 1.23, max: 10.23}, 428 | } 429 | 430 | for _, c := range cases { 431 | b.Run(c.name, func(b *testing.B) { 432 | for i := 0; i < b.N; i++ { 433 | Float(c.min, c.max) 434 | } 435 | }) 436 | } 437 | } 438 | 439 | func TestSet(t *testing.T) { 440 | cases := []struct { 441 | name string 442 | set []interface{} 443 | }{ 444 | {name: "one item", set: []interface{}{"a"}}, 445 | {name: "multiple items", set: []interface{}{"a", "b"}}, 446 | } 447 | 448 | for _, c := range cases { 449 | t.Run(c.name, func(t *testing.T) { 450 | Set(c.set...) 451 | }) 452 | } 453 | } 454 | 455 | func BenchmarkSet(b *testing.B) { 456 | cases := []struct { 457 | name string 458 | items []interface{} 459 | }{ 460 | {name: "one item", items: []interface{}{"a"}}, 461 | {name: "multiple items", items: []interface{}{"a", "b", "c"}}, 462 | } 463 | 464 | for _, c := range cases { 465 | b.Run(c.name, func(b *testing.B) { 466 | for i := 0; i < b.N; i++ { 467 | Set(c.items...) 
// runesInSet fails the test unless every rune in act is present in exp.
func runesInSet(t *testing.T, exp, act []rune) {
	// Report failures at the caller's line instead of inside this helper.
	t.Helper()

	for _, a := range act {
		runeInSet(t, a, exp)
	}
}

// runeInSet fails the test immediately when r is not a member of set.
func runeInSet(t *testing.T, r rune, set []rune) {
	t.Helper()

	for _, s := range set {
		if r == s {
			return
		}
	}

	// %q prints the character itself rather than its numeric code point.
	t.Fatalf("rune %q not found in set", r)
}
4 | type WeightedItem struct { 5 | Value interface{} 6 | Weight int 7 | } 8 | 9 | // WeightedItems represents a collection of weighted items with a 10 | // pre-calculated total weight. 11 | type WeightedItems struct { 12 | items []WeightedItem 13 | totalWeight int 14 | } 15 | 16 | // MakeWeightedItems creates a slice of WeightedItems and calculates 17 | // the total weight. 18 | func MakeWeightedItems(items []WeightedItem) WeightedItems { 19 | wi := WeightedItems{ 20 | items: items, 21 | } 22 | 23 | for _, item := range items { 24 | wi.totalWeight += item.Weight 25 | } 26 | 27 | return wi 28 | } 29 | 30 | // Choose selects a random value using the weights of each to ensure 31 | // items with higher weights have more of a chance of being selected. 32 | func (wi WeightedItems) Choose() interface{} { 33 | randomWeight := between64(1, int64(wi.totalWeight)) 34 | for _, i := range wi.items { 35 | randomWeight -= int64(i.Weight) 36 | if randomWeight <= 0 { 37 | return i.Value 38 | } 39 | } 40 | 41 | panic("didn't select an item") 42 | } 43 | -------------------------------------------------------------------------------- /internal/pkg/runner/builder.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | type errBuilder struct { 9 | b strings.Builder 10 | err error 11 | } 12 | 13 | func (ew *errBuilder) write(i interface{}) { 14 | if ew.err != nil { 15 | return 16 | } 17 | _, ew.err = ew.b.WriteString(fmt.Sprintf("%v", i)) 18 | } 19 | -------------------------------------------------------------------------------- /internal/pkg/runner/common_test.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "database/sql" 5 | "log" 6 | 7 | sqlmock "github.com/DATA-DOG/go-sqlmock" 8 | ) 9 | 10 | var ( 11 | db *sql.DB 12 | mock sqlmock.Sqlmock 13 | ) 14 | 15 | func resetMock() { 16 | var err error 17 | if db, 
mock, err = sqlmock.New(); err != nil { 18 | log.Fatalf("error creating sqlmock: %v", err) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /internal/pkg/runner/helpers.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "io/ioutil" 5 | ) 6 | 7 | func (r *Runner) mustDumpQuery(stmt []byte) { 8 | if err := ioutil.WriteFile(r.queryErrFile, stmt, 0644); err != nil { 9 | panic(err) 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /internal/pkg/runner/option.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import "github.com/codingconcepts/datagen/internal/pkg/random" 4 | 5 | // Option allows the Runner to be configured by the user. 6 | type Option func(*Runner) 7 | 8 | // WithDateFormat sets the default date format for the Runner. 9 | func WithDateFormat(f string) Option { 10 | return func(r *Runner) { 11 | r.dateFormat = f 12 | } 13 | } 14 | 15 | // WithStringFDefaults sets the default format min and max values 16 | // for the Runner. 17 | func WithStringFDefaults(d random.StringFDefaults) Option { 18 | return func(r *Runner) { 19 | r.stringFdefaults = d 20 | } 21 | } 22 | 23 | // WithDebug puts the Runner in debug mode, meaning nothing will be 24 | // written to a database. 
25 | func WithDebug(d bool) Option { 26 | return func(r *Runner) { 27 | r.debug = d 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /internal/pkg/runner/option_test.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/codingconcepts/datagen/internal/pkg/random" 8 | 9 | "github.com/codingconcepts/datagen/internal/pkg/test" 10 | ) 11 | 12 | func TestWithDateFormat(t *testing.T) { 13 | r := New(db, WithDateFormat(time.RFC3339)) 14 | 15 | test.Equals(t, time.RFC3339, r.dateFormat) 16 | } 17 | 18 | func TestWithStringFDefaults(t *testing.T) { 19 | r := New(db, WithStringFDefaults(random.StringFDefaults{ 20 | IntMinDefault: 1, 21 | IntMaxDefault: 2, 22 | StringMinDefault: 3, 23 | StringMaxDefault: 4, 24 | })) 25 | 26 | test.Equals(t, int64(1), r.stringFdefaults.IntMinDefault) 27 | test.Equals(t, int64(2), r.stringFdefaults.IntMaxDefault) 28 | test.Equals(t, int64(3), r.stringFdefaults.StringMinDefault) 29 | test.Equals(t, int64(4), r.stringFdefaults.StringMaxDefault) 30 | } 31 | -------------------------------------------------------------------------------- /internal/pkg/runner/runner.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "bytes" 5 | "database/sql" 6 | "fmt" 7 | "io/ioutil" 8 | "reflect" 9 | "strings" 10 | "text/template" 11 | "time" 12 | 13 | "github.com/codingconcepts/datagen/internal/pkg/random" 14 | 15 | "github.com/google/uuid" 16 | 17 | "github.com/codingconcepts/datagen/internal/pkg/parse" 18 | "github.com/pkg/errors" 19 | 20 | "github.com/Pallinder/go-randomdata" 21 | ) 22 | 23 | // Runner holds the configuration that will be used at runtime. 
24 | type Runner struct { 25 | db *sql.DB 26 | funcs template.FuncMap 27 | helpers map[string]interface{} 28 | store *store 29 | debug bool 30 | queryErrFile string 31 | 32 | dateFormat string 33 | stringFdefaults random.StringFDefaults 34 | 35 | fsets map[string][]string 36 | wsets map[string]random.WeightedItems 37 | 38 | adjectives []string 39 | nouns []string 40 | } 41 | 42 | // New returns a pointer to a newly configured Runner. Optionally 43 | // taking a variable number of configuration options. 44 | func New(db *sql.DB, opts ...Option) *Runner { 45 | r := Runner{ 46 | db: db, 47 | store: newStore(), 48 | debug: false, 49 | queryErrFile: "query_err.sql", 50 | stringFdefaults: random.StringFDefaults{ 51 | StringMinDefault: 10, 52 | StringMaxDefault: 10, 53 | IntMinDefault: 10000, 54 | IntMaxDefault: 99999, 55 | }, 56 | fsets: map[string][]string{}, 57 | wsets: map[string]random.WeightedItems{}, 58 | adjectives: strings.Split(strings.ToLower(adjectives), ","), 59 | nouns: strings.Split(strings.ToLower(nouns), ","), 60 | } 61 | 62 | for _, opt := range opts { 63 | opt(&r) 64 | } 65 | 66 | r.funcs = template.FuncMap{ 67 | "string": random.String, 68 | "stringf": random.StringF(r.stringFdefaults), 69 | "int": random.Int, 70 | "date": random.Date(r.dateFormat), 71 | "float": random.Float, 72 | "ntimes": random.NTimes, 73 | "set": random.Set, 74 | "uuid": func() string { return uuid.New().String() }, 75 | "wset": r.wset, 76 | "fset": r.loadAndSet, 77 | "ref": r.store.reference, 78 | "row": r.store.row, 79 | "each": r.store.each, 80 | "adj": func() string { return r.adjectives[random.Int(0, int64(len(r.adjectives)-1))] }, 81 | "noun": func() string { return r.nouns[random.Int(0, int64(len(r.nouns)-1))] }, 82 | "title": func() string { return randomdata.Title(randomdata.RandomGender) }, 83 | "namef": func() string { return randomdata.FirstName(randomdata.RandomGender) }, 84 | "namel": randomdata.LastName, 85 | "name": func() string { return 
randomdata.FullName(randomdata.RandomGender) }, 86 | "email": randomdata.Email, 87 | "phone": randomdata.PhoneNumber, 88 | "postcode": randomdata.PostalCode, 89 | "address": randomdata.Address, 90 | "street": randomdata.StreetForCountry, 91 | "city": randomdata.City, 92 | "county": randomdata.ProvinceForCountry, 93 | "state": func() string { return randomdata.State(randomdata.Large) }, 94 | "state2": func() string { return randomdata.State(randomdata.Small) }, 95 | "currency": randomdata.Currency, 96 | "locale": randomdata.Locale, 97 | "country": func() string { return randomdata.Country(randomdata.FullCountry) }, 98 | "country2": func() string { return randomdata.Country(randomdata.TwoCharCountry) }, // ISO 3166-1 alpha-2 99 | "country3": func() string { return randomdata.Country(randomdata.ThreeCharCountry) }, // ISO 3166-1 alpha-3 100 | "ip4": randomdata.IpV4Address, 101 | "ip6": randomdata.IpV6Address, 102 | "agent": randomdata.UserAgentString, 103 | } 104 | 105 | return &r 106 | } 107 | 108 | // Run executes a given block, returning any errors encountered. 109 | func (r *Runner) Run(b parse.Block) error { 110 | tmpl, err := template.New("block").Funcs(r.funcs).Parse(b.Body) 111 | if err != nil { 112 | return errors.Wrap(err, "parsing template") 113 | } 114 | 115 | buf := &bytes.Buffer{} 116 | if err := tmpl.Execute(buf, r.helpers); err != nil { 117 | return errors.Wrap(err, "executing template") 118 | } 119 | 120 | if r.debug { 121 | fmt.Println(buf.String()) 122 | return nil 123 | } 124 | 125 | rows, err := r.db.Query(buf.String()) 126 | if err != nil { 127 | r.mustDumpQuery(buf.Bytes()) 128 | return errors.Wrap(err, "executing query") 129 | } 130 | 131 | return r.scan(b, rows) 132 | } 133 | 134 | // ResetEach resets the variables used for keeping track of sequential row 135 | // references of previous block results. 
136 | func (r *Runner) ResetEach(name string) { 137 | r.store.eachRow = 0 138 | r.store.currentGroup = 0 139 | r.store.eachContext = name 140 | } 141 | 142 | func (r *Runner) scan(b parse.Block, rows *sql.Rows) error { 143 | for rows.Next() { 144 | columnTypes, err := rows.ColumnTypes() 145 | if err != nil { 146 | return errors.Wrap(err, "getting columns types from result") 147 | } 148 | 149 | values := make([]interface{}, len(columnTypes)) 150 | for i, ct := range columnTypes { 151 | switch ct.DatabaseTypeName() { 152 | case "UUID": 153 | values[i] = reflect.New(reflect.TypeOf("")).Interface() 154 | default: 155 | values[i] = reflect.New(ct.ScanType()).Interface() 156 | } 157 | } 158 | 159 | if err = rows.Scan(values...); err != nil { 160 | return errors.Wrap(err, "scanning columns") 161 | } 162 | 163 | curr := map[string]interface{}{} 164 | for i, ct := range columnTypes { 165 | values[i] = r.prepareValue(reflect.ValueOf(values[i]).Elem()) 166 | curr[ct.Name()] = values[i] 167 | } 168 | r.store.set(b.Name, curr) 169 | } 170 | 171 | return nil 172 | } 173 | 174 | // prepareValue ensures that data being read out of the database following 175 | // a scan is in the correct format for being re-inserted into the database 176 | // during follow-up queries. 
177 | func (r *Runner) prepareValue(v reflect.Value) interface{} { 178 | switch v.Type() { 179 | case reflect.TypeOf(time.Time{}): 180 | t := v.Interface().(time.Time) 181 | return t.Format(r.dateFormat) 182 | default: 183 | return v 184 | } 185 | } 186 | 187 | func (r *Runner) loadAndSet(path string) (string, error) { 188 | set, ok := r.fsets[path] 189 | if ok { 190 | i := random.Int(0, int64(len(set))) 191 | return set[i], nil 192 | } 193 | 194 | b, err := ioutil.ReadFile(path) 195 | if err != nil { 196 | return "", errors.Wrap(err, "error reading file") 197 | } 198 | 199 | strFile := string(b) 200 | strFile = strings.Replace(strFile, "'", "''", -1) 201 | s := strings.Split(strFile, "\n") 202 | 203 | r.fsets[path] = s 204 | return s[random.Int(0, int64(len(s)))], nil 205 | } 206 | 207 | func (r *Runner) wset(set ...interface{}) (interface{}, error) { 208 | b := &errBuilder{b: strings.Builder{}} 209 | 210 | for _, i := range set { 211 | b.write(i) 212 | } 213 | 214 | if b.err != nil { 215 | return nil, b.err 216 | } 217 | 218 | // Use a cached weighted set if found. 
219 | found, ok := r.wsets[b.b.String()] 220 | if ok { 221 | return found.Choose(), nil 222 | } 223 | 224 | items := []random.WeightedItem{} 225 | for i, j := 0, 1; j < len(set); i, j = i+2, j+2 { 226 | items = append(items, random.WeightedItem{ 227 | Value: set[i], 228 | Weight: set[j].(int), 229 | }) 230 | } 231 | 232 | witems := random.MakeWeightedItems(items) 233 | r.wsets[b.b.String()] = witems 234 | 235 | return witems.Choose(), nil 236 | } 237 | -------------------------------------------------------------------------------- /internal/pkg/runner/runner_test.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "database/sql/driver" 5 | "reflect" 6 | "testing" 7 | "time" 8 | 9 | sqlmock "github.com/DATA-DOG/go-sqlmock" 10 | "github.com/codingconcepts/datagen/internal/pkg/parse" 11 | "github.com/codingconcepts/datagen/internal/pkg/test" 12 | ) 13 | 14 | func TestRun(t *testing.T) { 15 | cases := []struct { 16 | name string 17 | b parse.Block 18 | expError bool 19 | }{ 20 | { 21 | name: "empty template to simulate db error", 22 | b: parse.Block{ 23 | Repeat: 1, 24 | Name: "owner", 25 | Body: ``, 26 | }, 27 | expError: true, 28 | }, 29 | { 30 | name: "invalid template", 31 | b: parse.Block{ 32 | Repeat: 1, 33 | Name: "owner", 34 | Body: `{{range $i, $e := ntimes 10 }}`, 35 | }, 36 | expError: true, 37 | }, 38 | { 39 | name: "valid block", 40 | b: parse.Block{ 41 | Repeat: 1, 42 | Name: "owner", 43 | Body: `insert into "owner" ("name") values ("Alice") returning "id", "name", "date_of_birth"`, 44 | }, 45 | }, 46 | } 47 | 48 | for _, c := range cases { 49 | t.Run(c.name, func(t *testing.T) { 50 | resetMock() 51 | r := New(db) 52 | 53 | id, name, dob := 123, "Alice", time.Date(2019, time.January, 2, 3, 4, 5, 0, time.UTC) 54 | 55 | if !c.expError { 56 | rows := []driver.Value{id, name, dob} 57 | mock.ExpectQuery(`insert into "owner" (.*) values (.*) returning "id", "name", 
"date_of_birth"`).WillReturnRows( 58 | sqlmock.NewRows([]string{"id", "name", "date_of_birth"}).AddRow(rows...)) 59 | } 60 | 61 | err := r.Run(c.b) 62 | test.ErrorExists(t, c.expError, err) 63 | if err != nil { 64 | return 65 | } 66 | 67 | // Check the values committed to context, doing a string 68 | // comparison, as we're operating against reflect.Values. 69 | // 70 | // Note that no error expectation cases are being set up, 71 | // as we expect there to be values in these cases. 72 | actID, err := r.store.reference(c.b.Name, "id") 73 | test.ErrorExists(t, false, err) 74 | test.StringEquals(t, id, actID) 75 | 76 | actName, err := r.store.reference(c.b.Name, "name") 77 | test.ErrorExists(t, false, err) 78 | test.StringEquals(t, name, actName) 79 | 80 | actDob, err := r.store.reference(c.b.Name, "date_of_birth") 81 | test.ErrorExists(t, false, err) 82 | test.StringEquals(t, dob, actDob) 83 | }) 84 | } 85 | } 86 | 87 | func TestPrepareValue(t *testing.T) { 88 | r := New(db, WithDateFormat("20060102")) 89 | 90 | cases := []struct { 91 | name string 92 | value interface{} 93 | exp interface{} 94 | }{ 95 | { 96 | name: "string", 97 | value: "Alice", 98 | exp: "Alice", 99 | }, 100 | { 101 | name: "time.Time", 102 | value: time.Date(2019, time.July, 8, 9, 0, 1, 0, time.UTC), 103 | exp: "20190708", 104 | }, 105 | } 106 | 107 | for _, c := range cases { 108 | t.Run(c.name, func(t *testing.T) { 109 | act := r.prepareValue(reflect.ValueOf(c.value)) 110 | test.StringEquals(t, c.exp, act) 111 | }) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /internal/pkg/runner/store.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "sync" 7 | ) 8 | 9 | type groupKey struct { 10 | groupType interface{} 11 | groupID int 12 | } 13 | 14 | // store holds row data that comes out of the database during runtime. 
15 | type store struct { 16 | mu sync.RWMutex 17 | data map[string][]map[string]interface{} 18 | group map[groupKey]map[string]interface{} 19 | eachContext string 20 | eachRow int 21 | 22 | firstColumn string 23 | currentGroup int 24 | } 25 | 26 | func newStore() *store { 27 | return &store{ 28 | data: map[string][]map[string]interface{}{}, 29 | group: map[groupKey]map[string]interface{}{}, 30 | } 31 | } 32 | 33 | func (s *store) set(groupName string, rows map[string]interface{}) { 34 | s.mu.Lock() 35 | defer s.mu.Unlock() 36 | 37 | s.data[groupName] = append(s.data[groupName], rows) 38 | } 39 | 40 | func (s *store) reference(key string, column string) (interface{}, error) { 41 | s.mu.RLock() 42 | defer s.mu.RUnlock() 43 | 44 | rows, ok := s.data[key] 45 | if !ok { 46 | return nil, fmt.Errorf("data not found key=%q", key) 47 | } 48 | 49 | index := rand.Intn(len(rows)) 50 | value, ok := rows[index][column] 51 | if !ok { 52 | return nil, fmt.Errorf("data not found key=%q column=%q index=%d", key, column, index) 53 | } 54 | 55 | return value, nil 56 | } 57 | 58 | func (s *store) row(key, column string, group int) (interface{}, error) { 59 | s.mu.RLock() 60 | defer s.mu.RUnlock() 61 | 62 | groupKey := groupKey{groupType: key, groupID: group} 63 | 64 | // Check if we've scanned this row before. 65 | row, ok := s.group[groupKey] 66 | if ok { 67 | value, ok := row[column] 68 | if !ok { 69 | return nil, fmt.Errorf("data not found key=%q column=%q group=%d", key, column, group) 70 | } 71 | return value, nil 72 | } 73 | 74 | // Get a random item from the row context and cache it for the next read. 
75 | randomValue := s.data[key][rand.Intn(len(s.data[key]))] 76 | 77 | s.group[groupKey] = randomValue 78 | 79 | value, ok := randomValue[column] 80 | if !ok { 81 | return nil, fmt.Errorf("data not found key=%q column=%q", key, column) 82 | } 83 | 84 | return value, nil 85 | } 86 | 87 | func (s *store) each(key, column string, group int) (interface{}, error) { 88 | s.mu.RLock() 89 | defer s.mu.RUnlock() 90 | 91 | groupKey := groupKey{groupType: key, groupID: group} 92 | 93 | if s.firstColumn == "" { 94 | s.firstColumn = column 95 | } else { 96 | if s.firstColumn == column { 97 | s.eachRow++ 98 | } 99 | } 100 | 101 | // Get the next row from the referenced data set, returning to row 0 if we're generating 102 | // more child records than parents. 103 | rowRef := s.data[key][s.eachRow%len(s.data[key])] 104 | 105 | s.group[groupKey] = rowRef 106 | 107 | value, ok := rowRef[column] 108 | if !ok { 109 | return nil, fmt.Errorf("data not found key=%q column=%q", key, column) 110 | } 111 | 112 | return value, nil 113 | } 114 | -------------------------------------------------------------------------------- /internal/pkg/runner/store_test.go: -------------------------------------------------------------------------------- 1 | package runner 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/codingconcepts/datagen/internal/pkg/test" 7 | ) 8 | 9 | func TestReference(t *testing.T) { 10 | s := newStore() 11 | s.set("owner", map[string]interface{}{ 12 | "id": 123, 13 | "name": "Alice", 14 | }) 15 | 16 | cases := []struct { 17 | name string 18 | key string 19 | column string 20 | expValue interface{} 21 | expError bool 22 | }{ 23 | {name: "id found", key: "owner", column: "id", expValue: 123}, 24 | {name: "name found", key: "owner", column: "name", expValue: "Alice"}, 25 | {name: "key not found", key: "invalid", column: "name", expError: true}, 26 | {name: "column not found", key: "owner", column: "invalid", expError: true}, 27 | } 28 | 29 | for _, c := range cases { 30 | 
t.Run(c.name, func(t *testing.T) { 31 | act, err := s.reference(c.key, c.column) 32 | test.ErrorExists(t, c.expError, err) 33 | test.Equals(t, c.expValue, act) 34 | }) 35 | } 36 | } 37 | 38 | func TestRow(t *testing.T) { 39 | s := newStore() 40 | s.set("owner", map[string]interface{}{ 41 | "id": 123, 42 | "name": "Alice", 43 | }) 44 | 45 | cases := []struct { 46 | name string 47 | key string 48 | group int 49 | lookups map[string]interface{} 50 | expError bool 51 | }{ 52 | {name: "id found", key: "owner", group: 1, lookups: map[string]interface{}{"id": 123}}, 53 | {name: "name found", key: "owner", group: 2, lookups: map[string]interface{}{"name": "Alice"}}, 54 | {name: "columns found", key: "owner", group: 3, lookups: map[string]interface{}{"id": 123, "name": "Alice"}}, 55 | {name: "column not found for new group", group: 4, key: "owner", lookups: map[string]interface{}{"invalid": nil}, expError: true}, 56 | {name: "column not found for existing group", group: 3, key: "owner", lookups: map[string]interface{}{"invalid": nil}, expError: true}, 57 | } 58 | 59 | for _, c := range cases { 60 | t.Run(c.name, func(t *testing.T) { 61 | for lk, lv := range c.lookups { 62 | act, err := s.row(c.key, lk, c.group) 63 | test.ErrorExists(t, c.expError, err) 64 | test.Equals(t, lv, act) 65 | } 66 | }) 67 | } 68 | } 69 | 70 | func TestEach(t *testing.T) { 71 | cases := []struct { 72 | name string 73 | key string 74 | group int 75 | lookups map[string]interface{} 76 | expError bool 77 | }{ 78 | {name: "id found", key: "owner", group: 1, lookups: map[string]interface{}{"id": 123}}, 79 | {name: "name found", key: "owner", group: 2, lookups: map[string]interface{}{"name": "Alice"}}, 80 | {name: "columns found", key: "owner", group: 3, lookups: map[string]interface{}{"id": 123, "name": "Alice"}}, 81 | {name: "column not found for new group", group: 4, key: "owner", lookups: map[string]interface{}{"invalid": nil}, expError: true}, 82 | {name: "column not found for existing group", 
group: 3, key: "owner", lookups: map[string]interface{}{"invalid": nil}, expError: true}, 83 | } 84 | 85 | for _, c := range cases { 86 | t.Run(c.name, func(t *testing.T) { 87 | s := newStore() 88 | s.set("owner", map[string]interface{}{ 89 | "id": 123, 90 | "name": "Alice", 91 | }) 92 | 93 | for lk, lv := range c.lookups { 94 | act, err := s.each(c.key, lk, c.group) 95 | test.ErrorExists(t, c.expError, err) 96 | test.Equals(t, lv, act) 97 | } 98 | }) 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /internal/pkg/test/helpers.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "reflect" 7 | "testing" 8 | ) 9 | 10 | // StringEquals performs a comparison against two values 11 | // values by comparing their string values and fails if 12 | // they are not the same. 13 | func StringEquals(tb testing.TB, expected, actual interface{}) { 14 | if !reflect.DeepEqual(fmt.Sprintf("%v", expected), fmt.Sprintf("%v", actual)) { 15 | tb.Helper() 16 | tb.Fatalf("\n\texp: %#[1]v (%[1]T)\n\tgot: %#[2]v (%[2]T)\n", expected, actual) 17 | } 18 | } 19 | 20 | // Equals performs a deep equal comparison against two 21 | // values and fails if they are not the same. 22 | func Equals(tb testing.TB, expected, actual interface{}) { 23 | if !reflect.DeepEqual(expected, actual) { 24 | tb.Helper() 25 | tb.Fatalf("\n\texp: %#[1]v (%[1]T)\n\tgot: %#[2]v (%[2]T)\n", expected, actual) 26 | } 27 | } 28 | 29 | // Assert checks the result of a predicate. 30 | func Assert(tb testing.TB, result bool) { 31 | tb.Helper() 32 | if !result { 33 | tb.Fatal("\n\tassertion failed\n") 34 | } 35 | } 36 | 37 | // ErrorExists fails if an error is expected but doesn't 38 | // exist or if an error is exists but is not expected. 39 | // It does not check equality. 
40 | func ErrorExists(tb testing.TB, exp bool, err error) { 41 | tb.Helper() 42 | if !exp && err != nil { 43 | log.Fatalf("unexpected error: %v", err) 44 | } 45 | if exp && err == nil { 46 | log.Fatal("expect error but didn't get one") 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "database/sql" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "math/rand" 9 | "os" 10 | "time" 11 | 12 | "gopkg.in/cheggaaa/pb.v1" 13 | 14 | "github.com/codingconcepts/datagen/internal/pkg/parse" 15 | "github.com/codingconcepts/datagen/internal/pkg/runner" 16 | _ "github.com/go-sql-driver/mysql" 17 | _ "github.com/lib/pq" 18 | ) 19 | 20 | var semver string 21 | 22 | func main() { 23 | rand.Seed(time.Now().UnixNano()) 24 | log.SetFlags(log.LstdFlags | log.Lshortfile) 25 | 26 | driver := flag.String("driver", "", "name of the database driver to use [postgres|mysql]") 27 | script := flag.String("script", "", "the full or relative path to your script file") 28 | conn := flag.String("conn", "", "the database connection string") 29 | dateFmt := flag.String("datefmt", "2006-01-02", "the Go date format for all database dates") 30 | debug := flag.Bool("debug", false, "dry run without writing to database, ref, row, and each won't work") 31 | version := flag.Bool("version", false, "display the current version number") 32 | flag.Parse() 33 | 34 | if *version { 35 | fmt.Println(semver) 36 | os.Exit(2) 37 | } 38 | 39 | if *script == "" || *driver == "" || *conn == "" { 40 | flag.Usage() 41 | os.Exit(2) 42 | } 43 | 44 | db := mustConnect(*driver, *conn) 45 | defer db.Close() 46 | 47 | runner := runner.New(db, runner.WithDateFormat(*dateFmt), runner.WithDebug(*debug)) 48 | 49 | file, err := os.Open(*script) 50 | if err != nil { 51 | log.Fatalf("error reading script file: %v", err) 52 | } 53 | defer file.Close() 54 | 55 | blocks, err := 
parse.Blocks(file) 56 | if err != nil { 57 | log.Fatalf("error reading blocks from script file: %v", err) 58 | } 59 | 60 | bar := newProgressBar(blocks) 61 | for _, block := range blocks { 62 | runner.ResetEach(block.Name) 63 | for i := 0; i < block.Repeat; i++ { 64 | bar.Increment() 65 | if err = runner.Run(block); err != nil { 66 | log.Fatalf("error running block %q: %v", block.Name, err) 67 | } 68 | } 69 | } 70 | bar.FinishPrint("Finished") 71 | } 72 | 73 | func newProgressBar(blocks []parse.Block) *pb.ProgressBar { 74 | var count int 75 | for _, block := range blocks { 76 | count += block.Repeat 77 | } 78 | 79 | bar := pb.New(count) 80 | bar.SetRefreshRate(time.Millisecond * 100) 81 | bar.ShowCounters = false 82 | return bar.Start() 83 | } 84 | 85 | func mustConnect(driver, connStr string) *sql.DB { 86 | conn, err := sql.Open(driver, connStr) 87 | if err != nil { 88 | log.Fatalf("error opening connection: %d", err) 89 | } 90 | 91 | if err = conn.Ping(); err != nil { 92 | log.Fatalf("error checking connection: %v", err) 93 | } 94 | 95 | return conn 96 | } 97 | --------------------------------------------------------------------------------