├── .github └── workflows │ └── coverage.yaml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── assets ├── cover.png └── cover_grey.svg ├── dg.go ├── examples ├── const_test │ └── config.yaml ├── each_match_test │ ├── config.yaml │ └── market.csv ├── input_test │ ├── config.yaml │ └── significant_dates.csv ├── many_to_many │ ├── config.yaml │ ├── create.sql │ └── insert.sql ├── match_test │ ├── config.yaml │ └── market.csv ├── pattern_test │ └── config.yaml ├── person │ ├── config.yaml │ ├── create.sql │ └── insert.sql ├── range_test │ └── config.yaml ├── supress_column │ └── config.yaml └── unique_test │ └── config.yaml ├── go.mod ├── go.sum └── internal └── pkg ├── generator ├── array.go ├── array_test.go ├── common.go ├── common_test.go ├── const_generator.go ├── const_generator_test.go ├── each_generator.go ├── each_generator_test.go ├── formatter.go ├── gen_generator.go ├── gen_generator_test.go ├── inc_generator.go ├── inc_generator_test.go ├── match_generator.go ├── match_generator_test.go ├── placeholder.go ├── range_generator.go ├── range_generator_test.go ├── ref_generator.go ├── ref_generator_test.go ├── set_generator.go ├── set_generator_test.go ├── weighted_random.go └── weighted_random_test.go ├── model ├── config.go ├── config_test.go ├── csv_file.go ├── csv_file_test.go ├── raw_message.go ├── raw_message_test.go └── source.go ├── random └── xorshift.go ├── source ├── csv_source.go └── csv_source_test.go ├── ui ├── time_tracker.go └── time_tracker_test.go └── web └── file_server.go /.github/workflows/coverage.yaml: -------------------------------------------------------------------------------- 1 | name: main-coverage 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | coverage: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v3 13 | 14 | - name: Set up Go 15 | uses: actions/setup-go@v4 16 | with: 17 | go-version: '1.20' 18 | 19 | - name: Test 20 | run: go test -v -race -covermode=atomic -coverprofile=coverage.out ./... 21 | 22 | - name: Upload coverage reports to Codecov 23 | uses: codecov/codecov-action@v3 24 | env: 25 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | dg 3 | *.exe 4 | *.exe~ 5 | *.dll 6 | *.so 7 | *.dylib 8 | 9 | # Test binary, built with `go test -c` 10 | *.test 11 | 12 | # Output of the go coverage tool, specifically when used with LiteIDE 13 | *.out 14 | *.cov 15 | 16 | # Go workspace file 17 | go.work 18 | 19 | # OS-specific 20 | .DS_Store 21 | 22 | # Application-specific 23 | csvs 24 | releases 25 | scratchpad 26 | 27 | # Code editor 28 | .vscode/ 29 | 30 | .idea 31 | *.iml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Rob Reid 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | validate_version: 2 | ifndef VERSION 3 | $(error VERSION is undefined) 4 | endif 5 | 6 | db: 7 | cockroach demo --insecure --no-example-database 8 | 9 | tables: 10 | cockroach sql --insecure < examples/many_to_many/create.sql 11 | 12 | data_many_to_many: 13 | go run dg.go -c ./examples/many_to_many/config.yaml -o ./csvs/many_to_many -i import.sql 14 | 15 | data_person: 16 | go run dg.go -c ./examples/person/config.yaml -o ./csvs/person 17 | 18 | data_range_test: 19 | go run dg.go -c ./examples/range_test/config.yaml -o ./csvs/range_test 20 | 21 | data_input_test: 22 | go run dg.go -c ./examples/input_test/config.yaml -o ./csvs/input_test 23 | 24 | data_unique_test: 25 | go run dg.go -c ./examples/unique_test/config.yaml -o ./csvs/unique_test 26 | 27 | data_const_test: 28 | go run dg.go -c ./examples/const_test/config.yaml -o ./csvs/const_test 29 | 30 | data_match: 31 | go run dg.go -c ./examples/match_test/config.yaml -o ./csvs/match -i import.sql 32 | 33 | data_each_match: 34 | go run dg.go -c ./examples/each_match_test/config.yaml -o ./csvs/each_match -i import.sql 35 | 36 | data_pattern: 37 | go run dg.go -c ./examples/pattern_test/config.yaml -o ./csvs/pattern_test -i import.sql 38 | 39 | data: data_many_to_many data_person data_range_test data_input_test data_unique_test data_const_test 40 | echo "done" 41 | 42 | file_server: 43 | python3 -m http.server 3000 -d csvs/many_to_many 44 | 45 | import: 46 | cockroach sql --insecure < examples/many_to_many/insert.sql 47 | 48 | test: 49 | go test ./... -v -cover 50 | 51 | cover: 52 | go test -v -coverpkg=./... -coverprofile=profile.cov ./... -count=1 53 | go tool cover -func profile.cov 54 | # go tool cover -html coverage.out 55 | 56 | profile: 57 | go run dg.go -c ./examples/many_to_many/config.yaml -o ./csvs/many_to_many -cpuprofile profile.out 58 | go tool pprof -http=:8080 profile.out 59 | 60 | release: validate_version 61 | # make sure the folder exists 62 | mkdir -p ./releases 63 | 64 | # linux 65 | GOOS=linux go build -ldflags "-X main.version=${VERSION}" -o dg ;\ 66 | tar -zcvf ./releases/dg_${VERSION}_linux.tar.gz ./dg ;\ 67 | 68 | # macos (arm) 69 | GOOS=darwin GOARCH=arm64 go build -ldflags "-X main.version=${VERSION}" -o dg ;\ 70 | tar -zcvf ./releases/dg_${VERSION}_macos_arm64.tar.gz ./dg ;\ 71 | 72 | # macos (amd) 73 | GOOS=darwin GOARCH=amd64 go build -ldflags "-X main.version=${VERSION}" -o dg ;\ 74 | tar -zcvf ./releases/dg_${VERSION}_macos_amd64.tar.gz ./dg ;\ 75 | 76 | # windows 77 | GOOS=windows go build -ldflags "-X main.version=${VERSION}" -o dg ;\ 78 | tar -zcvf ./releases/dg_${VERSION}_windows.tar.gz ./dg ;\ 79 | 80 | rm ./dg -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | drawing 3 |

4 | 5 | A fast data generator that produces CSV files from generated relational data. 6 | 7 | ## Table of Contents 8 | 9 | 1. [Installation](#installation) 10 | 1. [Usage](#usage) 11 | - Import via [HTTP](#import-via-http) 12 | - Import via [psql](#import-via-psql) 13 | - Import via [nodelocal](#import-via-nodelocal) 14 | 1. [Tables](#tables) 15 | - [gen](#gen) 16 | - [set](#set) 17 | - [inc](#inc) 18 | - [ref](#ref) 19 | - [each](#each) 20 | - [range](#range) 21 | - [match](#match) 22 | 1. [Inputs](#inputs) 23 | - [csv](#csv) 24 | 1. [Functions](#functions) 25 | 1. [Thanks](#thanks) 26 | 1. [Todos](#todos) 27 | 28 | ### Installation 29 | 30 | Find the release that matches your architecture on the [releases](https://github.com/codingconcepts/dg/releases) page. 31 | 32 | Download the tar, extract the executable, and move it into your PATH: 33 | 34 | ``` 35 | $ tar -xvf dg_[VERSION]-rc1_macOS.tar.gz 36 | ``` 37 | 38 | ### Usage 39 | 40 | ``` 41 | $ dg 42 | Usage dg: 43 | -c string 44 | the absolute or relative path to the config file 45 | -cpuprofile string 46 | write cpu profile to file 47 | -i string 48 | write import statements to file 49 | -o string 50 | the absolute or relative path to the output dir (default ".") 51 | -p int 52 | port to serve files from (omit to generate without serving) 53 | -version 54 | display the current version number 55 | ``` 56 | 57 | Create a config file. In the following example, we create 10,000 people, 50 events, 5 person types, and then populate the many-to-many `person_event` resolver table with 500,000 rows that represent the Cartesian product between the person and event tables: 58 | 59 | ```yaml 60 | tables: 61 | - name: person 62 | count: 10000 63 | columns: 64 | # Generate a random UUID for each person 65 | - name: id 66 | type: gen 67 | processor: 68 | value: ${uuid} 69 | 70 | - name: event 71 | count: 50 72 | columns: 73 | # Generate a random UUID for each event 74 | - name: id 75 | type: gen 76 | processor: 77 | value: ${uuid} 78 | 79 | - name: person_type 80 | count: 5 81 | columns: 82 | # Generate a random UUID for each person_type 83 | - name: id 84 | type: gen 85 | processor: 86 | value: ${uuid} 87 | 88 | # Generate a random 16 bit number and left-pad it to 5 digits 89 | - name: name 90 | type: gen 91 | processor: 92 | value: ${uint16} 93 | format: "%05d" 94 | 95 | - name: person_event 96 | columns: 97 | # Generate a random UUID for each person_event 98 | - name: id 99 | type: gen 100 | processor: 101 | value: ${uuid} 102 | 103 | # Select a random id from the person_type table 104 | - name: person_type 105 | type: ref 106 | processor: 107 | table: person_type 108 | column: id 109 | 110 | # Generate a person_id column for each id in the person table 111 | - name: person_id 112 | type: each 113 | processor: 114 | table: person 115 | column: id 116 | 117 | # Generate an event_id column for each id in the event table 118 | - name: event_id 119 | type: each 120 | processor: 121 | table: event 122 | column: id 123 | ``` 124 | 125 | Run the application: 126 | 127 | ``` 128 | $ dg -c your_config_file.yaml -o your_output_dir -p 3000 129 | loaded config file took: 428µs 130 | generated table: person took: 41ms 131 | generated table: event took: 159µs 132 | generated table: person_type took: 42µs 133 | generated table: person_event took: 1s 134 | generated all tables took: 1s 135 | wrote csv: person took: 1ms 136 | wrote csv: event took: 139µs 137 | wrote csv: person_type took: 110µs 138 | wrote csv: person_event took: 144ms 139 | wrote all csvs took: 145ms 140 | ``` 141 | 142 | This will output and dg will then run an HTTP server allow you to import the files from localhost. 143 | 144 | ``` 145 | your_output_dir 146 | ├── event.csv 147 | ├── person.csv 148 | ├── person_event.csv 149 | └── person_type.csv 150 | ``` 151 | 152 | ##### Import via HTTP 153 | 154 | Then import the files as you would any other; here's an example insert into CockroachDB: 155 | 156 | ```sql 157 | IMPORT INTO "person" ("id") 158 | CSV DATA ( 159 | 'http://localhost:3000/person.csv' 160 | ) 161 | WITH skip='1', nullif = '', allow_quoted_null; 162 | 163 | IMPORT INTO "event" ("id") 164 | CSV DATA ( 165 | 'http://localhost:3000/event.csv' 166 | ) 167 | WITH skip='1', nullif = '', allow_quoted_null; 168 | 169 | IMPORT INTO "person_type" ("id", "name") 170 | CSV DATA ( 171 | 'http://localhost:3000/person_type.csv' 172 | ) 173 | WITH skip='1', nullif = '', allow_quoted_null; 174 | 175 | IMPORT INTO "person_event" ("person_id", "event_id", "id", "person_type") 176 | CSV DATA ( 177 | 'http://localhost:3000/person_event.csv' 178 | ) 179 | WITH skip='1', nullif = '', allow_quoted_null; 180 | ``` 181 | 182 | ##### Import via psql 183 | 184 | If you're working with a remote database and have access to the `psql` binary, try importing the CSV file as follows: 185 | 186 | ```sh 187 | psql "postgres://root@localhost:26257/defaultdb?sslmode=disable" \ 188 | -c "\COPY public.person (id, full_name, date_of_birth, user_type, favourite_animal) FROM './csvs/person/person.csv' WITH DELIMITER ',' CSV HEADER NULL E''" 189 | ``` 190 | 191 | ##### Import via nodelocal 192 | 193 | If you're working with a remote database and have access to the `cockroach` binary, try importing the CSV file as follows: 194 | 195 | ```sh 196 | cockroach nodelocal upload ./csvs/person/person.csv imports/person.csv \ 197 | --url "postgres://root@localhost:26257?sslmode=disable" 198 | ``` 199 | 200 | Then importing the file as follows: 201 | 202 | ```sql 203 | IMPORT INTO person ("id", "full_name", "date_of_birth", "user_type", "favourite_animal") 204 | CSV DATA ( 205 | 'nodelocal://1/imports/person.csv' 206 | ) WITH skip = '1'; 207 | ``` 208 | 209 | ### Tables 210 | 211 | Table elements instruct dg to generate data for a single table and output it as a csv file. Here are the configuration options for a table: 212 | 213 | ```yaml 214 | tables: 215 | - name: person 216 | unique_columns: [col_a, col_b] 217 | count: 10 218 | columns: ... 219 | ``` 220 | 221 | This config generates 10 random rows for the person table. Here's a breakdown of the fields: 222 | 223 | | Field Name | Optional | Description | 224 | | -------------- | -------- | ---------------------------------------------------------------------------------------------------------------------------- | 225 | | name | No | Name of the table. Must be unique. | 226 | | unique_columns | Yes | Removes duplicates from the table based on the column names provided | 227 | | count | Yes | If provided, will determine the number of rows created. If not provided, will be calculated by the current table size. | 228 | | suppress | Yes | If `true` the table won't be written to a CSV. Useful when you need to generate intermediate tables to combine data locally. | 229 | | columns | No | A collection of columns to generate for the table. | 230 | 231 | #### Processors 232 | 233 | dg takes its configuration from a config file that is parsed in the form of an object containing arrays of objects; `tables` and `inputs`. Each object in the `tables` array represents a CSV file to be generated for a named table and contains a collection of columns to generate data for. 234 | 235 | ##### gen 236 | 237 | Generate a random value for the column. Here's an example: 238 | 239 | ```yaml 240 | - name: sku 241 | type: gen 242 | processor: 243 | value: SKU${uint16} 244 | format: "%05d" 245 | ``` 246 | 247 | This configuration will generate a random left-padded `uint16` with a prefix of "SKU" for a column called "sku". `value` contains zero or more function placeholders that can be used to generate data. A list of available functions can be found [here](https://github.com/codingconcepts/dg#functions). 248 | 249 | Generate a pattern-based value for the column. Here's an example: 250 | 251 | ```yaml 252 | - name: phone 253 | type: gen 254 | processor: 255 | pattern: \d{3}-\d{3}-\d{4} 256 | ``` 257 | 258 | This configuration will generate US-format phone number, like 123-456-7890. 259 | 260 | ##### const 261 | 262 | Provide a constant set of values for a column. Here's an example: 263 | 264 | ```yaml 265 | - name: options 266 | type: const 267 | processor: 268 | values: [bed_breakfast, bed] 269 | ``` 270 | 271 | This configuration will create a column containing two rows. 272 | 273 | ##### set 274 | 275 | Select a value from a given set. Here's an example: 276 | 277 | ```yaml 278 | - name: user_type 279 | type: set 280 | processor: 281 | values: [admin, regular, read-only] 282 | ``` 283 | 284 | This configuration will select between the values "admin", "regular", and "read-only"; each with an equal probability of being selected. 285 | 286 | Items in a set can also be given a weight, which will affect their likelihood of being selected. Here's an example: 287 | 288 | ```yaml 289 | - name: favourite_animal 290 | type: set 291 | processor: 292 | values: [rabbit, dog, cat] 293 | weights: [10, 60, 30] 294 | ``` 295 | 296 | This configuration will select between the values "rabbit", "dog", and "cat"; each with different probabilities of being selected. Rabbits will be selected approximately 10% of the time, dogs 60%, and cats 30%. The total value doesn't have to be 100, however, you can use whichever numbers make most sense to you. 297 | 298 | ##### inc 299 | 300 | Generates an incrementing number. Here's an example: 301 | 302 | ```yaml 303 | - name: id 304 | type: inc 305 | processor: 306 | start: 1 307 | format: "P%03d" 308 | ``` 309 | 310 | This configuration will generate left-padded ids starting from 1, and format them with a prefix of "P". 311 | 312 | ##### ref 313 | 314 | References a value from a previously generated table. Here's an example: 315 | 316 | ```yaml 317 | - name: ptype 318 | type: ref 319 | processor: 320 | table: person_type 321 | column: id 322 | ``` 323 | 324 | This configuration will choose a random id from the person_type table and create a `ptype` column to store the values. 325 | 326 | Use the `ref` type if you need to reference another table but don't need to generate a new row for _every_ instance of the referenced column. 327 | 328 | ##### each 329 | 330 | Creates a row for each value in another table. If multiple `each` columns are provided, a Cartesian product of both columns will be generated. 331 | 332 | Here's an example of one `each` column: 333 | 334 | ```yaml 335 | - name: person 336 | count: 3 337 | columns: 338 | - name: id 339 | type: gen 340 | processor: 341 | value: ${uuid} 342 | 343 | # person 344 | # 345 | # id 346 | # c40819f8-2c76-44dd-8c44-5eef6a0f2695 347 | # 58f42be2-6cc9-4a8c-b702-c72ab1decfea 348 | # ccbc2244-667b-4bb5-a5cd-a1e9626a90f9 349 | 350 | - name: pet 351 | columns: 352 | - name: person_id 353 | type: each 354 | processor: 355 | table: person 356 | column: id 357 | - name: name 358 | type: gen 359 | processor: 360 | value: first_name 361 | # pet 362 | # 363 | # person_id name 364 | # c40819f8-2c76-44dd-8c44-5eef6a0f2695 Carlo 365 | # 58f42be2-6cc9-4a8c-b702-c72ab1decfea Armando 366 | # ccbc2244-667b-4bb5-a5cd-a1e9626a90f9 Kailey 367 | ``` 368 | 369 | Here's an example of two `each` columns: 370 | 371 | ```yaml 372 | - name: person 373 | count: 3 374 | columns: 375 | - name: id 376 | type: gen 377 | processor: 378 | value: ${uuid} 379 | 380 | # person 381 | # 382 | # id 383 | # c40819f8-2c76-44dd-8c44-5eef6a0f2695 384 | # 58f42be2-6cc9-4a8c-b702-c72ab1decfea 385 | # ccbc2244-667b-4bb5-a5cd-a1e9626a90f9 386 | 387 | - name: event 388 | count: 3 389 | columns: 390 | - name: id 391 | type: gen 392 | processor: 393 | value: ${uuid} 394 | 395 | # event 396 | # 397 | # id 398 | # 39faeb54-67d1-46db-a38b-825b41bfe919 399 | # 7be981a9-679b-432a-8a0f-4a0267170c68 400 | # 9954f321-8040-4cd7-96e6-248d03ee9266 401 | 402 | - name: person_event 403 | columns: 404 | - name: person_id 405 | type: each 406 | processor: 407 | table: person 408 | column: id 409 | - name: event_id 410 | type: each 411 | processor: 412 | table: event 413 | column: id 414 | # person_event 415 | # 416 | # person_id 417 | # c40819f8-2c76-44dd-8c44-5eef6a0f2695 39faeb54-67d1-46db-a38b-825b41bfe919 418 | # c40819f8-2c76-44dd-8c44-5eef6a0f2695 7be981a9-679b-432a-8a0f-4a0267170c68 419 | # c40819f8-2c76-44dd-8c44-5eef6a0f2695 9954f321-8040-4cd7-96e6-248d03ee9266 420 | # 58f42be2-6cc9-4a8c-b702-c72ab1decfea 39faeb54-67d1-46db-a38b-825b41bfe919 421 | # 58f42be2-6cc9-4a8c-b702-c72ab1decfea 7be981a9-679b-432a-8a0f-4a0267170c68 422 | # 58f42be2-6cc9-4a8c-b702-c72ab1decfea 9954f321-8040-4cd7-96e6-248d03ee9266 423 | # ccbc2244-667b-4bb5-a5cd-a1e9626a90f9 39faeb54-67d1-46db-a38b-825b41bfe919 424 | # ccbc2244-667b-4bb5-a5cd-a1e9626a90f9 7be981a9-679b-432a-8a0f-4a0267170c68 425 | # ccbc2244-667b-4bb5-a5cd-a1e9626a90f9 9954f321-8040-4cd7-96e6-248d03ee9266 426 | ``` 427 | 428 | Use the `each` type if you need to reference another table and need to generate a new row for _every_ instance of the referenced column. 429 | 430 | ##### range 431 | 432 | Generates data within a given range. Note that a number of factors determine how this generator will behave. The step (and hence, number of rows) will be generated in the following priority order: 433 | 434 | 1. If an `each` generator is being used, step will be derived from that 435 | 1. If a `count` is provided, step will be derived from that 436 | 1. Otherwise, `step` will be used 437 | 438 | Here's an example that generates monotonically increasing ids for a table, starting from 1: 439 | 440 | ```yaml 441 | - name: users 442 | count: 10000 443 | columns: 444 | - name: id 445 | type: range 446 | processor: 447 | type: int 448 | from: 1 449 | step: 1 450 | ``` 451 | 452 | Here's an example that generates all dates between `2020-01-01` and `2023-01-01` at daily intervals: 453 | 454 | ```yaml 455 | - name: event 456 | columns: 457 | - name: date 458 | type: range 459 | processor: 460 | type: date 461 | from: 2020-01-01 462 | to: 2023-01-01 463 | step: 24h 464 | format: 2006-01-02 465 | ``` 466 | 467 | Here's an example that generates 10 dates between `2020-01-01` and `2023-01-02`: 468 | 469 | ```yaml 470 | - name: event 471 | count: 10 472 | columns: 473 | - name: date 474 | type: range 475 | processor: 476 | type: date 477 | from: 2020-01-01 478 | to: 2023-01-01 479 | format: 2006-01-02 480 | step: 24h # Ignored due to table count. 481 | ``` 482 | 483 | Here's an example that generates 20 dates (one for every row found from an `each` generator) between `2020-01-01` and `2023-01-02`: 484 | 485 | ```yaml 486 | - name: person 487 | count: 20 488 | columns: 489 | - name: id 490 | type: gen 491 | processor: 492 | value: ${uuid} 493 | 494 | - name: event 495 | count: 10 # Ignored due to resulting count from "each" generator. 496 | columns: 497 | - name: person_id 498 | type: each 499 | processor: 500 | table: person 501 | column: id 502 | 503 | - name: date 504 | type: range 505 | processor: 506 | type: date 507 | from: 2020-01-01 508 | to: 2023-01-01 509 | format: 2006-01-02 510 | ``` 511 | 512 | The range generate currently supports the following data types: 513 | 514 | - `date` - Generate dates between a from and to value 515 | - `int` - Generate integers between a from and to value 516 | 517 | ##### match 518 | 519 | Generates data by matching data in another table. In this example, we'll assume there's a CSV file for the `significant_event` input that generates the following table: 520 | 521 | | date | event | 522 | | ---------- | ----- | 523 | | 2023-01-10 | abc | 524 | | 2023-01-11 | | 525 | | 2023-01-12 | def | 526 | 527 | ```yaml 528 | inputs: 529 | - name: significant_event 530 | type: csv 531 | source: 532 | file_name: significant_dates.csv 533 | 534 | tables: 535 | - name: events 536 | columns: 537 | - name: timeline_date 538 | type: range 539 | processor: 540 | type: date 541 | from: 2023-01-09 542 | to: 2023-01-13 543 | format: 2006-01-02 544 | step: 24h 545 | - name: timeline_event 546 | type: match 547 | processor: 548 | source_table: significant_event 549 | source_column: date 550 | source_value: events 551 | match_column: timeline_date 552 | ``` 553 | 554 | dg will match rows in the significant_event table with rows in the events table based on the match between `significant_event.date` and `events.timeline_date`, and take the value from the `significant_events.event` column where there's a match (otherwise leaving `NULL`). This will result in the following `events` table being generated: 555 | 556 | | timeline_date | timeline_event | 557 | | ------------- | -------------- | 558 | | 2023-01-09 | | 559 | | 2023-01-10 | abc | 560 | | 2023-01-11 | | 561 | | 2023-01-12 | def | 562 | | 2023-01-13 | | 563 | 564 | ### Inputs 565 | 566 | dg takes its configuration from a config file that is parsed in the form of an object containing arrays of objects; `tables` and `inputs`. Each object in the `inputs` array represents a data source from which a table can be created. Tables created via inputs will not result in output CSVs. 567 | 568 | ##### csv 569 | 570 | Reads in a CSV file as a table that can be referenced from other tables. Here's an example: 571 | 572 | ```yaml 573 | - name: significant_event 574 | type: csv 575 | source: 576 | file_name: significant_dates.csv 577 | ``` 578 | 579 | This configuration will read from a file called significant_dates.csv and create a table from its contents. Note that the `file_name` should be relative to the config directory, so if your CSV file is in the same directory as your config file, just include the file name. 580 | 581 | ### Functions 582 | 583 | | Name | Type | Example | 584 | | ------------------------------ | --------- | --------------------------------------------------------------------------------------------------------- | 585 | | ${ach_account} | string | 586981797546 | 586 | | ${ach_routing} | string | 441478502 | 587 | | ${adjective_demonstrative} | string | there | 588 | | ${adjective_descriptive} | string | eager | 589 | | ${adjective_indefinite} | string | several | 590 | | ${adjective_interrogative} | string | whose | 591 | | ${adjective_possessive} | string | her | 592 | | ${adjective_proper} | string | Iraqi | 593 | | ${adjective_quantitative} | string | sufficient | 594 | | ${adjective} | string | double | 595 | | ${adverb_degree} | string | far | 596 | | ${adverb_frequency_definite} | string | daily | 597 | | ${adverb_frequency_indefinite} | string | always | 598 | | ${adverb_manner} | string | unexpectedly | 599 | | ${adverb_place} | string | here | 600 | | ${adverb_time_definite} | string | yesterday | 601 | | ${adverb_time_indefinite} | string | just | 602 | | ${adverb} | string | far | 603 | | ${animal_type} | string | mammals | 604 | | ${animal} | string | ape | 605 | | ${app_author} | string | RedLaser | 606 | | ${app_name} | string | SlateBlueweek | 607 | | ${app_version} | string | 3.2.10 | 608 | | ${bitcoin_address} | string | 16YmZ5ol5aXKjilZT2c2nIeHpbq | 609 | | ${bitcoin_private_key} | string | 5JzwyfrpHRoiA59Y1Pd9yLq52cQrAXxSNK4QrGrRUxkak5Howhe | 610 | | ${bool} | bool | true | 611 | | ${breakfast} | string | Awesome orange chocolate muffins | 612 | | ${bs} | string | leading-edge | 613 | | ${car_fuel_type} | string | LPG | 614 | | ${car_maker} | string | Seat | 615 | | ${car_model} | string | Camry Solara Convertible | 616 | | ${car_transmission_type} | string | Manual | 617 | | ${car_type} | string | Passenger car mini | 618 | | ${chrome_user_agent} | string | Mozilla/5.0 (X11; Linux i686) AppleWebKit/5310 (KHTML, like Gecko) Chrome/37.0.882.0 Mobile Safari/5310 | 619 | | ${city} | string | Memphis | 620 | | ${color} | string | DarkBlue | 621 | | ${company_suffix} | string | LLC | 622 | | ${company} | string | PlanetEcosystems | 623 | | ${connective_casual} | string | an effect of | 624 | | ${connective_complaint} | string | i.e. | 625 | | ${connective_examplify} | string | for example | 626 | | ${connective_listing} | string | next | 627 | | ${connective_time} | string | soon | 628 | | ${connective} | string | for instance | 629 | | ${country_abr} | string | VU | 630 | | ${country} | string | Eswatini | 631 | | ${credit_card_cvv} | string | 315 | 632 | | ${credit_card_exp} | string | 06/28 | 633 | | ${credit_card_type} | string | Mastercard | 634 | | ${currency_long} | string | Mozambique Metical | 635 | | ${currency_short} | string | SCR | 636 | | ${date} | time.Time | 2005-01-25 22:17:55.371781952 +0000 UTC | 637 | | ${day} | int | 27 | 638 | | ${dessert} | string | Chocolate coconut dream bars | 639 | | ${dinner} | string | Creole potato salad | 640 | | ${domain_name} | string | centralb2c.net | 641 | | ${domain_suffix} | string | com | 642 | | ${email} | string | ethanlebsack@lynch.name | 643 | | ${emoji} | string | ♻️ | 644 | | ${file_extension} | string | csv | 645 | | ${file_mime_type} | string | image/vasa | 646 | | ${firefox_user_agent} | string | Mozilla/5.0 (X11; Linux x86_64; rv:6.0) Gecko/1951-07-21 Firefox/37.0 | 647 | | ${first_name} | string | Kailee | 648 | | ${flipacoin} | string | Tails | 649 | | ${float32} | float32 | 2.7906555e+38 | 650 | | ${float64} | float64 | 4.314310154193861e+307 | 651 | | ${fruit} | string | Eggplant | 652 | | ${gender} | string | female | 653 | | ${hexcolor} | string | #6daf06 | 654 | | ${hobby} | string | Bowling | 655 | | ${hour} | int | 18 | 656 | | ${http_method} | string | DELETE | 657 | | ${http_status_code_simple} | int | 404 | 658 | | ${http_status_code} | int | 503 | 659 | | ${http_version} | string | HTTP/1.1 | 660 | | ${int16} | int16 | 18940 | 661 | | ${int32} | int32 | 2129368442 | 662 | | ${int64} | int64 | 5051946056392951363 | 663 | | ${int8} | int8 | 110 | 664 | | ${ipv4_address} | string | 191.131.155.85 | 665 | | ${ipv6_address} | string | 1642:94b:52d8:3a4e:38bc:4d87:846e:9c83 | 666 | | ${job_descriptor} | string | Senior | 667 | | ${job_level} | string | Identity | 668 | | ${job_title} | string | Executive | 669 | | ${language_abbreviation} | string | kn | 670 | | ${language} | string | Bengali | 671 | | ${last_name} | string | Friesen | 672 | | ${latitude} | float64 | 45.919913 | 673 | | ${longitude} | float64 | -110.313125 | 674 | | ${lunch} | string | Sweet and sour pork balls | 675 | | ${mac_address} | string | bd:e8:ce:66:da:5b | 676 | | ${minute} | int | 23 | 677 | | ${month_string} | string | April | 678 | | ${month} | int | 10 | 679 | | ${name_prefix} | string | Ms. | 680 | | ${name_suffix} | string | I | 681 | | ${name} | string | Paxton Schumm | 682 | | ${nanosecond} | int | 349669923 | 683 | | ${nicecolors} | []string | [#490a3d #bd1550 #e97f02 #f8ca00 #8a9b0f] | 684 | | ${noun_abstract} | string | timing | 685 | | ${noun_collective_animal} | string | brace | 686 | | ${noun_collective_people} | string | mob | 687 | | ${noun_collective_thing} | string | orchard | 688 | | ${noun_common} | string | problem | 689 | | ${noun_concrete} | string | town | 690 | | ${noun_countable} | string | cat | 691 | | ${noun_uncountable} | string | wisdom | 692 | | ${noun} | string | case | 693 | | ${opera_user_agent} | string | Opera/10.10 (Windows NT 5.01; en-US) Presto/2.11.165 Version/13.00 | 694 | | ${password} | string | 1k0vWN 9Z | 4f={B YPRda4ys. | 695 | | ${pet_name} | string | Bernadette | 696 | | ${phone_formatted} | string | (476)455-2253 | 697 | | ${phone} | string | 2692528685 | 698 | | ${phrase} | string | I'm straight | 699 | | ${preposition_compound} | string | ahead of | 700 | | ${preposition_double} | string | next to | 701 | | ${preposition_simple} | string | at | 702 | | ${preposition} | string | outside of | 703 | | ${programming_language} | string | PL/SQL | 704 | | ${pronoun_demonstrative} | string | those | 705 | | ${pronoun_interrogative} | string | whom | 706 | | ${pronoun_object} | string | us | 707 | | ${pronoun_personal} | string | I | 708 | | ${pronoun_possessive} | string | mine | 709 | | ${pronoun_reflective} | string | yourself | 710 | | ${pronoun_relative} | string | whom | 711 | | ${pronoun} | string | those | 712 | | ${quote} | string | "Raw denim tilde cronut mlkshk photo booth kickstarter." - Gunnar Rice | 713 | | ${rgbcolor} | []int | [152 74 172] | 714 | | ${safari_user_agent} | string | Mozilla/5.0 (Windows; U; Windows 95) AppleWebKit/536.41.5 (KHTML, like Gecko) Version/5.2 Safari/536.41.5 | 715 | | ${safecolor} | string | gray | 716 | | ${second} | int | 58 | 717 | | ${snack} | string | Crispy fried chicken spring rolls | 718 | | ${ssn} | string | 783135577 | 719 | | ${state_abr} | string | AL | 720 | | ${state} | string | Kentucky | 721 | | ${street_name} | string | Way | 722 | | ${street_number} | string | 6234 | 723 | | ${street_prefix} | string | Port | 724 | | ${street_suffix} | string | stad | 725 | | ${street} | string | 11083 Lake Fall mouth | 726 | | ${time_zone_abv} | string | ADT | 727 | | ${time_zone_full} | string | (UTC-02:00) Coordinated Universal Time-02 | 728 | | ${time_zone_offset} | float32 | 3 | 729 | | ${time_zone_region} | string | Asia/Aqtau | 730 | | ${time_zone} | string | Mountain Standard Time (Mexico) | 731 | | ${uint128_hex} | string | 0xcd50930d5bc0f2e8fa36205e3d7bd7b2 | 732 | | ${uint16_hex} | string | 0x7c80 | 733 | | ${uint16} | uint16 | 25076 | 734 | | ${uint256_hex} | string | 0x61334b8c51fa841bf9a3f1f0ac3750cd1b51ca2046b0fb75627ac73001f0c5aa | 735 | | ${uint32_hex} | string | 0xfe208664 | 736 | | ${uint32} | uint32 | 783098878 | 737 | | ${uint64_hex} | string | 0xc8b91dc44e631956 | 738 | | ${uint64} | uint64 | 5722659847801560283 | 739 | | ${uint8_hex} | string | 0x65 | 740 | | ${uint8} | uint8 | 192 | 741 | | ${url} | string | https://www.leadcutting-edge.net/productize | 742 | | ${user_agent} | string | Opera/10.64 (Windows NT 5.2; en-US) Presto/2.13.295 Version/10.00 | 743 | | ${username} | string | Gutmann2845 | 744 | | ${uuid} | string | e6e34ff4-1def-41e5-9afb-f697a51c0359 | 745 | | ${vegetable} | string | Tomato | 746 | | ${verb_action} | string | knit | 747 | | ${verb_helping} | string | did | 748 | | ${verb_linking} | string | has | 749 | | ${verb} | string | be | 750 | | ${weekday} | string | Tuesday | 751 | | ${word} | string | month | 752 | | ${year} | int | 1962 | 753 | | ${zip} | string | 45618 | 754 | 755 | ### Building releases locally 756 | 757 | ``` 758 | $ VERSION=0.1.0 make release 759 | ``` 760 | 761 | ### Thanks 762 | 763 | Thanks to the maintainers of the following fantastic packages, whose code this tools makes use of: 764 | 765 | - [samber/lo](https://github.com/samber/lo) 766 | - [brianvoe/gofakeit](https://github.com/brianvoe/gofakeit) 767 | - [go-yaml/yaml](https://github.com/go-yaml/yaml) 768 | - [stretchr/testify](github.com/stretchr/testify/assert) 769 | 770 | ### Todos 771 | 772 | - Improve code coverage 773 | - Write file after generating, then only keep columns that other tables need 774 | - Support for range without a table count (e.g. the following results in zero rows unless a count is provided) 775 | 776 | ```yaml 777 | - name: bet_types 778 | count: 3 779 | columns: 780 | - name: id 781 | type: range 782 | processor: 783 | type: int 784 | from: 1 785 | step: 1 786 | - name: description 787 | type: const 788 | processor: 789 | values: [Win, Lose, Draw] 790 | ``` 791 | -------------------------------------------------------------------------------- /assets/cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codingconcepts/dg/e0a02910aab9dabf73a19c087be2a730c5d96b1c/assets/cover.png -------------------------------------------------------------------------------- /assets/cover_grey.svg: -------------------------------------------------------------------------------- 1 | 2 | 61 | 62 | -------------------------------------------------------------------------------- /dg.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "flag" 6 | "fmt" 7 | "log" 8 | "os" 9 | "path" 10 | "runtime/pprof" 11 | "strings" 12 | "text/template" 13 | "time" 14 | 15 | "github.com/codingconcepts/dg/internal/pkg/generator" 16 | "github.com/codingconcepts/dg/internal/pkg/model" 17 | "github.com/codingconcepts/dg/internal/pkg/source" 18 | "github.com/codingconcepts/dg/internal/pkg/ui" 19 | "github.com/codingconcepts/dg/internal/pkg/web" 20 | "github.com/samber/lo" 21 | ) 22 | 23 | var ( 24 | version string 25 | ) 26 | 27 | func main() { 28 | log.SetFlags(0) 29 | 30 | configPath := flag.String("c", "", "the absolute or relative path to the config file") 31 | outputDir := flag.String("o", ".", "the absolute or relative path to the output dir") 32 | createImports := flag.String("i", "", "write import statements to file") 33 | cpuprofile := flag.String("cpuprofile", "", "write cpu profile to file") 34 | versionFlag := flag.Bool("version", false, "display the current version number") 35 | port := flag.Int("p", 0, "port to serve files from (omit to generate without serving)") 36 | flag.Parse() 37 | 38 | if *cpuprofile != "" { 39 | defer launchProfiler(*cpuprofile)() 40 | } 41 | 42 | if *versionFlag { 43 | fmt.Println(version) 44 | return 45 | } 46 | 47 | if *configPath == "" { 48 | flag.Usage() 49 | os.Exit(2) 50 | } 51 | 52 | tt := ui.TimeTracker(os.Stdout, realClock{}, 40) 53 | defer tt(time.Now(), "done") 54 | 55 | c, err := loadConfig(*configPath, tt) 56 | if err != nil { 57 | log.Fatalf("error loading config: %v", err) 58 | } 59 | 60 | files := make(map[string]model.CSVFile) 61 | 62 | if err = loadInputs(c, path.Dir(*configPath), tt, files); err != nil { 63 | log.Fatalf("error loading inputs: %v", err) 64 | } 65 | 66 | if err = generateTables(c, tt, files); err != nil { 67 | log.Fatalf("error generating tables: %v", err) 68 | } 69 | 70 | if err = removeSuppressedColumns(c, tt, files); err != nil { 71 | log.Fatalf("error removing supressed columns: %v", err) 72 | } 73 | 74 | if err := writeFiles(*outputDir, files, tt); err != nil { 75 | log.Fatalf("error writing csv files: %v", err) 76 | } 77 | 78 | if *createImports != "" { 79 | if err := writeImports(*outputDir, *createImports, c, files, tt); err != nil { 80 | log.Fatalf("error writing import statements: %v", err) 81 | } 82 | } 83 | 84 | if *port == 0 { 85 | return 86 | } 87 | 88 | log.Fatal(web.Serve(*outputDir, *port)) 89 | } 90 | 91 | func loadConfig(filename string, tt ui.TimerFunc) (model.Config, error) { 92 | defer tt(time.Now(), "loaded config file") 93 | 94 | file, err := os.Open(filename) 95 | if err != nil { 96 | return model.Config{}, fmt.Errorf("opening file: %w", err) 97 | } 98 | defer file.Close() 99 | 100 | return model.LoadConfig(file) 101 | } 102 | 103 | func loadInputs(c model.Config, configDir string, tt ui.TimerFunc, files map[string]model.CSVFile) error { 104 | defer tt(time.Now(), "loaded data sources") 105 | 106 | for _, input := range c.Inputs { 107 | if err := loadInput(input, configDir, tt, files); err != nil { 108 | return fmt.Errorf("loading input for %q: %w", input.Name, err) 109 | } 110 | } 111 | 112 | return nil 113 | } 114 | 115 | func loadInput(input model.Input, configDir string, tt ui.TimerFunc, files map[string]model.CSVFile) error { 116 | defer tt(time.Now(), fmt.Sprintf("loaded data source: %s", input.Name)) 117 | 118 | switch input.Type { 119 | case "csv": 120 | var s model.SourceCSV 121 | if err := input.Source.UnmarshalFunc(&s); err != nil { 122 | return fmt.Errorf("parsing csv source for %s: %w", input.Name, err) 123 | } 124 | 125 | if err := source.LoadCSVSource(input.Name, configDir, s, files); err != nil { 126 | return fmt.Errorf("loading csv for %s: %w", input.Name, err) 127 | } 128 | } 129 | 130 | return nil 131 | } 132 | 133 | func generateTables(c model.Config, tt ui.TimerFunc, files map[string]model.CSVFile) error { 134 | defer tt(time.Now(), "generated all tables") 135 | 136 | for _, table := range c.Tables { 137 | if err := generateTable(table, files, tt); err != nil { 138 | return fmt.Errorf("generating csv file for %q: %w", table.Name, err) 139 | } 140 | } 141 | 142 | return nil 143 | } 144 | 145 | func generateTable(t model.Table, files map[string]model.CSVFile, tt ui.TimerFunc) error { 146 | defer tt(time.Now(), fmt.Sprintf("generated table: %s", t.Name)) 147 | 148 | // Create the Cartesian product of any each types first. 149 | var eg generator.EachGenerator 150 | if err := eg.Generate(t, files); err != nil { 151 | return fmt.Errorf("generating each columns: %w", err) 152 | } 153 | 154 | // Create any const columns next. 155 | var cg generator.ConstGenerator 156 | if err := cg.Generate(t, files); err != nil { 157 | return fmt.Errorf("generating const columns: %w", err) 158 | } 159 | 160 | for _, col := range t.Columns { 161 | switch col.Type { 162 | case "ref": 163 | var g generator.RefGenerator 164 | if err := col.Generator.UnmarshalFunc(&g); err != nil { 165 | return fmt.Errorf("parsing ref process for %s.%s: %w", t.Name, col.Name, err) 166 | } 167 | if err := g.Generate(t, col, files); err != nil { 168 | return fmt.Errorf("running ref process for %s.%s: %w", t.Name, col.Name, err) 169 | } 170 | 171 | case "gen": 172 | var g generator.GenGenerator 173 | if err := col.Generator.UnmarshalFunc(&g); err != nil { 174 | return fmt.Errorf("parsing each process for %s: %w", col.Name, err) 175 | } 176 | if err := g.Generate(t, col, files); err != nil { 177 | return fmt.Errorf("running gen process for %s.%s: %w", t.Name, col.Name, err) 178 | } 179 | 180 | case "set": 181 | var g generator.SetGenerator 182 | if err := col.Generator.UnmarshalFunc(&g); err != nil { 183 | return fmt.Errorf("parsing set process for %s.%s: %w", t.Name, col.Name, err) 184 | } 185 | if err := g.Generate(t, col, files); err != nil { 186 | return fmt.Errorf("running set process for %s.%s: %w", t.Name, col.Name, err) 187 | } 188 | 189 | // case "const": 190 | // var g generator.ConstGenerator 191 | // if err := col.Generator.UnmarshalFunc(&g); err != nil { 192 | // return fmt.Errorf("parsing const process for %s.%s: %w", t.Name, col.Name, err) 193 | // } 194 | // if err := g.Generate(t, col, files); err != nil { 195 | // return fmt.Errorf("running const process for %s.%s: %w", t.Name, col.Name, err) 196 | // } 197 | 198 | case "inc": 199 | var g generator.IncGenerator 200 | if err := col.Generator.UnmarshalFunc(&g); err != nil { 201 | return fmt.Errorf("parsing each process for %s: %w", col.Name, err) 202 | } 203 | if err := g.Generate(t, col, files); err != nil { 204 | return fmt.Errorf("running inc process for %s.%s: %w", t.Name, col.Name, err) 205 | } 206 | 207 | case "range": 208 | var g generator.RangeGenerator 209 | if err := col.Generator.UnmarshalFunc(&g); err != nil { 210 | return fmt.Errorf("parsing range process for %s: %w", col.Name, err) 211 | } 212 | if err := g.Generate(t, col, files); err != nil { 213 | return fmt.Errorf("running range process for %s.%s: %w", t.Name, col.Name, err) 214 | } 215 | 216 | case "match": 217 | var g generator.MatchGenerator 218 | if err := col.Generator.UnmarshalFunc(&g); err != nil { 219 | return fmt.Errorf("parsing match process for %s: %w", col.Name, err) 220 | } 221 | if err := g.Generate(t, col, files); err != nil { 222 | return fmt.Errorf("running match process for %s.%s: %w", t.Name, col.Name, err) 223 | } 224 | } 225 | } 226 | 227 | file, ok := files[t.Name] 228 | if !ok { 229 | return fmt.Errorf("missing table: %q", t.Name) 230 | } 231 | 232 | if len(file.UniqueColumns) > 0 { 233 | file.Lines = generator.Transpose(file.Lines) 234 | file.Lines = file.Unique() 235 | file.Lines = generator.Transpose(file.Lines) 236 | } 237 | files[t.Name] = file 238 | 239 | return nil 240 | } 241 | 242 | func removeSuppressedColumns(c model.Config, tt ui.TimerFunc, files map[string]model.CSVFile) error { 243 | defer tt(time.Now(), "removed suppressed columns") 244 | 245 | for _, table := range c.Tables { 246 | for _, column := range table.Columns { 247 | if !column.Suppress { 248 | continue 249 | } 250 | 251 | file, ok := files[table.Name] 252 | if !ok { 253 | return fmt.Errorf("missing table: %q", table.Name) 254 | } 255 | 256 | // Remove suppressed column from header. 257 | var headerIndex int 258 | file.Header = lo.Reject(file.Header, func(v string, i int) bool { 259 | if v == column.Name { 260 | headerIndex = i 261 | return true 262 | } 263 | return false 264 | }) 265 | 266 | // Remove suppressed column from lines. 267 | file.Lines = append(file.Lines[:headerIndex], file.Lines[headerIndex+1:]...) 268 | 269 | files[table.Name] = file 270 | } 271 | } 272 | 273 | return nil 274 | } 275 | 276 | func writeFiles(outputDir string, cfs map[string]model.CSVFile, tt ui.TimerFunc) error { 277 | defer tt(time.Now(), "wrote all csvs") 278 | 279 | if err := os.MkdirAll(outputDir, os.ModePerm); err != nil { 280 | return fmt.Errorf("creating output directory: %w", err) 281 | } 282 | 283 | for name, file := range cfs { 284 | if !file.Output { 285 | continue 286 | } 287 | 288 | if err := writeFile(outputDir, name, file, tt); err != nil { 289 | return fmt.Errorf("writing file %q: %w", file.Name, err) 290 | } 291 | } 292 | 293 | return nil 294 | } 295 | 296 | func writeFile(outputDir, name string, cf model.CSVFile, tt ui.TimerFunc) error { 297 | defer tt(time.Now(), fmt.Sprintf("wrote csv: %s", name)) 298 | 299 | fullPath := path.Join(outputDir, fmt.Sprintf("%s.csv", name)) 300 | file, err := os.Create(fullPath) 301 | if err != nil { 302 | return fmt.Errorf("creating csv file %q: %w", name, err) 303 | } 304 | defer file.Close() 305 | 306 | writer := csv.NewWriter(file) 307 | if err = writer.Write(cf.Header); err != nil { 308 | return fmt.Errorf("writing csv header for %q: %w", name, err) 309 | } 310 | 311 | cf.Lines = generator.Transpose(cf.Lines) 312 | 313 | if err = writer.WriteAll(cf.Lines); err != nil { 314 | return fmt.Errorf("writing csv lines for %q: %w", name, err) 315 | } 316 | 317 | writer.Flush() 318 | return nil 319 | } 320 | 321 | func writeImports(outputDir, name string, c model.Config, files map[string]model.CSVFile, tt ui.TimerFunc) error { 322 | defer tt(time.Now(), fmt.Sprintf("wrote imports: %s", name)) 323 | 324 | importTmpl := template.Must(template.New("import"). 325 | Funcs(template.FuncMap{"join": strings.Join}). 326 | Parse(`IMPORT INTO {{.Name}} ( 327 | {{ join .Header ", " }} 328 | ) 329 | CSV DATA ( 330 | '.../{{.Name}}.csv' 331 | ) 332 | WITH skip='1', nullif = '', allow_quoted_null; 333 | 334 | `), 335 | ) 336 | 337 | fullPath := path.Join(outputDir, name) 338 | file, err := os.Create(fullPath) 339 | if err != nil { 340 | return fmt.Errorf("creating csv file %q: %w", name, err) 341 | } 342 | defer file.Close() 343 | 344 | // Iterate through the tables in the config file, so the imports are in the right order. 345 | for _, table := range c.Tables { 346 | csv := files[table.Name] 347 | if !csv.Output { 348 | continue 349 | } 350 | 351 | if err := importTmpl.Execute(file, csv); err != nil { 352 | return fmt.Errorf("writing import statement for %q: %w", name, err) 353 | } 354 | } 355 | 356 | return nil 357 | } 358 | 359 | func launchProfiler(cpuprofile string) func() { 360 | f, err := os.Create(cpuprofile) 361 | if err != nil { 362 | log.Fatalf("creating file for profiler: %v", err) 363 | } 364 | pprof.StartCPUProfile(f) 365 | 366 | return func() { 367 | pprof.StopCPUProfile() 368 | } 369 | } 370 | 371 | type realClock struct{} 372 | 373 | func (realClock) Now() time.Time { 374 | return time.Now() 375 | } 376 | 377 | func (realClock) Since(t time.Time) time.Duration { 378 | return time.Since(t) 379 | } 380 | -------------------------------------------------------------------------------- /examples/const_test/config.yaml: -------------------------------------------------------------------------------- 1 | tables: 2 | - name: one 3 | columns: 4 | - name: c1 5 | type: const 6 | processor: 7 | values: [a, b, c] 8 | 9 | - name: two 10 | columns: 11 | - name: c1 12 | type: const 13 | processor: 14 | values: [a, b, c, d, e] 15 | 16 | - name: c2 17 | type: const 18 | processor: 19 | values: [a, b] 20 | 21 | - name: three 22 | columns: 23 | - name: c1 24 | type: const 25 | processor: 26 | values: [a, b] 27 | 28 | - name: c2 29 | type: const 30 | processor: 31 | values: [a, b, c, d, e] -------------------------------------------------------------------------------- /examples/each_match_test/config.yaml: -------------------------------------------------------------------------------- 1 | inputs: 2 | 3 | - name: market 4 | type: csv 5 | source: 6 | file_name: market.csv 7 | 8 | tables: 9 | 10 | - name: product 11 | count: 1 12 | unique_columns: [name] 13 | columns: 14 | - name: id 15 | type: gen 16 | processor: 17 | value: ${uuid} 18 | - name: name 19 | type: gen 20 | processor: 21 | value: ${adjective} ${adverb} ${noun} 22 | 23 | - name: market_product 24 | columns: 25 | - name: id 26 | type: gen 27 | processor: 28 | value: ${uuid} 29 | - name: product_id 30 | type: each 31 | processor: 32 | table: product 33 | column: id 34 | - name: market 35 | type: each 36 | processor: 37 | table: market 38 | column: code 39 | - name: region 40 | type: match 41 | processor: 42 | source_table: market 43 | source_column: code 44 | source_value: region 45 | match_column: market 46 | -------------------------------------------------------------------------------- /examples/each_match_test/market.csv: -------------------------------------------------------------------------------- 1 | code,region 2 | us,us-east-1 3 | uk,eu-west-1 4 | -------------------------------------------------------------------------------- /examples/input_test/config.yaml: -------------------------------------------------------------------------------- 1 | inputs: 2 | - name: significant_event 3 | type: csv 4 | source: 5 | file_name: significant_dates.csv 6 | 7 | tables: 8 | - name: events 9 | columns: 10 | - name: timeline_date 11 | type: range 12 | processor: 13 | type: date 14 | from: 1885-01-01 15 | to: 1985-10-26 16 | format: 2006-01-02 17 | step: 24h 18 | - name: timeline_event 19 | type: match 20 | processor: 21 | source_table: significant_event 22 | source_column: date 23 | source_value: events 24 | match_column: timeline_date -------------------------------------------------------------------------------- /examples/input_test/significant_dates.csv: -------------------------------------------------------------------------------- 1 | date,events 2 | 1885-01-01,"Buford Tannen presumably hires an unknown Blacksmith to shoe his horse, but does not pay him." 3 | 1885-07-04,"Hill Valley's Fourth of July Celebrations held in the Palace Saloon, and Chester, the bartender gives additional drinks such as Whiskey to the locals." 4 | 1885-08-29,"There is a meeting of the townspeople over who will pick up the new schoolteacher on September 4. No one volunteers." 5 | 1885-09-02,"Native American Horseback-archers are chased near Hill Valley by the United States Cavalry." 6 | 1885-09-03,"Stinky Lomax is hanged in Haysville, California, for an unknown crime and Marshall Strickland is a witness of the hanging." 7 | 1885-09-04,"A train pulls into Hill Valley Station, carrying the new clock for the Hill Valley Courthouse[10] - and Clara Clayton (who had been commissioned to be the new teacher at the schoolhouse near Carson Spur, outside Hill Valley) aboard as a passenger. With no one to meet Clara upon her arrival, she rents two horses and a wagon buckboard from Joe Statler, then sets out to find the schoolhouse on her own.\n\nA snake spooks the horses pulling Clara's wagon. They then ride madly into Shonash Ravine, killing her instantly. The ravine where she dies is renamed Clayton Ravine in her memory." 8 | 1885-09-05,"The Hill Valley Festival. As part of Hill Valley’s anniversary festivities, the new courthouse clock is started at exactly 8:00 p.m. PST. Photographs with the clock are offered to townspeople wishing to pose with the new timepiece. The clock will remain in faithful service to Hill Valley for the next seven decades." 9 | 1885-09-06,"Buford Tannen and his gang, rob the Pine City Stage" 10 | 1885-09-08,"Hill Valley holds a funeral for Clara Clayton." 11 | 1885-12-01,"Shonash Ravine is renamed Clayton Ravine." 12 | 1917-04-06,"The United States declares war on Germany. Due to hostility against German-Americans during World War I, Doc's father changes his family's name from Von Braun to Brown." 13 | 1930-05-31,"Clint Eastwood, Jr., American film actor, director, producer, and composer (and Marty's alter ego in 1885 Timeline 8) is born." 14 | 1937-03-27,"Biff Tannen is born." 15 | 1938-04-01,"George McFly is born to Arthur and Sylvia McFly." 16 | 1942-11-19,"Calvin Richard Klein, American fashion designer (and Marty's alter ego in 1955 Timeline 2) is born." 17 | 1955-11-05,"Doc Brown slips off his toilet whilst hanging a clock and has a vision of the flux capacitor.\n\nBiff Tannen and his gang pick on George McFly at Lou's Café, warning George to never go into the café again. The café's busboy, Goldie Wilson, gives George advice on standing up for himself, but George ignores it.\n\nWhile 'birdwatching', i.e. spying on a girl (Lorraine Baines) changing through her window, George McFly slips and falls out of a tree into the street right in front of Sam Baines's car. After hitting him with the car, Baines takes him in, and his daughter Lorraine takes care of George. The two fall in love." 18 | 1955-11-07,"On the first day of school after George was hit by Sam Baines's car, Lorraine is infatuated with George as a result of the Florence Nightingale effect. With his attentions focused on Lorraine, George stops writing science fiction stories and having missed Science Fiction Theatre on the 5th (due to his injury), apparently has no problem missing the show scheduled for Saturday the 12th - the night of the Enchantment Under the Sea dance. George then asks Lorraine to the dance, and she accepts. Biff, meanwhile, apparently leaves them alone for awhile." 19 | 1955-11-08,"Biff and his gang stop by Lou's Cafe to make sure that George McFly isn't there, then drive off, perhaps commenting on a manure truck that is parked nearby." 20 | 1955-11-12,"Doc continues to think about the vision he had had a week earlier about the flux capacitor. If Doc is conducting an experiment, it is probably in his home rather than in Courthouse Square.\n\nLorraine, accompanied by her friend Babs, goes to Ruth's Frock Shop in downtown Hill Valley to pick up a dress she had made for the dance later that night. There is nobody else present.\n\nThe Enchantment Under the Sea dance takes place at the Hill Valley High School gym. Marvin Berry and the Starlighters play a set of music, taking a break at 9 o'clock.\n\nBiff and his gang spend the evening drinking, but avoid going to the dance.\n\nDuring Marvin Berry and the Starlighters' second (and final for the night) set, George and Lorraine share their first kiss on the dance floor. It is at that moment that Lorraine realizes that she is going to spend the rest of her life with George.\n\n10:04 p.m.: The Hill Valley Courthouse is struck by lightning, stopping its clock after 70 years, 2 months, 7 days, 2 hours and 4 minutes of continuous ticking. Despite repeated attempts over the intervening years, the clock is never successfully repaired; in fact, most Hill Valley residents voice a desire for its preservation as is to memorialize the unusual occurrence." 21 | 1956-06-20,"George McFly and Lorraine Baines McFly graduate from Hill Valley High School." 22 | 1958-03-31,"Chuck Berry releases the single Johnny B. Goode." 23 | 1958-03-27,"Biff Tannen turns 21, the legal gambling age." 24 | 1958-12-07,"George McFly and Lorraine Baines get married." 25 | 1962-08-01,"Doc Brown's mansion is destroyed by fire, except for the detached garage (which Doc moves into)." 26 | 1967-01-18,"A Fistful of Dollars, starring Clint Eastwood, is released to US cinemas." 27 | 1968-06-09,"Marty McFly is born to George and Lorraine McFly." 28 | 1968-08-06,"Douglas J. Needles is born." 29 | 1968-10-29,"Jennifer Parker is born." 30 | 1983-05-22,"Ronald Reagan announces he will run for a second term in office. Although a newspaper headline, seen in Timeline 2, mentions that Doc Brown is commended and wins an award, it is unknown whether this happened in the original timeline (because in 1985, Dr. Brown is described by Stanford S. Strickland as 'a real nutcase')." 31 | 1985-10-12,"Libyan terrorist steal plutonium from the local power plant so Doc can build them a nuclear weapon.[22] It is dismissed by the FBI as a 'simple clerical error'." 32 | 1985-10-25,"8:18 a.m. Opening titles of 'Back to the Future': all the clocks in Doc's lab are set 25 minutes slow, displayed incorrectly as 7:53 a.m.\n\n8:25 a.m. Doc asks Marty to meet him at Twin Pines Mall at 1:15 a.m. next morning.\n\nMarty arrives to school late then fails his audition with his band, The Pinheads, later after school.\n\nMarty admires a black Toyota Hilux 4X4 on a flatbed truck at the Texaco gas station.\n\nA woman asks Marty for a donation to preserve the non-functioning clock tower, handing him a flyer containing specific date and time details about the lightning strike to the clock. Marty puts the flyer in his pocket.\n\nBiff borrows George's car. He crashes it while drinking and driving (at the same time!), and the two get into an argument about whether or not George knew about a blind spot.\n\nLorraine's brother Joey fails to make parole again." 33 | 1985-10-26,"12:28 a.m. Doc calls Marty to stop by at his garage to pick up his JVC camcorder.\n\n1:16 a.m. Marty arrives at Twin Pines Mall.\n\n1:18 a.m. Doc demonstrates Temporal Experiment #1.\n\n1:20 a.m. Temporal Experiment #1 proves to be a success. Einstein becomes the world's first time traveler by departing one minute into the future. (This time jump does not create a new timeline; timelines are only created by going back in time. Einstein merely moves one minute forward on Timeline 1. The Doc states this is the case with timelines in the blackboard scene in Part II.)\n\n1:21 a.m. Einstein arrives at one minute into the future.\n\n1:33 a.m. Emmett Brown is shot dead by Libyan terrorists.\n\n1:35 a.m. Marty flees from the Libyans in the DeLorean and accidentally departs 1985 for 1955 by accelerating to 88 m.p.h. His arrival there brings Timeline 2 into existence, and Timeline 1 fades from existence." -------------------------------------------------------------------------------- /examples/many_to_many/config.yaml: -------------------------------------------------------------------------------- 1 | tables: 2 | # Create data for a person table. 3 | - name: person 4 | count: 10000 5 | columns: 6 | - name: id 7 | type: gen 8 | processor: 9 | value: ${uuid} 10 | 11 | # Create data for an event table. 12 | - name: event 13 | count: 100 14 | columns: 15 | - name: id 16 | type: gen 17 | processor: 18 | value: ${uuid} 19 | 20 | # Create data for a person_type table, with 5 rows. 21 | - name: person_type 22 | count: 5 23 | columns: 24 | - name: id 25 | type: gen 26 | processor: 27 | value: ${uuid} 28 | 29 | - name: name 30 | type: gen 31 | processor: 32 | value: TYPE-${uint16} 33 | format: "%05d" 34 | 35 | # Create data for a person_event many-to-many resolver table between 36 | # the person and event tables. 37 | - name: person_event 38 | columns: 39 | - name: person_id 40 | type: each 41 | processor: 42 | table: person 43 | column: id 44 | 45 | - name: event_id 46 | type: each 47 | processor: 48 | table: event 49 | column: id -------------------------------------------------------------------------------- /examples/many_to_many/create.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE "person" ( 2 | "id" UUID PRIMARY KEY DEFAULT gen_random_uuid() 3 | ); 4 | 5 | CREATE TABLE "event" ( 6 | "id" UUID PRIMARY KEY DEFAULT gen_random_uuid() 7 | ); 8 | 9 | CREATE TABLE "person_type" ( 10 | "id" UUID PRIMARY KEY DEFAULT gen_random_uuid(), 11 | "name" STRING NOT NULL 12 | ); 13 | 14 | CREATE TABLE "person_event" ( 15 | "id" UUID PRIMARY KEY DEFAULT gen_random_uuid(), 16 | "person_type" UUID NOT NULL REFERENCES "person_type"("id"), 17 | "person_id" UUID NOT NULL REFERENCES "person"("id"), 18 | "event_id" UUID NOT NULL REFERENCES "event"("id") 19 | ); -------------------------------------------------------------------------------- /examples/many_to_many/insert.sql: -------------------------------------------------------------------------------- 1 | IMPORT INTO "person" ( 2 | "id" 3 | ) 4 | CSV DATA ( 5 | 'http://localhost:3000/person.csv' 6 | ) 7 | WITH 8 | skip='1', 9 | nullif = '', 10 | allow_quoted_null; 11 | 12 | IMPORT INTO "event" ( 13 | "id" 14 | ) 15 | CSV DATA ( 16 | 'http://localhost:3000/event.csv' 17 | ) 18 | WITH 19 | skip='1', 20 | nullif = '', 21 | allow_quoted_null; 22 | 23 | IMPORT INTO "person_type" ( 24 | "id", 25 | "name" 26 | ) 27 | CSV DATA ( 28 | 'http://localhost:3000/person_type.csv' 29 | ) 30 | WITH 31 | skip='1', 32 | nullif = '', 33 | allow_quoted_null; 34 | 35 | IMPORT INTO "person_event" ( 36 | "person_id", 37 | "event_id", 38 | "id", 39 | "person_type" 40 | ) 41 | CSV DATA ( 42 | 'http://localhost:3000/person_event.csv' 43 | ) 44 | WITH 45 | skip='1', 46 | nullif = '', 47 | allow_quoted_null; -------------------------------------------------------------------------------- /examples/match_test/config.yaml: -------------------------------------------------------------------------------- 1 | inputs: 2 | 3 | - name: market 4 | type: csv 5 | source: 6 | file_name: market.csv 7 | 8 | tables: 9 | 10 | - name: market_product 11 | count: 10 12 | columns: 13 | - name: id 14 | type: gen 15 | processor: 16 | value: ${uuid} 17 | - name: market 18 | type: set 19 | processor: 20 | values: ["us", "in"] 21 | - name: region 22 | type: match 23 | processor: 24 | source_table: market 25 | source_column: code 26 | source_value: region 27 | match_column: market 28 | -------------------------------------------------------------------------------- /examples/match_test/market.csv: -------------------------------------------------------------------------------- 1 | code,region 2 | us,us-east-1 3 | in,ap-south-1 -------------------------------------------------------------------------------- /examples/pattern_test/config.yaml: -------------------------------------------------------------------------------- 1 | tables: 2 | # Generate data for a person table using the pattern generator. 3 | - name: person 4 | count: 1000 5 | columns: 6 | - name: id 7 | type: gen 8 | processor: 9 | value: ${uuid} 10 | - name: mobile 11 | type: gen 12 | processor: 13 | pattern: \d{3}-\d{3}-\d{4} -------------------------------------------------------------------------------- /examples/person/config.yaml: -------------------------------------------------------------------------------- 1 | tables: 2 | # Generate data for a person table, showing off a couple of column generators. 3 | - name: person 4 | count: 100000 5 | columns: 6 | - name: id 7 | type: gen 8 | processor: 9 | value: ${uuid} 10 | - name: full_name 11 | type: gen 12 | processor: 13 | value: ${name_prefix} ${first_name} ${last_name} 14 | - name: date_of_birth 15 | type: gen 16 | processor: 17 | value: ${date} 18 | format: 2006-01-02 19 | - name: user_type 20 | type: set 21 | processor: 22 | values: [admin, regular, read-only] 23 | - name: favourite_animal 24 | type: set 25 | processor: 26 | values: [rabbit, dog, cat] 27 | weights: [10, 60, 30] -------------------------------------------------------------------------------- /examples/person/create.sql: -------------------------------------------------------------------------------- 1 | CREATE TYPE person_type AS ENUM ('admin', 'regular', 'read-only'); 2 | CREATE TYPE animal_type AS ENUM ('rabbit', 'dog', 'cat'); 3 | 4 | CREATE TABLE person ( 5 | "id" UUID PRIMARY KEY, 6 | "full_name" STRING NOT NULL, 7 | "date_of_birth" DATE NOT NULL, 8 | "user_type" person_type NOT NULL, 9 | "favourite_animal" animal_type NOT NULL 10 | ); -------------------------------------------------------------------------------- /examples/person/insert.sql: -------------------------------------------------------------------------------- 1 | IMPORT INTO "person"( 2 | "id", 3 | "full_name", 4 | "date_of_birth", 5 | "user_type", 6 | "favourite_animal" 7 | ) 8 | CSV DATA ( 9 | 'http://localhost:3000/person.csv' 10 | ) 11 | WITH 12 | skip='1', 13 | nullif = '', 14 | allow_quoted_null; -------------------------------------------------------------------------------- /examples/range_test/config.yaml: -------------------------------------------------------------------------------- 1 | tables: 2 | - name: auto_incrementing_id 3 | count: 20 4 | columns: 5 | - name: id 6 | type: range 7 | processor: 8 | type: int 9 | from: 1 10 | 11 | - name: bet_types 12 | columns: 13 | - name: id 14 | type: range 15 | processor: 16 | type: int 17 | from: 1 18 | step: 1 19 | - name: description 20 | type: const 21 | processor: 22 | values: [Win, Lose, Draw] 23 | 24 | - name: previous_table 25 | count: 20 26 | columns: 27 | - name: id 28 | type: gen 29 | processor: 30 | value: ${uuid} 31 | 32 | - name: each_test 33 | count: 10 # Ignored due to count from "each" generator. 34 | columns: 35 | - name: id 36 | type: each 37 | processor: 38 | table: previous_table 39 | column: id 40 | - name: should_have_20 41 | type: range 42 | processor: 43 | type: date 44 | from: 2020-01-01 45 | to: 2023-01-01 46 | format: 2006-01-02 47 | step: 1h # Ignored due to count from "each" generator. 48 | 49 | - name: count_test 50 | count: 10 51 | columns: 52 | - name: should_have_10 53 | type: range 54 | processor: 55 | type: date 56 | from: 2020-01-01 57 | to: 2023-01-01 58 | format: 2006-01-02 59 | step: 1h # Ignored due to count value. 60 | 61 | - name: step_test 62 | columns: 63 | - name: should_have_37 64 | type: range 65 | processor: 66 | type: date 67 | from: 2020-01-01 68 | to: 2023-01-01 69 | format: 2006-01-02 70 | step: 730h # Ignored due to count value. -------------------------------------------------------------------------------- /examples/supress_column/config.yaml: -------------------------------------------------------------------------------- 1 | tables: 2 | 3 | - name: example 4 | count: 1 5 | columns: 6 | 7 | - name: one 8 | type: gen 9 | processor: 10 | value: ${uint8} 11 | 12 | - name: two 13 | suppress: true 14 | type: gen 15 | processor: 16 | value: ${uint64} -------------------------------------------------------------------------------- /examples/unique_test/config.yaml: -------------------------------------------------------------------------------- 1 | tables: 2 | - name: person 3 | unique_columns: [col_a, col_b] 4 | count: 10 5 | columns: 6 | 7 | - name: col_a 8 | type: set 9 | processor: 10 | values: [a, b, c] 11 | 12 | - name: col_b 13 | type: set 14 | processor: 15 | values: [d, e, f] 16 | 17 | - name: col_c 18 | type: set 19 | processor: 20 | values: [g, h, i] -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/codingconcepts/dg 2 | 3 | go 1.20 4 | 5 | require ( 6 | github.com/brianvoe/gofakeit/v6 v6.22.0 7 | github.com/samber/lo v1.38.1 8 | github.com/stretchr/testify v1.8.4 9 | gopkg.in/yaml.v3 v3.0.1 10 | ) 11 | 12 | require ( 13 | github.com/davecgh/go-spew v1.1.1 // indirect 14 | github.com/lucasjones/reggen v0.0.0-20200904144131-37ba4fa293bb // indirect 15 | github.com/pmezard/go-difflib v1.0.0 // indirect 16 | golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect 17 | ) 18 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/brianvoe/gofakeit/v6 v6.22.0 h1:BzOsDot1o3cufTfOk+fWKE9nFYojyDV+XHdCWL2+uyE= 2 | github.com/brianvoe/gofakeit/v6 v6.22.0/go.mod h1:Ow6qC71xtwm79anlwKRlWZW6zVq9D2XHE4QSSMP/rU8= 3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 4 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/lucasjones/reggen v0.0.0-20200904144131-37ba4fa293bb h1:w1g9wNDIE/pHSTmAaUhv4TZQuPBS6GV3mMz5hkgziIU= 6 | github.com/lucasjones/reggen v0.0.0-20200904144131-37ba4fa293bb/go.mod h1:5ELEyG+X8f+meRWHuqUOewBOhvHkl7M76pdGEansxW4= 7 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 8 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 9 | github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM= 10 | github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= 11 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 12 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 13 | golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM= 14 | golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE= 15 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 16 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 17 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 18 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 19 | -------------------------------------------------------------------------------- /internal/pkg/generator/array.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "github.com/samber/lo" 5 | ) 6 | 7 | // CartesianProduct returns the Cartesian product of a variable number of arrays. 8 | func CartesianProduct(a ...[]string) [][]string { 9 | if len(a) == 0 { 10 | return [][]string{} 11 | } 12 | 13 | totalCombinations := lo.Reduce(a, func(agg int, item []string, index int) int { 14 | return agg * len(item) 15 | }, 1) 16 | 17 | // Preallocate the result slice with the correct capacity. 18 | result := make([][]string, 0, totalCombinations) 19 | result = append(result, []string{}) 20 | 21 | // Generate the Cartesian products. 22 | for _, arr := range a { 23 | temp := make([][]string, 0, totalCombinations) 24 | for _, element := range arr { 25 | for _, combination := range result { 26 | // Create a new combination by appending the current element. 27 | newCombination := make([]string, len(combination)+1) 28 | copy(newCombination, combination) 29 | newCombination[len(combination)] = element 30 | temp = append(temp, newCombination) 31 | } 32 | } 33 | result = temp 34 | } 35 | 36 | return result 37 | } 38 | 39 | // Transpose a multi-dimensional array. 40 | func Transpose(m [][]string) [][]string { 41 | max := lo.MaxBy(m, func(a, b []string) bool { 42 | return len(a) > len(b) 43 | }) 44 | 45 | r := make([][]string, len(max)) 46 | 47 | for x := range r { 48 | r[x] = make([]string, len(m)) 49 | } 50 | 51 | for y, s := range m { 52 | for x, e := range s { 53 | r[x][y] = e 54 | } 55 | } 56 | return r 57 | } 58 | -------------------------------------------------------------------------------- /internal/pkg/generator/array_test.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestCartesianProduct(t *testing.T) { 11 | cases := []struct { 12 | name string 13 | input [][]string 14 | output [][]string 15 | }{ 16 | { 17 | name: "single input", 18 | input: [][]string{ 19 | {"a", "b", "c"}, 20 | }, 21 | output: [][]string{ 22 | {"a"}, {"b"}, {"c"}, 23 | }, 24 | }, 25 | { 26 | name: "multiple input", 27 | input: [][]string{ 28 | {"a", "b", "c"}, 29 | {"d", "e", "f"}, 30 | }, 31 | output: [][]string{ 32 | {"a", "d"}, 33 | {"b", "d"}, 34 | {"c", "d"}, 35 | {"a", "e"}, 36 | {"b", "e"}, 37 | {"c", "e"}, 38 | {"a", "f"}, 39 | {"b", "f"}, 40 | {"c", "f"}, 41 | }, 42 | }, 43 | { 44 | name: "small array big array", 45 | input: [][]string{ 46 | {"a", "b"}, 47 | {"d", "e", "f"}, 48 | }, 49 | output: [][]string{ 50 | {"a", "d"}, 51 | {"b", "d"}, 52 | {"a", "e"}, 53 | {"b", "e"}, 54 | {"a", "f"}, 55 | {"b", "f"}, 56 | }, 57 | }, 58 | { 59 | name: "big array small array", 60 | input: [][]string{ 61 | {"a", "b", "c"}, 62 | {"d", "e"}, 63 | }, 64 | output: [][]string{ 65 | {"a", "d"}, 66 | {"b", "d"}, 67 | {"c", "d"}, 68 | {"a", "e"}, 69 | {"b", "e"}, 70 | {"c", "e"}, 71 | }, 72 | }, 73 | } 74 | 75 | for _, c := range cases { 76 | t.Run(c.name, func(t *testing.T) { 77 | actual := CartesianProduct(c.input...) 78 | assert.Equal(t, c.output, actual) 79 | }) 80 | } 81 | } 82 | 83 | func TestTranspose(t *testing.T) { 84 | cases := []struct { 85 | name string 86 | input [][]string 87 | output [][]string 88 | }{ 89 | { 90 | name: "single input", 91 | input: [][]string{ 92 | {"a", "b", "c"}, 93 | }, 94 | output: [][]string{ 95 | {"a"}, {"b"}, {"c"}, 96 | }, 97 | }, 98 | { 99 | name: "multiple input", 100 | input: [][]string{ 101 | {"a", "b", "c"}, 102 | {"d", "e", "f"}, 103 | }, 104 | output: [][]string{ 105 | {"a", "d"}, 106 | {"b", "e"}, 107 | {"c", "f"}, 108 | }, 109 | }, 110 | { 111 | name: "first input bigger than second", 112 | input: [][]string{ 113 | {"a", "b", "c", "1"}, 114 | {"d", "e", "f"}, 115 | }, 116 | output: [][]string{ 117 | {"a", "d"}, 118 | {"b", "e"}, 119 | {"c", "f"}, 120 | {"1", ""}, 121 | }, 122 | }, 123 | { 124 | name: "second input bigger than first", 125 | input: [][]string{ 126 | {"a", "b", "c"}, 127 | {"d", "e", "f", "2"}, 128 | }, 129 | output: [][]string{ 130 | {"a", "d"}, 131 | {"b", "e"}, 132 | {"c", "f"}, 133 | {"", "2"}, 134 | }, 135 | }, 136 | } 137 | 138 | for _, c := range cases { 139 | t.Run(c.name, func(t *testing.T) { 140 | actual := Transpose(c.input) 141 | assert.Equal(t, c.output, actual) 142 | }) 143 | } 144 | } 145 | 146 | func BenchmarkCartesianProduct(b *testing.B) { 147 | cases := []struct { 148 | name string 149 | input [][]string 150 | }{ 151 | { 152 | name: "single array", 153 | input: [][]string{ 154 | {"a", "b", "c"}, 155 | }, 156 | }, 157 | { 158 | name: "two small arrays", 159 | input: [][]string{ 160 | {"a", "b", "c"}, 161 | {"d", "e", "f"}, 162 | }, 163 | }, 164 | { 165 | name: "three small arrays", 166 | input: [][]string{ 167 | {"a", "b", "c"}, 168 | {"d", "e", "f"}, 169 | {"g", "h", "i"}, 170 | }, 171 | }, 172 | { 173 | name: "small array and big array", 174 | input: [][]string{ 175 | {"a", "b", "c"}, 176 | strings.Split(strings.Repeat("d", 1000), ""), 177 | }, 178 | }, 179 | { 180 | name: "big array and small array", 181 | input: [][]string{ 182 | strings.Split(strings.Repeat("a", 1000), ""), 183 | {"d", "e", "f"}, 184 | }, 185 | }, 186 | { 187 | name: "big arrays", 188 | input: [][]string{ 189 | strings.Split(strings.Repeat("a", 1000), ""), 190 | strings.Split(strings.Repeat("d", 1000), ""), 191 | }, 192 | }, 193 | } 194 | 195 | for _, c := range cases { 196 | b.Run(c.name, func(b *testing.B) { 197 | for i := 0; i < b.N; i++ { 198 | CartesianProduct(c.input...) 199 | } 200 | }) 201 | } 202 | } 203 | 204 | func BenchmarkTranspose(b *testing.B) { 205 | cases := []struct { 206 | name string 207 | input [][]string 208 | }{ 209 | { 210 | name: "single array", 211 | input: [][]string{ 212 | {"a", "b", "c"}, 213 | }, 214 | }, 215 | { 216 | name: "multiple small arrays", 217 | input: [][]string{ 218 | {"a", "b", "c"}, 219 | {"d", "e", "f"}, 220 | }, 221 | }, 222 | { 223 | name: "small array and big array", 224 | input: [][]string{ 225 | {"a", "b", "c"}, 226 | strings.Split(strings.Repeat("d", 1000), ""), 227 | }, 228 | }, 229 | { 230 | name: "big array and small array", 231 | input: [][]string{ 232 | strings.Split(strings.Repeat("a", 1000), ""), 233 | {"d", "e", "f"}, 234 | }, 235 | }, 236 | } 237 | 238 | for _, c := range cases { 239 | b.Run(c.name, func(b *testing.B) { 240 | for i := 0; i < b.N; i++ { 241 | CartesianProduct(c.input...) 242 | } 243 | }) 244 | } 245 | } 246 | -------------------------------------------------------------------------------- /internal/pkg/generator/common.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/codingconcepts/dg/internal/pkg/model" 7 | ) 8 | 9 | // AddTable adds a column to a table in the given files map. 10 | func AddTable(table model.Table, column string, line []string, files map[string]model.CSVFile) { 11 | if _, ok := files[table.Name]; !ok { 12 | files[table.Name] = model.CSVFile{ 13 | Name: table.Name, 14 | Output: !table.Suppress, 15 | UniqueColumns: table.UniqueColumns, 16 | } 17 | } 18 | 19 | add(files, table.Name, column, line) 20 | } 21 | 22 | // AddInput adds a column to a table in the given files map. 23 | func AddInput(table, column string, line []string, files map[string]model.CSVFile) { 24 | if _, ok := files[table]; !ok { 25 | files[table] = model.CSVFile{ 26 | Name: table, 27 | Output: false, 28 | } 29 | } 30 | 31 | add(files, table, column, line) 32 | } 33 | 34 | func add(files map[string]model.CSVFile, table string, column string, line []string) { 35 | foundTable := files[table] 36 | foundTable.Header = append(foundTable.Header, column) 37 | foundTable.Lines = append(foundTable.Lines, line) 38 | files[table] = foundTable 39 | } 40 | 41 | func formatValue(fp FormatterProcessor, value any) string { 42 | format := fp.GetFormat() 43 | if format != "" { 44 | // Check if the value implements the formatter interface and use that first, 45 | // otherwise, just perform a simple string format. 46 | if f, ok := value.(Formatter); ok { 47 | return f.Format(format) 48 | } else { 49 | return fmt.Sprintf(format, value) 50 | } 51 | } else { 52 | return fmt.Sprintf("%v", value) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /internal/pkg/generator/common_test.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/codingconcepts/dg/internal/pkg/model" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestAddToFile(t *testing.T) { 13 | cases := []struct { 14 | name string 15 | table string 16 | column string 17 | line []string 18 | filesBefore map[string]model.CSVFile 19 | filesAfter map[string]model.CSVFile 20 | }{ 21 | { 22 | name: "first column for table", 23 | table: "person", 24 | column: "id", 25 | line: []string{"a", "b", "c"}, 26 | filesBefore: map[string]model.CSVFile{}, 27 | filesAfter: map[string]model.CSVFile{ 28 | "person": { 29 | Name: "person", 30 | Header: []string{"id"}, 31 | Lines: [][]string{{"a", "b", "c"}}, 32 | }, 33 | }, 34 | }, 35 | { 36 | name: "second column for table", 37 | table: "person", 38 | column: "name", 39 | line: []string{"1", "2", "3"}, 40 | filesBefore: map[string]model.CSVFile{ 41 | "person": { 42 | Name: "person", 43 | Header: []string{"id"}, 44 | Lines: [][]string{{"a", "b", "c"}}, 45 | }, 46 | }, 47 | filesAfter: map[string]model.CSVFile{ 48 | "person": { 49 | Name: "person", 50 | Header: []string{"id", "name"}, 51 | Lines: [][]string{{"a", "b", "c"}, {"1", "2", "3"}}, 52 | }, 53 | }, 54 | }, 55 | } 56 | 57 | for _, c := range cases { 58 | t.Run(c.name, func(t *testing.T) { 59 | table := model.Table{ 60 | Name: c.table, 61 | } 62 | AddTable(table, c.column, c.line, c.filesBefore) 63 | 64 | assert.Equal(t, c.filesAfter[c.table].Header, c.filesBefore[c.table].Header) 65 | assert.Equal(t, c.filesAfter[c.table].Lines, c.filesBefore[c.table].Lines) 66 | assert.Equal(t, c.filesAfter[c.table].Name, c.filesBefore[c.table].Name) 67 | }) 68 | } 69 | } 70 | 71 | func TestFormatValue(t *testing.T) { 72 | cases := []struct { 73 | name string 74 | format string 75 | value any 76 | exp string 77 | }{ 78 | { 79 | name: "no format", 80 | value: 1, 81 | exp: "1", 82 | }, 83 | { 84 | name: "int format", 85 | value: 1, 86 | format: "PREFIX_%d_SUFFIX", 87 | exp: "PREFIX_1_SUFFIX", 88 | }, 89 | { 90 | name: "time format", 91 | value: time.Date(2023, 1, 2, 3, 4, 5, 6, time.UTC), 92 | format: "2006-01-02T15:04:05Z07:00", 93 | exp: "2023-01-02T03:04:05Z", 94 | }, 95 | } 96 | 97 | for _, c := range cases { 98 | t.Run(c.name, func(t *testing.T) { 99 | formatter := GenGenerator{Format: c.format} 100 | act := formatValue(formatter, c.value) 101 | 102 | assert.Equal(t, c.exp, act) 103 | }) 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /internal/pkg/generator/const_generator.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | 7 | "github.com/codingconcepts/dg/internal/pkg/model" 8 | "github.com/samber/lo" 9 | ) 10 | 11 | // ConstGenerator provides additional context to a const column. 12 | type ConstGenerator struct { 13 | Values []string `yaml:"values"` 14 | } 15 | 16 | // Generate values for a column based on a series of provided values. 17 | func (g ConstGenerator) Generate(t model.Table, files map[string]model.CSVFile) error { 18 | cols := lo.Filter(t.Columns, func(c model.Column, _ int) bool { 19 | return c.Type == "const" 20 | }) 21 | 22 | sortColumns(cols) 23 | 24 | for _, c := range cols { 25 | var cg ConstGenerator 26 | if err := c.Generator.UnmarshalFunc(&cg); err != nil { 27 | return fmt.Errorf("parsing const process for %s.%s: %w", t.Name, c.Name, err) 28 | } 29 | if err := cg.generate(t, c, files); err != nil { 30 | return fmt.Errorf("generating const columns: %w", err) 31 | } 32 | } 33 | 34 | return nil 35 | } 36 | 37 | func sortColumns(cols []model.Column) { 38 | sort.Slice(cols, func(i, j int) bool { 39 | var g1 ConstGenerator 40 | if err := cols[i].Generator.UnmarshalFunc(&g1); err != nil { 41 | return false 42 | } 43 | 44 | var g2 ConstGenerator 45 | if err := cols[j].Generator.UnmarshalFunc(&g2); err != nil { 46 | return false 47 | } 48 | 49 | return len(g1.Values) > len(g2.Values) 50 | }) 51 | } 52 | 53 | func (g ConstGenerator) generate(t model.Table, c model.Column, files map[string]model.CSVFile) error { 54 | if len(g.Values) == 0 { 55 | return fmt.Errorf("no values provided for const generator") 56 | } 57 | 58 | count := len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool { 59 | return len(a) > len(b) 60 | })) 61 | 62 | if count == 0 { 63 | count = t.Count 64 | } 65 | 66 | // Repeat the values until they equal the count. 67 | if count > len(g.Values) { 68 | for i := 0; len(g.Values) < count; i++ { 69 | g.Values = append(g.Values, g.Values[i%len(g.Values)]) 70 | } 71 | } 72 | 73 | AddTable(t, c.Name, g.Values, files) 74 | return nil 75 | } 76 | -------------------------------------------------------------------------------- /internal/pkg/generator/const_generator_test.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/codingconcepts/dg/internal/pkg/model" 7 | "github.com/samber/lo" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestGenerateConstColumn(t *testing.T) { 12 | cases := []struct { 13 | name string 14 | tableCount int 15 | files map[string]model.CSVFile 16 | values []string 17 | exp []string 18 | expErr error 19 | }{ 20 | { 21 | name: "first column in table", 22 | tableCount: 3, 23 | files: map[string]model.CSVFile{}, 24 | values: []string{"a", "b", "c"}, 25 | }, 26 | { 27 | name: "less than current table size", 28 | files: map[string]model.CSVFile{ 29 | "table": { 30 | Name: "table", 31 | Header: []string{"col_a", "col_b", "col_c"}, 32 | Lines: [][]string{ 33 | {"val_1", "val_2", "val_3"}, 34 | {"val_1", "val_2", "val_3"}, 35 | }, 36 | }, 37 | }, 38 | values: []string{"a", "b"}, 39 | exp: []string{"a", "b", "a"}, 40 | }, 41 | { 42 | name: "less than current table size with table count", 43 | tableCount: 10, 44 | files: map[string]model.CSVFile{ 45 | "table": { 46 | Name: "table", 47 | Header: []string{"col_a", "col_b", "col_c"}, 48 | Lines: [][]string{ 49 | {"val_1", "val_2", "val_3"}, 50 | {"val_1", "val_2", "val_3"}, 51 | }, 52 | }, 53 | }, 54 | values: []string{"a", "b"}, 55 | exp: []string{"a", "b", "a"}, 56 | }, 57 | { 58 | name: "same as current table size", 59 | files: map[string]model.CSVFile{ 60 | "table": { 61 | Name: "table", 62 | Header: []string{"col_a", "col_b", "col_c"}, 63 | Lines: [][]string{ 64 | {"val_1", "val_2", "val_3"}, 65 | {"val_1", "val_2", "val_3"}, 66 | }, 67 | }, 68 | }, 69 | values: []string{"a", "b", "c"}, 70 | }, 71 | { 72 | name: "more than current table size", 73 | files: map[string]model.CSVFile{ 74 | "table": { 75 | Name: "table", 76 | Header: []string{"col_a", "col_b", "col_c"}, 77 | Lines: [][]string{ 78 | {"val_1", "val_2", "val_3"}, 79 | {"val_1", "val_2", "val_3"}, 80 | }, 81 | }, 82 | }, 83 | values: []string{"a", "b", "c", "d", "e"}, 84 | }, 85 | } 86 | 87 | for _, c := range cases { 88 | t.Run(c.name, func(t *testing.T) { 89 | g := ConstGenerator{ 90 | Values: c.values, 91 | } 92 | 93 | table := model.Table{ 94 | Name: "table", 95 | Count: c.tableCount, 96 | Columns: []model.Column{ 97 | {Name: "col", Type: "const", Generator: model.ToRawMessage(t, g)}, 98 | }, 99 | } 100 | 101 | actErr := g.Generate(table, c.files) 102 | assert.Equal(t, c.expErr, actErr) 103 | if actErr != nil { 104 | return 105 | } 106 | 107 | exp := lo.Ternary(c.exp != nil, c.exp, c.values) 108 | 109 | assert.Equal(t, exp, c.files["table"].Lines[len(c.files["table"].Lines)-1]) 110 | }) 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /internal/pkg/generator/each_generator.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/codingconcepts/dg/internal/pkg/model" 7 | 8 | "github.com/samber/lo" 9 | ) 10 | 11 | // EachGenerator provides additional context to an each or ref column. 12 | type EachGenerator struct { 13 | Table string `yaml:"table"` 14 | Column string `yaml:"column"` 15 | } 16 | 17 | // Generate looks for any each type columns for a table, and 18 | // returns their Cartesian product back into the given files map. 19 | func (g EachGenerator) Generate(t model.Table, files map[string]model.CSVFile) error { 20 | cols := lo.Filter(t.Columns, func(c model.Column, _ int) bool { 21 | return c.Type == "each" 22 | }) 23 | 24 | if len(cols) == 0 { 25 | return nil 26 | } 27 | 28 | var preCartesian [][]string 29 | for _, col := range cols { 30 | var gCol EachGenerator 31 | if err := col.Generator.UnmarshalFunc(&gCol); err != nil { 32 | return fmt.Errorf("parsing each process for %s.%s: %w", t.Name, col.Name, err) 33 | } 34 | 35 | srcTable := files[gCol.Table] 36 | srcColumn := gCol.Column 37 | srcColumnIndex := lo.IndexOf(srcTable.Header, srcColumn) 38 | 39 | if len(srcTable.Lines)-1 < srcColumnIndex { 40 | return fmt.Errorf("column %q out of bounds for table %q", srcColumn, srcTable.Name) 41 | } 42 | 43 | preCartesian = append(preCartesian, srcTable.Lines[srcColumnIndex]) 44 | } 45 | 46 | // Compute Cartesian product of all columns. 47 | cartesianColumns := Transpose(CartesianProduct(preCartesian...)) 48 | 49 | // Add the header 50 | for i, col := range cartesianColumns { 51 | AddTable(t, cols[i].Name, col, files) 52 | } 53 | 54 | return nil 55 | } 56 | -------------------------------------------------------------------------------- /internal/pkg/generator/each_generator_test.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/codingconcepts/dg/internal/pkg/model" 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestGenerateEachColumn(t *testing.T) { 11 | table := model.Table{ 12 | Name: "person_event", 13 | Columns: []model.Column{ 14 | { 15 | Name: "person_id", 16 | Type: "each", 17 | Generator: model.ToRawMessage(t, EachGenerator{ 18 | Table: "person", 19 | Column: "id", 20 | }), 21 | }, 22 | { 23 | Name: "event_id", 24 | Type: "each", 25 | Generator: model.ToRawMessage(t, EachGenerator{ 26 | Table: "event", 27 | Column: "id", 28 | }), 29 | }, 30 | }, 31 | } 32 | 33 | files := map[string]model.CSVFile{ 34 | "person": { 35 | Name: "person", 36 | Header: []string{"id", "name"}, 37 | Lines: [][]string{ 38 | {"p-i-1", "p-i-2"}, 39 | {"p-one", "p-two"}, 40 | }, 41 | }, 42 | "event": { 43 | Name: "event", 44 | Header: []string{"id", "name"}, 45 | Lines: [][]string{ 46 | {"e-i-1", "e-i-2"}, 47 | {"e-one", "e-two"}, 48 | }, 49 | }, 50 | } 51 | 52 | g := EachGenerator{} 53 | 54 | err := g.Generate(table, files) 55 | assert.Nil(t, err) 56 | 57 | exp := model.CSVFile{ 58 | Name: "person_event", 59 | Header: []string{"person_id", "event_id"}, 60 | Lines: [][]string{ 61 | {"p-i-1", "p-i-2", "p-i-1", "p-i-2"}, 62 | {"e-i-1", "e-i-1", "e-i-2", "e-i-2"}, 63 | }, 64 | Output: true, 65 | } 66 | assert.Equal(t, exp, files["person_event"]) 67 | } 68 | -------------------------------------------------------------------------------- /internal/pkg/generator/formatter.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | // Formatter determines the behaviour for anything that can take a format 4 | // string and return another. 5 | type Formatter interface { 6 | Format(string) string 7 | } 8 | 9 | // FormatterProcessor can be called to get the Format string out of a struct 10 | // that implements this interface. 11 | type FormatterProcessor interface { 12 | GetFormat() string 13 | } 14 | -------------------------------------------------------------------------------- /internal/pkg/generator/gen_generator.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | "github.com/codingconcepts/dg/internal/pkg/model" 8 | "github.com/codingconcepts/dg/internal/pkg/random" 9 | "github.com/lucasjones/reggen" 10 | "github.com/samber/lo" 11 | ) 12 | 13 | // GenGenerator provides additional context to a gen column. 14 | type GenGenerator struct { 15 | Value string `yaml:"value"` 16 | Pattern string `yaml:"pattern"` 17 | NullPercentage int `yaml:"null_percentage"` 18 | Format string `yaml:"format"` 19 | 20 | patternGenerator *reggen.Generator 21 | } 22 | 23 | func (g GenGenerator) GetFormat() string { 24 | return g.Format 25 | } 26 | 27 | // Generate random data for a given column. 28 | func (g GenGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error { 29 | if g.Value == "" && g.Pattern == "" { 30 | return fmt.Errorf("gen must have either 'value' or 'pattern'") 31 | } 32 | 33 | if t.Count == 0 { 34 | t.Count = len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool { 35 | return len(a) > len(b) 36 | })) 37 | } 38 | 39 | if g.Pattern != "" { 40 | var err error 41 | if g.patternGenerator, err = reggen.NewGenerator(g.Pattern); err != nil { 42 | return fmt.Errorf("creating regex generator: %w", err) 43 | } 44 | } 45 | 46 | var line []string 47 | for i := 0; i < t.Count; i++ { 48 | s := g.generate() 49 | line = append(line, s) 50 | } 51 | 52 | AddTable(t, c.Name, line, files) 53 | return nil 54 | } 55 | 56 | func (pg GenGenerator) generate() string { 57 | r := random.Intn(100) 58 | if r < pg.NullPercentage { 59 | return "" 60 | } 61 | 62 | if pg.Pattern != "" { 63 | return pg.patternGenerator.Generate(255) 64 | } 65 | 66 | s := pg.Value 67 | 68 | // Look for quick single-replacements. 69 | if v, ok := replacements[s]; ok { 70 | return formatValue(pg, v()) 71 | } 72 | 73 | // Process multipe-replacements. 74 | for k, v := range replacements { 75 | if strings.Contains(s, k) { 76 | valueStr := formatValue(pg, v()) 77 | s = strings.ReplaceAll(s, k, valueStr) 78 | } 79 | } 80 | 81 | return s 82 | } 83 | -------------------------------------------------------------------------------- /internal/pkg/generator/gen_generator_test.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "regexp" 5 | "strconv" 6 | "strings" 7 | "testing" 8 | "time" 9 | 10 | "github.com/codingconcepts/dg/internal/pkg/model" 11 | "github.com/lucasjones/reggen" 12 | 13 | "github.com/stretchr/testify/assert" 14 | ) 15 | 16 | func TestGenerateGenColumn(t *testing.T) { 17 | cases := []struct { 18 | name string 19 | value string 20 | pattern string 21 | format string 22 | expShapeFunc func(val string) bool 23 | }{ 24 | { 25 | name: "multiple space-delimited strings", 26 | value: "${first_name} ${last_name}", 27 | expShapeFunc: func(val string) bool { 28 | return len(strings.Split(val, " ")) == 2 29 | }, 30 | }, 31 | { 32 | name: "formatted date string", 33 | value: "${date}", 34 | format: "2006-01-02T15:04:05", 35 | expShapeFunc: func(val string) bool { 36 | _, err := time.Parse("2006-01-02T15:04:05", val) 37 | return err == nil 38 | }, 39 | }, 40 | { 41 | name: "integer", 42 | value: "${int64}", 43 | expShapeFunc: func(val string) bool { 44 | _, err := strconv.Atoi(val) 45 | if err != nil { 46 | t.Fatal(err) 47 | } 48 | return err == nil 49 | }, 50 | }, 51 | { 52 | name: "pattern", 53 | pattern: `[a-z]{3}-[A-Z]{3}-\d{3}`, 54 | expShapeFunc: func(val string) bool { 55 | re := regexp.MustCompile(`[a-z]{3}-[A-Z]{3}-\d{3}`) 56 | return re.MatchString(val) 57 | }, 58 | }, 59 | } 60 | 61 | for _, c := range cases { 62 | t.Run(c.name, func(t *testing.T) { 63 | table := model.Table{ 64 | Name: "table", 65 | Count: 1, 66 | } 67 | 68 | column := model.Column{ 69 | Name: "col", 70 | } 71 | 72 | g := GenGenerator{ 73 | Value: c.value, 74 | Pattern: c.pattern, 75 | Format: c.format, 76 | } 77 | 78 | files := map[string]model.CSVFile{} 79 | err := g.Generate(table, column, files) 80 | assert.Nil(t, err) 81 | assert.True(t, c.expShapeFunc(files["table"].Lines[0][0])) 82 | }) 83 | } 84 | } 85 | 86 | func BenchmarkGeneratePattern(b *testing.B) { 87 | pattern := `[a-z]{3}-[A-Z]{3}-\d{3}` 88 | patternGenerator, err := reggen.NewGenerator(pattern) 89 | assert.NoError(b, err) 90 | 91 | g := GenGenerator{ 92 | Pattern: pattern, 93 | patternGenerator: patternGenerator, 94 | } 95 | 96 | for i := 0; i < b.N; i++ { 97 | g.generate() 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /internal/pkg/generator/inc_generator.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "github.com/codingconcepts/dg/internal/pkg/model" 5 | "github.com/samber/lo" 6 | ) 7 | 8 | // IncGenerator provides additional context to an inc column. 9 | type IncGenerator struct { 10 | Start int `yaml:"start"` 11 | Format string `yaml:"format"` 12 | } 13 | 14 | func (pi IncGenerator) GetFormat() string { 15 | return pi.Format 16 | } 17 | 18 | // Generate an incrementing number value for a column. 19 | func (g IncGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error { 20 | if t.Count == 0 { 21 | t.Count = len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool { 22 | return len(a) > len(b) 23 | })) 24 | } 25 | 26 | var line []string 27 | for i := 0; i < t.Count; i++ { 28 | line = append(line, formatValue(g, g.Start+i)) 29 | } 30 | 31 | AddTable(t, c.Name, line, files) 32 | return nil 33 | } 34 | -------------------------------------------------------------------------------- /internal/pkg/generator/inc_generator_test.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/codingconcepts/dg/internal/pkg/model" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestGenerateIncColumn(t *testing.T) { 12 | cases := []struct { 13 | name string 14 | count int 15 | start int 16 | files map[string]model.CSVFile 17 | exp [][]string 18 | }{ 19 | { 20 | name: "with count generates as many as specified by count", 21 | count: 10, 22 | start: 100, 23 | files: map[string]model.CSVFile{}, 24 | exp: [][]string{ 25 | {"100", "101", "102", "103", "104", "105", "106", "107", "108", "109"}, 26 | }, 27 | }, 28 | { 29 | name: "without count generates as many as the max line", 30 | start: 200, 31 | files: map[string]model.CSVFile{ 32 | "table": { 33 | Lines: [][]string{ 34 | {"a", "b", "c"}, 35 | {"a", "b", "c", "d", "e"}, 36 | }, 37 | }, 38 | }, 39 | exp: [][]string{ 40 | {"a", "b", "c"}, 41 | {"a", "b", "c", "d", "e"}, 42 | {"200", "201", "202", "203", "204"}, 43 | }, 44 | }, 45 | } 46 | 47 | for _, c := range cases { 48 | t.Run(c.name, func(t *testing.T) { 49 | table := model.Table{ 50 | Name: "table", 51 | Count: c.count, 52 | } 53 | 54 | column := model.Column{ 55 | Name: "id", 56 | } 57 | 58 | g := IncGenerator{ 59 | Start: c.start, 60 | } 61 | 62 | err := g.Generate(table, column, c.files) 63 | assert.Nil(t, err) 64 | assert.Equal(t, 65 | []string([]string{"id"}), 66 | c.files["table"].Header, 67 | ) 68 | assert.Equal(t, 69 | c.exp, 70 | c.files["table"].Lines, 71 | ) 72 | }) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /internal/pkg/generator/match_generator.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/codingconcepts/dg/internal/pkg/model" 7 | 8 | "github.com/samber/lo" 9 | ) 10 | 11 | // MatchGenerator provides additional context to a match column. 12 | type MatchGenerator struct { 13 | SourceTable string `yaml:"source_table"` 14 | SourceColumn string `yaml:"source_column"` 15 | SourceValue string `yaml:"source_value"` 16 | MatchColumn string `yaml:"match_column"` 17 | } 18 | 19 | // Generate matches values from a previously generated table and inserts values 20 | // into a new table where match is found. 21 | func (g MatchGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error { 22 | sourceTable, ok := files[g.SourceTable] 23 | if !ok { 24 | return fmt.Errorf("missing source table %q for match lookup", g.SourceTable) 25 | } 26 | 27 | sourceColumnIndex := lo.IndexOf(sourceTable.Header, g.SourceColumn) 28 | sourceColumn := sourceTable.Lines[sourceColumnIndex] 29 | 30 | valueColumnIndex := lo.IndexOf(sourceTable.Header, g.SourceValue) 31 | valueColumn := sourceTable.Lines[valueColumnIndex] 32 | 33 | sourceMap := map[string]string{} 34 | for i := 0; i < len(sourceColumn); i++ { 35 | sourceMap[sourceColumn[i]] = valueColumn[i] 36 | } 37 | 38 | matchTable, ok := files[t.Name] 39 | if !ok { 40 | return fmt.Errorf("missing destination table %q for match lookup", t.Name) 41 | } 42 | 43 | // Use the match table headers to determine index, as the each processor 44 | // will re-order columns. 45 | _, matchColumnIndex, ok := lo.FindIndexOf(matchTable.Header, func(c string) bool { 46 | return c == g.MatchColumn 47 | }) 48 | if !ok { 49 | return fmt.Errorf("missing match column %q in current table", g.MatchColumn) 50 | } 51 | 52 | matchColumn := matchTable.Lines[matchColumnIndex] 53 | 54 | lines := make([]string, len(matchColumn)) 55 | for i, matchC := range matchColumn { 56 | if sourceValue, ok := sourceMap[matchC]; ok { 57 | lines[i] = sourceValue 58 | } 59 | } 60 | 61 | AddTable(t, c.Name, lines, files) 62 | return nil 63 | } 64 | -------------------------------------------------------------------------------- /internal/pkg/generator/match_generator_test.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/codingconcepts/dg/internal/pkg/model" 7 | 8 | "github.com/samber/lo" 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestGenerateMatchColumn(t *testing.T) { 13 | cases := []struct { 14 | name string 15 | srcTable *model.CSVFile 16 | srcTableName string 17 | srcColumnName string 18 | srcValueName string 19 | dstTable *model.CSVFile 20 | dstColumns []model.Column 21 | dstColumn model.Column 22 | matchColumn string 23 | expColumn []string 24 | expError error 25 | }{ 26 | { 27 | name: "generates matching columns", 28 | srcTable: &model.CSVFile{ 29 | Name: "significant_events", 30 | Header: []string{"date", "event"}, 31 | Lines: [][]string{ 32 | {"2023-01-01", "2023-01-03"}, 33 | {"abc", "def"}, 34 | }, 35 | }, 36 | srcTableName: "significant_events", 37 | srcColumnName: "date", 38 | srcValueName: "event", 39 | dstTable: &model.CSVFile{ 40 | Name: "timeline", 41 | Header: []string{"timeline_date"}, 42 | Lines: [][]string{ 43 | {"2023-01-01", "2023-01-02", "2023-01-03"}, 44 | }, 45 | }, 46 | dstColumns: []model.Column{ 47 | {Name: "timeline_date"}, 48 | }, 49 | dstColumn: model.Column{ 50 | Name: "timeline_event", 51 | }, 52 | matchColumn: "timeline_date", 53 | expColumn: []string{"abc", "", "def"}, 54 | }, 55 | } 56 | 57 | for _, c := range cases { 58 | t.Run(c.name, func(t *testing.T) { 59 | table := model.Table{ 60 | Name: c.dstTable.Name, 61 | Columns: c.dstColumns, 62 | } 63 | 64 | column := c.dstColumn 65 | 66 | g := MatchGenerator{ 67 | SourceTable: c.srcTableName, 68 | SourceColumn: c.srcColumnName, 69 | SourceValue: c.srcValueName, 70 | MatchColumn: c.matchColumn, 71 | } 72 | 73 | files := map[string]model.CSVFile{} 74 | if c.srcTable != nil { 75 | files[c.srcTable.Name] = *c.srcTable 76 | } 77 | if c.dstTable != nil { 78 | files[c.dstTable.Name] = *c.dstTable 79 | } 80 | 81 | err := g.Generate(table, column, files) 82 | assert.Equal(t, c.expError, err) 83 | if err != nil { 84 | return 85 | } 86 | 87 | actColumnIndex := lo.IndexOf(files[c.dstTable.Name].Header, c.dstColumn.Name) 88 | assert.Equal(t, c.expColumn, files[c.dstTable.Name].Lines[actColumnIndex]) 89 | }) 90 | } 91 | } 92 | 93 | /* 94 | source table: 95 | 96 | date, event 97 | 2023-01-01, abc 98 | 2023-01-03, def 99 | 100 | 101 | dest table: 102 | 103 | timeline_date, timeline_event 104 | 2023-01-01 105 | 2023-01-02 106 | 2023-01-03 107 | 108 | 109 | outcome: 110 | timeline_date, timeline_event 111 | 2023-01-01, abc 112 | 2023-01-02 113 | 2023-01-03, def 114 | */ 115 | -------------------------------------------------------------------------------- /internal/pkg/generator/placeholder.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import "github.com/brianvoe/gofakeit/v6" 4 | 5 | var ( 6 | replacements = map[string]func() any{ 7 | "${ach_account}": func() any { return gofakeit.AchAccount() }, 8 | "${ach_routing}": func() any { return gofakeit.AchRouting() }, 9 | "${adjective_demonstrative}": func() any { return gofakeit.AdjectiveDemonstrative() }, 10 | "${adjective_descriptive}": func() any { return gofakeit.AdjectiveDescriptive() }, 11 | "${adjective_indefinite}": func() any { return gofakeit.AdjectiveIndefinite() }, 12 | "${adjective_interrogative}": func() any { return gofakeit.AdjectiveInterrogative() }, 13 | "${adjective_possessive}": func() any { return gofakeit.AdjectivePossessive() }, 14 | "${adjective_proper}": func() any { return gofakeit.AdjectiveProper() }, 15 | "${adjective_quantitative}": func() any { return gofakeit.AdjectiveQuantitative() }, 16 | "${adjective}": func() any { return gofakeit.Adjective() }, 17 | "${adverb_degree}": func() any { return gofakeit.AdverbDegree() }, 18 | "${adverb_frequency_definite}": func() any { return gofakeit.AdverbFrequencyDefinite() }, 19 | "${adverb_frequency_indefinite}": func() any { return gofakeit.AdverbFrequencyIndefinite() }, 20 | "${adverb_manner}": func() any { return gofakeit.AdverbManner() }, 21 | "${adverb_place}": func() any { return gofakeit.AdverbPlace() }, 22 | "${adverb_time_definite}": func() any { return gofakeit.AdverbTimeDefinite() }, 23 | "${adverb_time_indefinite}": func() any { return gofakeit.AdverbTimeIndefinite() }, 24 | "${adverb}": func() any { return gofakeit.Adverb() }, 25 | "${animal_type}": func() any { return gofakeit.AnimalType() }, 26 | "${animal}": func() any { return gofakeit.Animal() }, 27 | "${app_author}": func() any { return gofakeit.AppAuthor() }, 28 | "${app_name}": func() any { return gofakeit.AppName() }, 29 | "${app_version}": func() any { return gofakeit.AppVersion() }, 30 | "${bitcoin_address}": func() any { return gofakeit.BitcoinAddress() }, 31 | "${bitcoin_private_key}": func() any { return gofakeit.BitcoinPrivateKey() }, 32 | "${bool}": func() any { return gofakeit.Bool() }, 33 | "${breakfast}": func() any { return gofakeit.Breakfast() }, 34 | "${bs}": func() any { return gofakeit.BS() }, 35 | "${car_fuel_type}": func() any { return gofakeit.CarFuelType() }, 36 | "${car_maker}": func() any { return gofakeit.CarMaker() }, 37 | "${car_model}": func() any { return gofakeit.CarModel() }, 38 | "${car_transmission_type}": func() any { return gofakeit.CarTransmissionType() }, 39 | "${car_type}": func() any { return gofakeit.CarType() }, 40 | "${chrome_user_agent}": func() any { return gofakeit.ChromeUserAgent() }, 41 | "${city}": func() any { return gofakeit.City() }, 42 | "${color}": func() any { return gofakeit.Color() }, 43 | "${company_suffix}": func() any { return gofakeit.CompanySuffix() }, 44 | "${company}": func() any { return gofakeit.Company() }, 45 | "${connective_casual}": func() any { return gofakeit.ConnectiveCasual() }, 46 | "${connective_complaint}": func() any { return gofakeit.ConnectiveComplaint() }, 47 | "${connective_examplify}": func() any { return gofakeit.ConnectiveExamplify() }, 48 | "${connective_listing}": func() any { return gofakeit.ConnectiveListing() }, 49 | "${connective_time}": func() any { return gofakeit.ConnectiveTime() }, 50 | "${connective}": func() any { return gofakeit.Connective() }, 51 | "${country_abr}": func() any { return gofakeit.CountryAbr() }, 52 | "${country}": func() any { return gofakeit.Country() }, 53 | "${credit_card_cvv}": func() any { return gofakeit.CreditCardCvv() }, 54 | "${credit_card_exp}": func() any { return gofakeit.CreditCardExp() }, 55 | "${credit_card_type}": func() any { return gofakeit.CreditCardType() }, 56 | "${currency_long}": func() any { return gofakeit.CurrencyLong() }, 57 | "${currency_short}": func() any { return gofakeit.CurrencyShort() }, 58 | "${date}": func() any { return gofakeit.Date() }, 59 | "${day}": func() any { return gofakeit.Day() }, 60 | "${dessert}": func() any { return gofakeit.Dessert() }, 61 | "${dinner}": func() any { return gofakeit.Dinner() }, 62 | "${domain_name}": func() any { return gofakeit.DomainName() }, 63 | "${domain_suffix}": func() any { return gofakeit.DomainSuffix() }, 64 | "${email}": func() any { return gofakeit.Email() }, 65 | "${emoji}": func() any { return gofakeit.Emoji() }, 66 | "${file_extension}": func() any { return gofakeit.FileExtension() }, 67 | "${file_mime_type}": func() any { return gofakeit.FileMimeType() }, 68 | "${firefox_user_agent}": func() any { return gofakeit.FirefoxUserAgent() }, 69 | "${first_name}": func() any { return gofakeit.FirstName() }, 70 | "${flipacoin}": func() any { return gofakeit.FlipACoin() }, 71 | "${float32}": func() any { return gofakeit.Float32() }, 72 | "${float64}": func() any { return gofakeit.Float64() }, 73 | "${fruit}": func() any { return gofakeit.Fruit() }, 74 | "${gender}": func() any { return gofakeit.Gender() }, 75 | "${hexcolor}": func() any { return gofakeit.HexColor() }, 76 | "${hobby}": func() any { return gofakeit.Hobby() }, 77 | "${hour}": func() any { return gofakeit.Hour() }, 78 | "${http_method}": func() any { return gofakeit.HTTPMethod() }, 79 | "${http_status_code_simple}": func() any { return gofakeit.HTTPStatusCodeSimple() }, 80 | "${http_status_code}": func() any { return gofakeit.HTTPStatusCode() }, 81 | "${http_version}": func() any { return gofakeit.HTTPVersion() }, 82 | "${int16}": func() any { return gofakeit.Int16() }, 83 | "${int32}": func() any { return gofakeit.Int32() }, 84 | "${int64}": func() any { return gofakeit.Int64() }, 85 | "${int8}": func() any { return gofakeit.Int8() }, 86 | "${ipv4_address}": func() any { return gofakeit.IPv4Address() }, 87 | "${ipv6_address}": func() any { return gofakeit.IPv6Address() }, 88 | "${job_descriptor}": func() any { return gofakeit.JobDescriptor() }, 89 | "${job_level}": func() any { return gofakeit.JobLevel() }, 90 | "${job_title}": func() any { return gofakeit.JobTitle() }, 91 | "${language_abbreviation}": func() any { return gofakeit.LanguageAbbreviation() }, 92 | "${language}": func() any { return gofakeit.Language() }, 93 | "${last_name}": func() any { return gofakeit.LastName() }, 94 | "${latitude}": func() any { return gofakeit.Latitude() }, 95 | "${longitude}": func() any { return gofakeit.Longitude() }, 96 | "${lunch}": func() any { return gofakeit.Lunch() }, 97 | "${mac_address}": func() any { return gofakeit.MacAddress() }, 98 | "${minute}": func() any { return gofakeit.Minute() }, 99 | "${month_string}": func() any { return gofakeit.MonthString() }, 100 | "${month}": func() any { return gofakeit.Month() }, 101 | "${name_prefix}": func() any { return gofakeit.NamePrefix() }, 102 | "${name_suffix}": func() any { return gofakeit.NameSuffix() }, 103 | "${name}": func() any { return gofakeit.Name() }, 104 | "${nanosecond}": func() any { return gofakeit.NanoSecond() }, 105 | "${nicecolors}": func() any { return gofakeit.NiceColors() }, 106 | "${noun_abstract}": func() any { return gofakeit.NounAbstract() }, 107 | "${noun_collective_animal}": func() any { return gofakeit.NounCollectiveAnimal() }, 108 | "${noun_collective_people}": func() any { return gofakeit.NounCollectivePeople() }, 109 | "${noun_collective_thing}": func() any { return gofakeit.NounCollectiveThing() }, 110 | "${noun_common}": func() any { return gofakeit.NounCommon() }, 111 | "${noun_concrete}": func() any { return gofakeit.NounConcrete() }, 112 | "${noun_countable}": func() any { return gofakeit.NounCountable() }, 113 | "${noun_uncountable}": func() any { return gofakeit.NounUncountable() }, 114 | "${noun}": func() any { return gofakeit.Noun() }, 115 | "${opera_user_agent}": func() any { return gofakeit.OperaUserAgent() }, 116 | "${password}": func() any { return gofakeit.Password(true, true, true, true, true, 25) }, 117 | "${pet_name}": func() any { return gofakeit.PetName() }, 118 | "${phone_formatted}": func() any { return gofakeit.PhoneFormatted() }, 119 | "${phone}": func() any { return gofakeit.Phone() }, 120 | "${phrase}": func() any { return gofakeit.Phrase() }, 121 | "${preposition_compound}": func() any { return gofakeit.PrepositionCompound() }, 122 | "${preposition_double}": func() any { return gofakeit.PrepositionDouble() }, 123 | "${preposition_simple}": func() any { return gofakeit.PrepositionSimple() }, 124 | "${preposition}": func() any { return gofakeit.Preposition() }, 125 | "${programming_language}": func() any { return gofakeit.ProgrammingLanguage() }, 126 | "${pronoun_demonstrative}": func() any { return gofakeit.PronounDemonstrative() }, 127 | "${pronoun_interrogative}": func() any { return gofakeit.PronounInterrogative() }, 128 | "${pronoun_object}": func() any { return gofakeit.PronounObject() }, 129 | "${pronoun_personal}": func() any { return gofakeit.PronounPersonal() }, 130 | "${pronoun_possessive}": func() any { return gofakeit.PronounPossessive() }, 131 | "${pronoun_reflective}": func() any { return gofakeit.PronounReflective() }, 132 | "${pronoun_relative}": func() any { return gofakeit.PronounRelative() }, 133 | "${pronoun}": func() any { return gofakeit.Pronoun() }, 134 | "${quote}": func() any { return gofakeit.Quote() }, 135 | "${rgbcolor}": func() any { return gofakeit.RGBColor() }, 136 | "${safari_user_agent}": func() any { return gofakeit.SafariUserAgent() }, 137 | "${safecolor}": func() any { return gofakeit.SafeColor() }, 138 | "${second}": func() any { return gofakeit.Second() }, 139 | "${snack}": func() any { return gofakeit.Snack() }, 140 | "${ssn}": func() any { return gofakeit.SSN() }, 141 | "${state_abr}": func() any { return gofakeit.StateAbr() }, 142 | "${state}": func() any { return gofakeit.State() }, 143 | "${street_name}": func() any { return gofakeit.StreetName() }, 144 | "${street_number}": func() any { return gofakeit.StreetNumber() }, 145 | "${street_prefix}": func() any { return gofakeit.StreetPrefix() }, 146 | "${street_suffix}": func() any { return gofakeit.StreetSuffix() }, 147 | "${street}": func() any { return gofakeit.Street() }, 148 | "${time_zone_abv}": func() any { return gofakeit.TimeZoneAbv() }, 149 | "${time_zone_full}": func() any { return gofakeit.TimeZoneFull() }, 150 | "${time_zone_offset}": func() any { return gofakeit.TimeZoneOffset() }, 151 | "${time_zone_region}": func() any { return gofakeit.TimeZoneRegion() }, 152 | "${time_zone}": func() any { return gofakeit.TimeZone() }, 153 | "${uint128_hex}": func() any { return gofakeit.HexUint128() }, 154 | "${uint16_hex}": func() any { return gofakeit.HexUint16() }, 155 | "${uint16}": func() any { return gofakeit.Uint16() }, 156 | "${uint256_hex}": func() any { return gofakeit.HexUint256() }, 157 | "${uint32_hex}": func() any { return gofakeit.HexUint32() }, 158 | "${uint32}": func() any { return gofakeit.Uint32() }, 159 | "${uint64_hex}": func() any { return gofakeit.HexUint64() }, 160 | "${uint64}": func() any { return gofakeit.Uint64() }, 161 | "${uint8_hex}": func() any { return gofakeit.HexUint8() }, 162 | "${uint8}": func() any { return gofakeit.Uint8() }, 163 | "${url}": func() any { return gofakeit.URL() }, 164 | "${user_agent}": func() any { return gofakeit.UserAgent() }, 165 | "${username}": func() any { return gofakeit.Username() }, 166 | "${uuid}": func() any { return gofakeit.UUID() }, 167 | "${vegetable}": func() any { return gofakeit.Vegetable() }, 168 | "${verb_action}": func() any { return gofakeit.VerbAction() }, 169 | "${verb_helping}": func() any { return gofakeit.VerbHelping() }, 170 | "${verb_linking}": func() any { return gofakeit.VerbLinking() }, 171 | "${verb}": func() any { return gofakeit.Verb() }, 172 | "${weekday}": func() any { return gofakeit.WeekDay() }, 173 | "${word}": func() any { return gofakeit.Word() }, 174 | "${year}": func() any { return gofakeit.Year() }, 175 | "${zip}": func() any { return gofakeit.Zip() }, 176 | } 177 | ) 178 | -------------------------------------------------------------------------------- /internal/pkg/generator/range_generator.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "time" 7 | 8 | "github.com/codingconcepts/dg/internal/pkg/model" 9 | "github.com/samber/lo" 10 | ) 11 | 12 | // RangeGenerator provides additional context to a range column. 13 | type RangeGenerator struct { 14 | Type string `yaml:"type"` 15 | From string `yaml:"from"` 16 | To string `yaml:"to"` 17 | Step string `yaml:"step"` 18 | Format string `yaml:"format"` 19 | } 20 | 21 | // Generate sequential data between a given start and end range. 22 | func (g RangeGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error { 23 | count := len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool { 24 | return len(a) > len(b) 25 | })) 26 | 27 | if count == 0 { 28 | count = t.Count 29 | } 30 | 31 | switch g.Type { 32 | case "date": 33 | lines, err := g.generateDateSlice(count) 34 | if err != nil { 35 | return fmt.Errorf("generating date slice: %w", err) 36 | } 37 | 38 | AddTable(t, c.Name, lines, files) 39 | return nil 40 | 41 | case "int": 42 | lines, err := g.generateIntSlice(count) 43 | if err != nil { 44 | return fmt.Errorf("generating int slice: %w", err) 45 | } 46 | 47 | AddTable(t, c.Name, lines, files) 48 | return nil 49 | 50 | default: 51 | return fmt.Errorf("%q is not a valid range type", g.Type) 52 | } 53 | } 54 | 55 | func (g RangeGenerator) generateDateSlice(count int) ([]string, error) { 56 | // Validate that we have everything we need. 57 | if count == 0 && g.Step == "" { 58 | return nil, fmt.Errorf("either a count or a step must be provided to a date range generator") 59 | } 60 | 61 | from, err := time.Parse(g.Format, g.From) 62 | if err != nil { 63 | return nil, fmt.Errorf("parsing from date: %w", err) 64 | } 65 | 66 | to, err := time.Parse(g.Format, g.To) 67 | if err != nil { 68 | return nil, fmt.Errorf("parsing to date: %w", err) 69 | } 70 | 71 | var step time.Duration 72 | if count > 0 { 73 | step = to.Sub(from) / time.Duration(count) 74 | } else { 75 | if step, err = time.ParseDuration(g.Step); err != nil { 76 | return nil, fmt.Errorf("parsing step: %w", err) 77 | } 78 | } 79 | 80 | var s []string 81 | for i := from; i.Before(to); i = i.Add(step) { 82 | s = append(s, i.Format(g.Format)) 83 | } 84 | 85 | return s, nil 86 | } 87 | 88 | func (g RangeGenerator) generateIntSlice(count int) ([]string, error) { 89 | // Validate that we have everything we need. 90 | if count == 0 && g.Step == "" { 91 | return nil, fmt.Errorf("either a count or a step must be provided to an int range generator") 92 | } 93 | 94 | from, err := strconv.Atoi(g.From) 95 | if err != nil { 96 | return nil, fmt.Errorf("parsing from number: %w", err) 97 | } 98 | 99 | var to int 100 | if g.To == "" { 101 | to = from + count - 1 102 | } else { 103 | if to, err = strconv.Atoi(g.To); err != nil { 104 | return nil, fmt.Errorf("parsing to number: %w", err) 105 | } 106 | } 107 | 108 | var step int 109 | if count > 0 { 110 | step = (to - from) / (count - 1) 111 | } else { 112 | if step, err = strconv.Atoi(g.Step); err != nil { 113 | return nil, fmt.Errorf("parsing step number: %w", err) 114 | } 115 | } 116 | 117 | var s []string 118 | for i := from; i <= to; i += step { 119 | s = append(s, strconv.Itoa(i)) 120 | } 121 | 122 | return s, nil 123 | } 124 | -------------------------------------------------------------------------------- /internal/pkg/generator/range_generator_test.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/codingconcepts/dg/internal/pkg/model" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestGenerateRangeColumn(t *testing.T) { 12 | cases := []struct { 13 | name string 14 | files map[string]model.CSVFile 15 | rtype string 16 | count int 17 | from string 18 | to string 19 | step string 20 | format string 21 | expLines []string 22 | expErr error 23 | }{ 24 | { 25 | name: "generates date range for existing table", 26 | files: map[string]model.CSVFile{ 27 | "table": { 28 | Lines: [][]string{ 29 | {"a"}, 30 | {"a", "b"}, 31 | {"a", "b", "c"}, 32 | }, 33 | }, 34 | }, 35 | rtype: "date", 36 | count: 5, 37 | from: "2023-01-01", 38 | to: "2023-02-01", 39 | step: "24h", 40 | format: "2006-01-02", 41 | expLines: []string{ 42 | "2023-01-01", 43 | "2023-01-11", 44 | "2023-01-21", 45 | }, 46 | }, 47 | { 48 | name: "generates date range for count", 49 | files: map[string]model.CSVFile{}, 50 | rtype: "date", 51 | count: 4, 52 | from: "2023-01-01", 53 | to: "2023-02-01", 54 | step: "24h", 55 | format: "2006-01-02", 56 | expLines: []string{ 57 | "2023-01-01", 58 | "2023-01-08", 59 | "2023-01-16", 60 | "2023-01-24", 61 | }, 62 | }, 63 | { 64 | name: "generates date range for step", 65 | files: map[string]model.CSVFile{}, 66 | rtype: "date", 67 | from: "2023-01-01", 68 | to: "2023-02-01", 69 | step: "72h", 70 | format: "2006-01-02", 71 | expLines: []string{ 72 | "2023-01-01", 73 | "2023-01-04", 74 | "2023-01-07", 75 | "2023-01-10", 76 | "2023-01-13", 77 | "2023-01-16", 78 | "2023-01-19", 79 | "2023-01-22", 80 | "2023-01-25", 81 | "2023-01-28", 82 | "2023-01-31", 83 | }, 84 | }, 85 | { 86 | name: "generates date range for count", 87 | files: map[string]model.CSVFile{}, 88 | rtype: "int", 89 | count: 10, 90 | from: "1", 91 | expLines: []string{ 92 | "1", 93 | "2", 94 | "3", 95 | "4", 96 | "5", 97 | "6", 98 | "7", 99 | "8", 100 | "9", 101 | "10", 102 | }, 103 | }, 104 | { 105 | name: "generates int range for existing table", 106 | files: map[string]model.CSVFile{ 107 | "table": { 108 | Lines: [][]string{ 109 | {"a"}, 110 | {"a", "b"}, 111 | {"a", "b", "c"}, 112 | }, 113 | }, 114 | }, 115 | rtype: "int", 116 | count: 5, 117 | from: "1", 118 | to: "5", 119 | expLines: []string{ 120 | "1", 121 | "3", 122 | "5", 123 | }, 124 | }, 125 | { 126 | name: "generates int range for count", 127 | files: map[string]model.CSVFile{}, 128 | rtype: "int", 129 | count: 4, 130 | from: "10", 131 | to: "40", 132 | step: "10", 133 | expLines: []string{ 134 | "10", 135 | "20", 136 | "30", 137 | "40", 138 | }, 139 | }, 140 | { 141 | name: "generates int range for const", 142 | files: map[string]model.CSVFile{}, 143 | rtype: "int", 144 | count: 4, 145 | from: "1", 146 | step: "1", 147 | expLines: []string{ 148 | "1", 149 | "2", 150 | "3", 151 | "4", 152 | }, 153 | }, 154 | } 155 | 156 | for _, c := range cases { 157 | t.Run(c.name, func(t *testing.T) { 158 | table := model.Table{ 159 | Name: "table", 160 | Count: c.count, 161 | } 162 | 163 | column := model.Column{ 164 | Name: "col", 165 | } 166 | 167 | g := RangeGenerator{ 168 | Type: c.rtype, 169 | From: c.from, 170 | To: c.to, 171 | Step: c.step, 172 | Format: c.format, 173 | } 174 | 175 | files := c.files 176 | 177 | err := g.Generate(table, column, files) 178 | assert.Equal(t, c.expErr, err) 179 | 180 | if err != nil { 181 | return 182 | } 183 | 184 | assert.Equal(t, c.expLines, files["table"].Lines[len(files["table"].Lines)-1]) 185 | }) 186 | } 187 | } 188 | 189 | func TestGenerateDateSlice(t *testing.T) { 190 | cases := []struct { 191 | name string 192 | from string 193 | to string 194 | format string 195 | count int 196 | step string 197 | expSlice []string 198 | expError string 199 | }{ 200 | { 201 | name: "no count or step", 202 | expError: "either a count or a step must be provided to a date range generator", 203 | }, 204 | { 205 | name: "count", 206 | count: 10, 207 | from: "2023-01-01", 208 | to: "2023-01-10", 209 | format: "2006-01-02", 210 | expSlice: []string{ 211 | "2023-01-01", "2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04", "2023-01-05", "2023-01-06", "2023-01-07", "2023-01-08", "2023-01-09", 212 | }, 213 | }, 214 | { 215 | name: "step", 216 | step: "24h", 217 | from: "2023-01-10", 218 | to: "2023-01-20", 219 | format: "2006-01-02", 220 | expSlice: []string{ 221 | "2023-01-10", "2023-01-11", "2023-01-12", "2023-01-13", "2023-01-14", "2023-01-15", "2023-01-16", "2023-01-17", "2023-01-18", "2023-01-19", 222 | }, 223 | }, 224 | { 225 | name: "invalid format", 226 | count: 10, 227 | from: "2023-01-01", 228 | to: "2023-01-10", 229 | format: "abc", 230 | expError: `parsing from date: parsing time "2023-01-01" as "abc": cannot parse "2023-01-01" as "abc"`, 231 | }, 232 | { 233 | name: "invalid from date", 234 | count: 10, 235 | from: "abc", 236 | format: "2006-01-02", 237 | 238 | to: "2023-01-10", 239 | expError: `parsing from date: parsing time "abc" as "2006-01-02": cannot parse "abc" as "2006"`, 240 | }, 241 | { 242 | name: "invalid to date", 243 | count: 10, 244 | from: "2023-01-01", 245 | to: "abc", 246 | format: "2006-01-02", 247 | expError: `parsing to date: parsing time "abc" as "2006-01-02": cannot parse "abc" as "2006"`, 248 | }, 249 | { 250 | name: "invalid step", 251 | step: "abc", 252 | from: "2023-01-01", 253 | to: "2023-01-10", 254 | format: "2006-01-02", 255 | expError: `parsing step: time: invalid duration "abc"`, 256 | }, 257 | } 258 | 259 | for _, c := range cases { 260 | t.Run(c.name, func(t *testing.T) { 261 | g := RangeGenerator{ 262 | From: c.from, 263 | To: c.to, 264 | Format: c.format, 265 | Step: c.step, 266 | } 267 | 268 | actSlice, actErr := g.generateDateSlice(c.count) 269 | if c.expError != "" { 270 | assert.Equal(t, c.expError, actErr.Error()) 271 | return 272 | } 273 | 274 | assert.Equal(t, c.expSlice, actSlice) 275 | }) 276 | } 277 | } 278 | -------------------------------------------------------------------------------- /internal/pkg/generator/ref_generator.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/codingconcepts/dg/internal/pkg/model" 7 | "github.com/codingconcepts/dg/internal/pkg/random" 8 | 9 | "github.com/samber/lo" 10 | ) 11 | 12 | // RefGenerator provides additional context to a ref column. 13 | type RefGenerator struct { 14 | Table string `yaml:"table"` 15 | Column string `yaml:"column"` 16 | } 17 | 18 | // Generate looks to previously generated table data and references that when generating data 19 | // for the given table. 20 | func (g RefGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error { 21 | if t.Count == 0 { 22 | t.Count = len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool { 23 | return len(a) > len(b) 24 | })) 25 | } 26 | 27 | table, ok := files[g.Table] 28 | if !ok { 29 | return fmt.Errorf("missing table %q for ref lookup", g.Table) 30 | } 31 | 32 | colIndex := lo.IndexOf(table.Header, g.Column) 33 | column := table.Lines[colIndex] 34 | 35 | var line []string 36 | for i := 0; i < t.Count; i++ { 37 | line = append(line, column[random.Intn(len(column))]) 38 | } 39 | 40 | AddTable(t, c.Name, line, files) 41 | return nil 42 | } 43 | -------------------------------------------------------------------------------- /internal/pkg/generator/ref_generator_test.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/codingconcepts/dg/internal/pkg/model" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestGenerateRefColumn(t *testing.T) { 12 | table := model.Table{ 13 | Name: "pet", 14 | Count: 2, 15 | } 16 | 17 | column := model.Column{ 18 | Name: "person_id", 19 | } 20 | 21 | g := RefGenerator{ 22 | Table: "person", 23 | Column: "id", 24 | } 25 | 26 | files := map[string]model.CSVFile{ 27 | "person": { 28 | Header: []string{"id"}, 29 | Lines: [][]string{{"ce9af887-37eb-4e08-9790-4f481b0fa594"}}, 30 | }, 31 | } 32 | err := g.Generate(table, column, files) 33 | assert.Nil(t, err) 34 | assert.Equal(t, "ce9af887-37eb-4e08-9790-4f481b0fa594", files["pet"].Lines[0][0]) 35 | assert.Equal(t, "ce9af887-37eb-4e08-9790-4f481b0fa594", files["pet"].Lines[0][1]) 36 | } 37 | -------------------------------------------------------------------------------- /internal/pkg/generator/set_generator.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/codingconcepts/dg/internal/pkg/model" 7 | "github.com/codingconcepts/dg/internal/pkg/random" 8 | "github.com/samber/lo" 9 | ) 10 | 11 | // SetGenerator provides additional context to a set column. 12 | type SetGenerator struct { 13 | Values []string `yaml:"values"` 14 | Weights []int `yaml:"weights"` 15 | } 16 | 17 | // Generate selects between a set of values for a given table. 18 | func (g SetGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error { 19 | if len(g.Values) == 0 { 20 | return fmt.Errorf("no values provided for set generator") 21 | } 22 | 23 | count := len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool { 24 | return len(a) > len(b) 25 | })) 26 | 27 | if count == 0 { 28 | count = t.Count 29 | } 30 | 31 | var line []string 32 | if len(g.Weights) > 0 { 33 | items, err := g.buildWeightedItems() 34 | if err != nil { 35 | return fmt.Errorf("making weighted items collection: %w", err) 36 | } 37 | 38 | for i := 0; i < count; i++ { 39 | line = append(line, items.choose()) 40 | } 41 | } else { 42 | for i := 0; i < count; i++ { 43 | line = append(line, g.Values[random.Intn(len(g.Values))]) 44 | } 45 | } 46 | 47 | AddTable(t, c.Name, line, files) 48 | return nil 49 | } 50 | 51 | func (g SetGenerator) buildWeightedItems() (weightedItems, error) { 52 | if len(g.Values) != len(g.Weights) { 53 | return weightedItems{}, fmt.Errorf("set values and weights need to be the same") 54 | } 55 | 56 | weightedItems := make([]weightedItem, len(g.Values)) 57 | for i, v := range g.Values { 58 | weightedItems = append(weightedItems, weightedItem{ 59 | Value: v, 60 | Weight: g.Weights[i], 61 | }) 62 | } 63 | 64 | return makeWeightedItems(weightedItems), nil 65 | } 66 | -------------------------------------------------------------------------------- /internal/pkg/generator/set_generator_test.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/codingconcepts/dg/internal/pkg/model" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestGenerateSetColumn(t *testing.T) { 12 | table := model.Table{ 13 | Name: "table", 14 | Count: 10, 15 | } 16 | 17 | column := model.Column{ 18 | Name: "id", 19 | } 20 | 21 | g := SetGenerator{ 22 | Values: []string{"a", "b", "c"}, 23 | Weights: []int{0, 1, 0}, 24 | } 25 | 26 | files := map[string]model.CSVFile{} 27 | 28 | err := g.Generate(table, column, files) 29 | assert.Nil(t, err) 30 | assert.Equal(t, 31 | []string([]string{"id"}), 32 | files["table"].Header, 33 | ) 34 | assert.Equal(t, 35 | [][]string{{"b", "b", "b", "b", "b", "b", "b", "b", "b", "b"}}, 36 | files["table"].Lines, 37 | ) 38 | } 39 | -------------------------------------------------------------------------------- /internal/pkg/generator/weighted_random.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "github.com/brianvoe/gofakeit/v6" 5 | "github.com/samber/lo" 6 | ) 7 | 8 | type weightedItem struct { 9 | Value string 10 | Weight int 11 | } 12 | 13 | type weightedItems struct { 14 | items []weightedItem 15 | totalWeight int 16 | } 17 | 18 | func makeWeightedItems(items []weightedItem) weightedItems { 19 | wi := weightedItems{ 20 | items: items, 21 | } 22 | 23 | wi.totalWeight = lo.SumBy(items, func(wi weightedItem) int { 24 | return wi.Weight 25 | }) 26 | 27 | return wi 28 | } 29 | 30 | func (wi weightedItems) choose() string { 31 | randomWeight := gofakeit.IntRange(1, wi.totalWeight) 32 | for _, i := range wi.items { 33 | randomWeight -= i.Weight 34 | if randomWeight <= 0 { 35 | return i.Value 36 | } 37 | } 38 | 39 | return "" 40 | } 41 | -------------------------------------------------------------------------------- /internal/pkg/generator/weighted_random_test.go: -------------------------------------------------------------------------------- 1 | package generator 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestMakeWeightedItems(t *testing.T) { 10 | items := makeWeightedItems( 11 | []weightedItem{ 12 | {Value: "a", Weight: 10}, 13 | {Value: "b", Weight: 20}, 14 | {Value: "c", Weight: 30}, 15 | }, 16 | ) 17 | 18 | assert.Equal(t, 60, items.totalWeight) 19 | } 20 | 21 | func TestChoose(t *testing.T) { 22 | cases := []struct { 23 | name string 24 | items []weightedItem 25 | exp []string 26 | }{ 27 | { 28 | name: "3 items 1 has all the weight", 29 | items: []weightedItem{ 30 | {Value: "a", Weight: 100}, 31 | {Value: "b", Weight: 0}, 32 | {Value: "c", Weight: 0}, 33 | }, 34 | exp: []string{"a", "a", "a", "a", "a", "a", "a", "a", "a", "a"}, 35 | }, 36 | } 37 | 38 | for _, c := range cases { 39 | t.Run(c.name, func(t *testing.T) { 40 | items := makeWeightedItems(c.items) 41 | 42 | var act []string 43 | for i := 0; i < 10; i++ { 44 | act = append(act, items.choose()) 45 | } 46 | 47 | assert.Equal(t, c.exp, act) 48 | }) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /internal/pkg/model/config.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | 7 | "gopkg.in/yaml.v3" 8 | ) 9 | 10 | // Config represents the entire contents of a config file. 11 | type Config struct { 12 | Tables []Table `yaml:"tables"` 13 | Inputs []Input `yaml:"inputs"` 14 | } 15 | 16 | // Table represents the instructions to create one CSV file. 17 | type Table struct { 18 | Name string `yaml:"name"` 19 | Count int `yaml:"count"` 20 | Suppress bool `yaml:"suppress"` 21 | UniqueColumns []string `yaml:"unique_columns"` 22 | Columns []Column `yaml:"columns"` 23 | } 24 | 25 | // Column represents the instructions to populate one CSV file column. 26 | type Column struct { 27 | Name string `yaml:"name"` 28 | Type string `yaml:"type"` 29 | Suppress bool `yaml:"suppress"` 30 | Generator RawMessage `yaml:"processor"` 31 | } 32 | 33 | // Input represents a data source provided by the user. 34 | type Input struct { 35 | Name string `yaml:"name"` 36 | Type string `yaml:"type"` 37 | Source RawMessage `yaml:"source"` 38 | } 39 | 40 | // Load config from a file 41 | func LoadConfig(r io.Reader) (Config, error) { 42 | var c Config 43 | if err := yaml.NewDecoder(r).Decode(&c); err != nil { 44 | return Config{}, fmt.Errorf("parsing file: %w", err) 45 | } 46 | 47 | return c, nil 48 | } 49 | -------------------------------------------------------------------------------- /internal/pkg/model/config_test.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestLoadConfig(t *testing.T) { 11 | y := ` 12 | inputs: 13 | - name: my_data 14 | type: csv 15 | source: 16 | file_name: my_data.csv 17 | 18 | tables: 19 | - name: person 20 | count: 100 21 | columns: 22 | - name: id 23 | type: inc 24 | processor: 25 | start: 1 26 | format: "P%03d" 27 | ` 28 | 29 | config, err := LoadConfig(strings.NewReader(y)) 30 | assert.Nil(t, err) 31 | 32 | exp := Config{ 33 | Inputs: []Input{ 34 | { 35 | Name: "my_data", 36 | Type: "csv", 37 | Source: ToRawMessage(t, SourceCSV{ 38 | FileName: "my_data.csv", 39 | }), 40 | }, 41 | }, 42 | Tables: []Table{ 43 | { 44 | Name: "person", 45 | Count: 100, 46 | Columns: []Column{ 47 | { 48 | Name: "id", 49 | Type: "inc", 50 | Generator: ToRawMessage(t, map[string]any{ 51 | "start": 1, 52 | "format": "P%03d", 53 | }), 54 | }, 55 | }, 56 | }, 57 | }, 58 | } 59 | 60 | assert.Equal(t, exp.Inputs[0].Name, config.Inputs[0].Name) 61 | assert.Equal(t, exp.Inputs[0].Type, config.Inputs[0].Type) 62 | 63 | var expSource SourceCSV 64 | assert.Nil(t, exp.Inputs[0].Source.UnmarshalFunc(&expSource)) 65 | 66 | var actSource SourceCSV 67 | assert.Nil(t, config.Inputs[0].Source.UnmarshalFunc(&actSource)) 68 | 69 | assert.Equal(t, expSource, actSource) 70 | 71 | assert.Equal(t, exp.Tables[0].Name, config.Tables[0].Name) 72 | assert.Equal(t, exp.Tables[0].Count, config.Tables[0].Count) 73 | assert.Equal(t, exp.Tables[0].Columns[0].Name, config.Tables[0].Columns[0].Name) 74 | assert.Equal(t, exp.Tables[0].Columns[0].Type, config.Tables[0].Columns[0].Type) 75 | 76 | var expProcessor map[string]any 77 | assert.Nil(t, exp.Tables[0].Columns[0].Generator.UnmarshalFunc(&expProcessor)) 78 | 79 | var actProcessor map[string]any 80 | assert.Nil(t, config.Tables[0].Columns[0].Generator.UnmarshalFunc(&actProcessor)) 81 | 82 | assert.Equal(t, expProcessor, actProcessor) 83 | } 84 | -------------------------------------------------------------------------------- /internal/pkg/model/csv_file.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "github.com/samber/lo" 5 | ) 6 | 7 | // CSVFile represents the content of a CSV file. 8 | type CSVFile struct { 9 | Name string 10 | Header []string 11 | Lines [][]string 12 | UniqueColumns []string 13 | Output bool 14 | } 15 | 16 | // Unique removes any duplicates from the CSVFile's lines. 17 | func (c *CSVFile) Unique() [][]string { 18 | uniqueColumnIndexes := uniqueIndexes(c.Header, c.UniqueColumns) 19 | 20 | uniqueValues := map[string]struct{}{} 21 | var uniqueLines [][]string 22 | 23 | for i := 0; i < len(c.Lines); i++ { 24 | key := uniqueKey(uniqueColumnIndexes, c.Lines[i]) 25 | 26 | if _, ok := uniqueValues[key]; !ok { 27 | uniqueLines = append(uniqueLines, c.Lines[i]) 28 | uniqueValues[key] = struct{}{} 29 | } 30 | } 31 | 32 | return uniqueLines 33 | } 34 | 35 | func uniqueIndexes(header, uniqueColumns []string) []int { 36 | indexes := []int{} 37 | 38 | for i, h := range header { 39 | if lo.Contains(uniqueColumns, h) { 40 | indexes = append(indexes, i) 41 | } 42 | } 43 | 44 | return indexes 45 | } 46 | 47 | func uniqueKey(indexes []int, line []string) string { 48 | output := "" 49 | 50 | for i, col := range line { 51 | if lo.Contains(indexes, i) { 52 | output += col 53 | } else { 54 | output += "-" 55 | } 56 | } 57 | 58 | return output 59 | } 60 | -------------------------------------------------------------------------------- /internal/pkg/model/csv_file_test.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestUnique(t *testing.T) { 10 | cases := []struct { 11 | name string 12 | uniqueColumns []string 13 | exp [][]string 14 | }{ 15 | { 16 | name: "1 column unique", 17 | uniqueColumns: []string{"col_1"}, 18 | exp: [][]string{ 19 | {"a", "d", "g"}, 20 | {"b", "d", "g"}, 21 | {"c", "d", "g"}, 22 | }, 23 | }, 24 | { 25 | name: "2 column unique", 26 | uniqueColumns: []string{"col_1", "col_2"}, 27 | exp: [][]string{ 28 | {"a", "d", "g"}, 29 | {"b", "d", "g"}, 30 | {"c", "d", "g"}, 31 | {"a", "e", "g"}, 32 | {"b", "e", "g"}, 33 | {"c", "e", "g"}, 34 | {"a", "f", "g"}, 35 | {"b", "f", "g"}, 36 | {"c", "f", "g"}, 37 | }, 38 | }, 39 | { 40 | name: "3 column unique", 41 | uniqueColumns: []string{"col_1", "col_2", "col_3"}, 42 | exp: [][]string{ 43 | {"a", "d", "g"}, 44 | {"b", "d", "g"}, 45 | {"c", "d", "g"}, 46 | {"a", "e", "g"}, 47 | {"b", "e", "g"}, 48 | {"c", "e", "g"}, 49 | {"a", "f", "g"}, 50 | {"b", "f", "g"}, 51 | {"c", "f", "g"}, 52 | {"a", "d", "h"}, 53 | {"b", "d", "h"}, 54 | {"c", "d", "h"}, 55 | {"a", "e", "h"}, 56 | {"b", "e", "h"}, 57 | {"c", "e", "h"}, 58 | {"a", "f", "h"}, 59 | {"b", "f", "h"}, 60 | {"c", "f", "h"}, 61 | {"a", "d", "i"}, 62 | {"b", "d", "i"}, 63 | {"c", "d", "i"}, 64 | {"a", "e", "i"}, 65 | {"b", "e", "i"}, 66 | {"c", "e", "i"}, 67 | {"a", "f", "i"}, 68 | {"b", "f", "i"}, 69 | {"c", "f", "i"}, 70 | }, 71 | }, 72 | } 73 | 74 | for _, c := range cases { 75 | t.Run(c.name, func(t *testing.T) { 76 | file := CSVFile{ 77 | Header: []string{"col_1", "col_2", "col_3"}, 78 | UniqueColumns: c.uniqueColumns, 79 | Lines: [][]string{ 80 | {"a", "d", "g"}, 81 | {"b", "d", "g"}, 82 | {"c", "d", "g"}, 83 | {"a", "e", "g"}, 84 | {"b", "e", "g"}, 85 | {"c", "e", "g"}, 86 | {"a", "f", "g"}, 87 | {"b", "f", "g"}, 88 | {"c", "f", "g"}, 89 | {"a", "d", "h"}, 90 | {"b", "d", "h"}, 91 | {"c", "d", "h"}, 92 | {"a", "e", "h"}, 93 | {"b", "e", "h"}, 94 | {"c", "e", "h"}, 95 | {"a", "f", "h"}, 96 | {"b", "f", "h"}, 97 | {"c", "f", "h"}, 98 | {"a", "d", "i"}, 99 | {"b", "d", "i"}, 100 | {"c", "d", "i"}, 101 | {"a", "e", "i"}, 102 | {"b", "e", "i"}, 103 | {"c", "e", "i"}, 104 | {"a", "f", "i"}, 105 | {"b", "f", "i"}, 106 | {"c", "f", "i"}, 107 | }, 108 | } 109 | 110 | act := file.Unique() 111 | 112 | assert.Equal(t, c.exp, act) 113 | }) 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /internal/pkg/model/raw_message.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "gopkg.in/yaml.v3" 8 | ) 9 | 10 | // RawMessage does what json.RawMessage does but for YAML. 11 | type RawMessage struct { 12 | UnmarshalFunc func(interface{}) error 13 | } 14 | 15 | func (msg *RawMessage) UnmarshalYAML(unmarshal func(interface{}) error) error { 16 | msg.UnmarshalFunc = unmarshal 17 | return nil 18 | } 19 | 20 | // ToRawMessage converts an object into a model.RawMessage for testing purposes. 21 | func ToRawMessage(t *testing.T, v any) RawMessage { 22 | buf := &bytes.Buffer{} 23 | if err := yaml.NewEncoder(buf).Encode(v); err != nil { 24 | t.Fatalf("error encoding to yaml: %v", err) 25 | } 26 | 27 | var rawMessage RawMessage 28 | if err := yaml.NewDecoder(buf).Decode(&rawMessage); err != nil { 29 | t.Fatalf("error decoding from yaml: %v", err) 30 | } 31 | 32 | return rawMessage 33 | } 34 | -------------------------------------------------------------------------------- /internal/pkg/model/raw_message_test.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "log" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | "gopkg.in/yaml.v3" 10 | ) 11 | 12 | func TestRawMessageUnmarshal(t *testing.T) { 13 | type test struct { 14 | R RawMessage `yaml:"r"` 15 | } 16 | 17 | y := `r: hello raw message` 18 | 19 | var tst test 20 | if err := yaml.NewDecoder(strings.NewReader(y)).Decode(&tst); err != nil { 21 | t.Fatalf("error decoding yaml: %v", err) 22 | } 23 | 24 | var s string 25 | if err := tst.R.UnmarshalFunc(&s); err != nil { 26 | log.Fatalf("error decoding yaml: %v", err) 27 | } 28 | 29 | assert.Equal(t, "hello raw message", s) 30 | } 31 | -------------------------------------------------------------------------------- /internal/pkg/model/source.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | // SourceCSV represents a CSV file source. 4 | type SourceCSV struct { 5 | FileName string `yaml:"file_name"` 6 | } 7 | -------------------------------------------------------------------------------- /internal/pkg/random/xorshift.go: -------------------------------------------------------------------------------- 1 | package random 2 | 3 | import "time" 4 | 5 | var ( 6 | r = newSplitMix64(time.Now().UnixNano()) 7 | ) 8 | 9 | type splitMix64 struct { 10 | s uint64 11 | } 12 | 13 | func newSplitMix64(seed int64) *splitMix64 { 14 | return &splitMix64{ 15 | s: uint64(seed), 16 | } 17 | } 18 | 19 | // Intn returns a non-negative pseudo-random int. 20 | func Intn(n int) int { 21 | return int(r.uint64()&(1<<63-1)) % n 22 | 23 | } 24 | 25 | func (x *splitMix64) uint64() uint64 { 26 | x.s = x.s + uint64(0x9E3779B97F4A7C15) 27 | z := x.s 28 | z = (z ^ (z >> 30)) * uint64(0xBF58476D1CE4E5B9) 29 | z = (z ^ (z >> 27)) * uint64(0x94D049BB133111EB) 30 | return z ^ (z >> 31) 31 | } 32 | -------------------------------------------------------------------------------- /internal/pkg/source/csv_source.go: -------------------------------------------------------------------------------- 1 | package source 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "os" 8 | "path" 9 | 10 | "github.com/codingconcepts/dg/internal/pkg/generator" 11 | "github.com/codingconcepts/dg/internal/pkg/model" 12 | ) 13 | 14 | // LoadCSVSource loads a CSV file from disk and adds it as a table to files. 15 | func LoadCSVSource(table, configDir string, s model.SourceCSV, files map[string]model.CSVFile) (err error) { 16 | fullPath := path.Join(configDir, s.FileName) 17 | file, err := os.Open(fullPath) 18 | if err != nil { 19 | return fmt.Errorf("opening csv file: %w", err) 20 | } 21 | defer func() { 22 | if ferr := file.Close(); ferr != nil { 23 | err = ferr 24 | } 25 | }() 26 | 27 | return processCSVSource(file, table, files) 28 | } 29 | 30 | func processCSVSource(file io.Reader, table string, files map[string]model.CSVFile) error { 31 | reader := csv.NewReader(file) 32 | rows, err := reader.ReadAll() 33 | if err != nil { 34 | return fmt.Errorf("reading csv file: %w", err) 35 | } 36 | 37 | headers := rows[0] 38 | columns := generator.Transpose(rows[1:]) 39 | 40 | for i, column := range columns { 41 | generator.AddInput(table, headers[i], column, files) 42 | } 43 | 44 | return nil 45 | } 46 | -------------------------------------------------------------------------------- /internal/pkg/source/csv_source_test.go: -------------------------------------------------------------------------------- 1 | package source 2 | 3 | import ( 4 | "os" 5 | "path" 6 | "testing" 7 | 8 | "github.com/codingconcepts/dg/internal/pkg/model" 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestLoadCSVSource(t *testing.T) { 13 | filePath := path.Join(t.TempDir(), "load_test.csv") 14 | assert.NoError(t, os.WriteFile(filePath, []byte("col_a,col_b,col_c\nA,B,C\n1,2,3"), os.ModePerm)) 15 | 16 | table := "input" 17 | files := make(map[string]model.CSVFile) 18 | s := model.SourceCSV{FileName: "load_test.csv"} 19 | 20 | assert.NoError(t, LoadCSVSource(table, path.Dir(filePath), s, files)) 21 | 22 | expCSVFile := model.CSVFile{ 23 | Name: "input", 24 | Header: []string{"col_a", "col_b", "col_c"}, 25 | Lines: [][]string{ 26 | {"A", "1"}, 27 | {"B", "2"}, 28 | {"C", "3"}}, 29 | Output: false} 30 | 31 | assert.Equal(t, expCSVFile, files["input"]) 32 | } 33 | -------------------------------------------------------------------------------- /internal/pkg/ui/time_tracker.go: -------------------------------------------------------------------------------- 1 | package ui 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "strings" 7 | "time" 8 | ) 9 | 10 | // Clock allows time.Now to be mocked in tests. 11 | type Clock interface { 12 | Since(time.Time) time.Duration 13 | } 14 | 15 | // TimerFunc is a function whose execution can be deferred in order to 16 | // time an event. 17 | type TimerFunc func(time.Time, string) 18 | 19 | // TimeTracker returns a new TimerFunc, given a maxMessageLen, which 20 | // determines at which point messages should start to get truncated. 21 | func TimeTracker(w io.Writer, c Clock, maxMessageLen int) TimerFunc { 22 | return func(start time.Time, msg string) { 23 | elapsed := c.Since(start) 24 | switch { 25 | case elapsed > time.Second: 26 | elapsed = elapsed.Round(time.Second) 27 | case elapsed > time.Millisecond: 28 | elapsed = elapsed.Round(time.Millisecond) 29 | default: 30 | elapsed = elapsed.Round(time.Microsecond) 31 | } 32 | 33 | if len(msg) > maxMessageLen { 34 | msg = msg[:maxMessageLen-3] + "..." 35 | } 36 | 37 | padding := strings.Repeat(" ", maxMessageLen-len(msg)) 38 | fmt.Fprintf(w, "%s %stook: %s\n", msg, padding, elapsed) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /internal/pkg/ui/time_tracker_test.go: -------------------------------------------------------------------------------- 1 | package ui 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | type mockClock struct { 12 | elapsed time.Duration 13 | } 14 | 15 | func (c mockClock) Since(time.Time) time.Duration { 16 | return c.elapsed 17 | } 18 | 19 | func TestTimerFunc(t *testing.T) { 20 | cases := []struct { 21 | name string 22 | maxMsgLen int 23 | msg string 24 | elapsed time.Duration 25 | exp string 26 | }{ 27 | { 28 | name: "microsecond scale", 29 | maxMsgLen: 1, 30 | msg: "a", 31 | elapsed: time.Nanosecond * 123500, 32 | exp: "a took: 124µs\n", 33 | }, 34 | { 35 | name: "millisecond scale", 36 | maxMsgLen: 1, 37 | msg: "a", 38 | elapsed: time.Microsecond * 123500, 39 | exp: "a took: 124ms\n", 40 | }, 41 | { 42 | name: "second scale", 43 | maxMsgLen: 1, 44 | msg: "a", 45 | elapsed: time.Millisecond * 123500, 46 | exp: "a took: 2m4s\n", 47 | }, 48 | { 49 | name: "minute scale", 50 | maxMsgLen: 1, 51 | msg: "a", 52 | elapsed: time.Second * 123500, 53 | exp: "a took: 34h18m20s\n", 54 | }, 55 | { 56 | name: "message same as truncate size", 57 | maxMsgLen: 10, 58 | msg: "aaaaaaaaaa", 59 | elapsed: 0, 60 | exp: "aaaaaaaaaa took: 0s\n", 61 | }, 62 | { 63 | name: "message over truncate size", 64 | maxMsgLen: 10, 65 | msg: "aaaaaaaaaaa", 66 | elapsed: 0, 67 | exp: "aaaaaaa... took: 0s\n", 68 | }, 69 | } 70 | 71 | for _, c := range cases { 72 | t.Run(c.name, func(t *testing.T) { 73 | clock := mockClock{ 74 | elapsed: c.elapsed, 75 | } 76 | 77 | buf := new(bytes.Buffer) 78 | tt := TimeTracker(buf, clock, c.maxMsgLen) 79 | 80 | tt(time.Now(), c.msg) 81 | 82 | assert.Equal(t, c.exp, buf.String()) 83 | }) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /internal/pkg/web/file_server.go: -------------------------------------------------------------------------------- 1 | package web 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "net/http" 7 | ) 8 | 9 | // Serve files from the output csv directory on a given port. 10 | // 11 | // Note: This is a blocking call. 12 | func Serve(dir string, port int) error { 13 | fs := http.FileServer(http.Dir(dir)) 14 | http.Handle("/", fs) 15 | 16 | addr := fmt.Sprintf(":%d", port) 17 | 18 | log.Printf("Serving files on %s", addr) 19 | return http.ListenAndServe(addr, nil) 20 | } 21 | --------------------------------------------------------------------------------