├── .github
└── workflows
│ └── coverage.yaml
├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── assets
├── cover.png
└── cover_grey.svg
├── dg.go
├── examples
├── const_test
│ └── config.yaml
├── each_match_test
│ ├── config.yaml
│ └── market.csv
├── input_test
│ ├── config.yaml
│ └── significant_dates.csv
├── many_to_many
│ ├── config.yaml
│ ├── create.sql
│ └── insert.sql
├── match_test
│ ├── config.yaml
│ └── market.csv
├── pattern_test
│ └── config.yaml
├── person
│ ├── config.yaml
│ ├── create.sql
│ └── insert.sql
├── range_test
│ └── config.yaml
├── supress_column
│ └── config.yaml
└── unique_test
│ └── config.yaml
├── go.mod
├── go.sum
└── internal
└── pkg
├── generator
├── array.go
├── array_test.go
├── common.go
├── common_test.go
├── const_generator.go
├── const_generator_test.go
├── each_generator.go
├── each_generator_test.go
├── formatter.go
├── gen_generator.go
├── gen_generator_test.go
├── inc_generator.go
├── inc_generator_test.go
├── match_generator.go
├── match_generator_test.go
├── placeholder.go
├── range_generator.go
├── range_generator_test.go
├── ref_generator.go
├── ref_generator_test.go
├── set_generator.go
├── set_generator_test.go
├── weighted_random.go
└── weighted_random_test.go
├── model
├── config.go
├── config_test.go
├── csv_file.go
├── csv_file_test.go
├── raw_message.go
├── raw_message_test.go
└── source.go
├── random
└── xorshift.go
├── source
├── csv_source.go
└── csv_source_test.go
├── ui
├── time_tracker.go
└── time_tracker_test.go
└── web
└── file_server.go
/.github/workflows/coverage.yaml:
--------------------------------------------------------------------------------
1 | name: main-coverage
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 |
8 | jobs:
9 | coverage:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@v3
13 |
14 | - name: Set up Go
15 | uses: actions/setup-go@v4
16 | with:
17 | go-version: '1.20'
18 |
19 | - name: Test
20 | run: go test -v -race -covermode=atomic -coverprofile=coverage.out ./...
21 |
22 | - name: Upload coverage reports to Codecov
23 | uses: codecov/codecov-action@v3
24 | env:
25 | CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Binaries for programs and plugins
2 | dg
3 | *.exe
4 | *.exe~
5 | *.dll
6 | *.so
7 | *.dylib
8 |
9 | # Test binary, built with `go test -c`
10 | *.test
11 |
12 | # Output of the go coverage tool, specifically when used with LiteIDE
13 | *.out
14 | *.cov
15 |
16 | # Go workspace file
17 | go.work
18 |
19 | # OS-specific
20 | .DS_Store
21 |
22 | # Application-specific
23 | csvs
24 | releases
25 | scratchpad
26 |
27 | # Code editor
28 | .vscode/
29 |
30 | .idea
31 | *.iml
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Rob Reid
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | validate_version:
2 | ifndef VERSION
3 | $(error VERSION is undefined)
4 | endif
5 |
6 | db:
7 | cockroach demo --insecure --no-example-database
8 |
9 | tables:
10 | cockroach sql --insecure < examples/many_to_many/create.sql
11 |
12 | data_many_to_many:
13 | go run dg.go -c ./examples/many_to_many/config.yaml -o ./csvs/many_to_many -i import.sql
14 |
15 | data_person:
16 | go run dg.go -c ./examples/person/config.yaml -o ./csvs/person
17 |
18 | data_range_test:
19 | go run dg.go -c ./examples/range_test/config.yaml -o ./csvs/range_test
20 |
21 | data_input_test:
22 | go run dg.go -c ./examples/input_test/config.yaml -o ./csvs/input_test
23 |
24 | data_unique_test:
25 | go run dg.go -c ./examples/unique_test/config.yaml -o ./csvs/unique_test
26 |
27 | data_const_test:
28 | go run dg.go -c ./examples/const_test/config.yaml -o ./csvs/const_test
29 |
30 | data_match:
31 | go run dg.go -c ./examples/match_test/config.yaml -o ./csvs/match -i import.sql
32 |
33 | data_each_match:
34 | go run dg.go -c ./examples/each_match_test/config.yaml -o ./csvs/each_match -i import.sql
35 |
36 | data_pattern:
37 | go run dg.go -c ./examples/pattern_test/config.yaml -o ./csvs/pattern_test -i import.sql
38 |
39 | data: data_many_to_many data_person data_range_test data_input_test data_unique_test data_const_test
40 | echo "done"
41 |
42 | file_server:
43 | python3 -m http.server 3000 -d csvs/many_to_many
44 |
45 | import:
46 | cockroach sql --insecure < examples/many_to_many/insert.sql
47 |
48 | test:
49 | go test ./... -v -cover
50 |
51 | cover:
52 | go test -v -coverpkg=./... -coverprofile=profile.cov ./... -count=1
53 | go tool cover -func profile.cov
54 | # go tool cover -html coverage.out
55 |
56 | profile:
57 | go run dg.go -c ./examples/many_to_many/config.yaml -o ./csvs/many_to_many -cpuprofile profile.out
58 | go tool pprof -http=:8080 profile.out
59 |
60 | release: validate_version
61 | # make sure the folder exists
62 | mkdir -p ./releases
63 |
64 | # linux
65 | GOOS=linux go build -ldflags "-X main.version=${VERSION}" -o dg ;\
66 | tar -zcvf ./releases/dg_${VERSION}_linux.tar.gz ./dg ;\
67 |
68 | # macos (arm)
69 | GOOS=darwin GOARCH=arm64 go build -ldflags "-X main.version=${VERSION}" -o dg ;\
70 | tar -zcvf ./releases/dg_${VERSION}_macos_arm64.tar.gz ./dg ;\
71 |
72 | # macos (amd)
73 | GOOS=darwin GOARCH=amd64 go build -ldflags "-X main.version=${VERSION}" -o dg ;\
74 | tar -zcvf ./releases/dg_${VERSION}_macos_amd64.tar.gz ./dg ;\
75 |
76 | # windows
77 | GOOS=windows go build -ldflags "-X main.version=${VERSION}" -o dg ;\
78 | tar -zcvf ./releases/dg_${VERSION}_windows.tar.gz ./dg ;\
79 |
80 | rm ./dg
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | A fast data generator that produces CSV files from generated relational data.
6 |
7 | ## Table of Contents
8 |
9 | 1. [Installation](#installation)
10 | 1. [Usage](#usage)
11 | - Import via [HTTP](#import-via-http)
12 | - Import via [psql](#import-via-psql)
13 | - Import via [nodelocal](#import-via-nodelocal)
14 | 1. [Tables](#tables)
15 | - [gen](#gen)
16 | - [set](#set)
17 | - [inc](#inc)
18 | - [ref](#ref)
19 | - [each](#each)
20 | - [range](#range)
21 | - [match](#match)
22 | 1. [Inputs](#inputs)
23 | - [csv](#csv)
24 | 1. [Functions](#functions)
25 | 1. [Thanks](#thanks)
26 | 1. [Todos](#todos)
27 |
28 | ### Installation
29 |
30 | Find the release that matches your architecture on the [releases](https://github.com/codingconcepts/dg/releases) page.
31 |
32 | Download the tar, extract the executable, and move it into your PATH:
33 |
34 | ```
35 | $ tar -xvf dg_[VERSION]-rc1_macOS.tar.gz
36 | ```
37 |
38 | ### Usage
39 |
40 | ```
41 | $ dg
42 | Usage dg:
43 | -c string
44 | the absolute or relative path to the config file
45 | -cpuprofile string
46 | write cpu profile to file
47 | -i string
48 | write import statements to file
49 | -o string
50 | the absolute or relative path to the output dir (default ".")
51 | -p int
52 | port to serve files from (omit to generate without serving)
53 | -version
54 | display the current version number
55 | ```
56 |
57 | Create a config file. In the following example, we create 10,000 people, 50 events, 5 person types, and then populate the many-to-many `person_event` resolver table with 500,000 rows that represent the Cartesian product between the person and event tables:
58 |
59 | ```yaml
60 | tables:
61 | - name: person
62 | count: 10000
63 | columns:
64 | # Generate a random UUID for each person
65 | - name: id
66 | type: gen
67 | processor:
68 | value: ${uuid}
69 |
70 | - name: event
71 | count: 50
72 | columns:
73 | # Generate a random UUID for each event
74 | - name: id
75 | type: gen
76 | processor:
77 | value: ${uuid}
78 |
79 | - name: person_type
80 | count: 5
81 | columns:
82 | # Generate a random UUID for each person_type
83 | - name: id
84 | type: gen
85 | processor:
86 | value: ${uuid}
87 |
88 | # Generate a random 16 bit number and left-pad it to 5 digits
89 | - name: name
90 | type: gen
91 | processor:
92 | value: ${uint16}
93 | format: "%05d"
94 |
95 | - name: person_event
96 | columns:
97 | # Generate a random UUID for each person_event
98 | - name: id
99 | type: gen
100 | processor:
101 | value: ${uuid}
102 |
103 | # Select a random id from the person_type table
104 | - name: person_type
105 | type: ref
106 | processor:
107 | table: person_type
108 | column: id
109 |
110 | # Generate a person_id column for each id in the person table
111 | - name: person_id
112 | type: each
113 | processor:
114 | table: person
115 | column: id
116 |
117 | # Generate an event_id column for each id in the event table
118 | - name: event_id
119 | type: each
120 | processor:
121 | table: event
122 | column: id
123 | ```
124 |
125 | Run the application:
126 |
127 | ```
128 | $ dg -c your_config_file.yaml -o your_output_dir -p 3000
129 | loaded config file took: 428µs
130 | generated table: person took: 41ms
131 | generated table: event took: 159µs
132 | generated table: person_type took: 42µs
133 | generated table: person_event took: 1s
134 | generated all tables took: 1s
135 | wrote csv: person took: 1ms
136 | wrote csv: event took: 139µs
137 | wrote csv: person_type took: 110µs
138 | wrote csv: person_event took: 144ms
139 | wrote all csvs took: 145ms
140 | ```
141 |
142 | This will output the following files, and dg will then run an HTTP server, allowing you to import the files from localhost:
143 |
144 | ```
145 | your_output_dir
146 | ├── event.csv
147 | ├── person.csv
148 | ├── person_event.csv
149 | └── person_type.csv
150 | ```
151 |
152 | ##### Import via HTTP
153 |
154 | Then import the files as you would any other; here's an example insert into CockroachDB:
155 |
156 | ```sql
157 | IMPORT INTO "person" ("id")
158 | CSV DATA (
159 | 'http://localhost:3000/person.csv'
160 | )
161 | WITH skip='1', nullif = '', allow_quoted_null;
162 |
163 | IMPORT INTO "event" ("id")
164 | CSV DATA (
165 | 'http://localhost:3000/event.csv'
166 | )
167 | WITH skip='1', nullif = '', allow_quoted_null;
168 |
169 | IMPORT INTO "person_type" ("id", "name")
170 | CSV DATA (
171 | 'http://localhost:3000/person_type.csv'
172 | )
173 | WITH skip='1', nullif = '', allow_quoted_null;
174 |
175 | IMPORT INTO "person_event" ("person_id", "event_id", "id", "person_type")
176 | CSV DATA (
177 | 'http://localhost:3000/person_event.csv'
178 | )
179 | WITH skip='1', nullif = '', allow_quoted_null;
180 | ```
181 |
182 | ##### Import via psql
183 |
184 | If you're working with a remote database and have access to the `psql` binary, try importing the CSV file as follows:
185 |
186 | ```sh
187 | psql "postgres://root@localhost:26257/defaultdb?sslmode=disable" \
188 | -c "\COPY public.person (id, full_name, date_of_birth, user_type, favourite_animal) FROM './csvs/person/person.csv' WITH DELIMITER ',' CSV HEADER NULL E''"
189 | ```
190 |
191 | ##### Import via nodelocal
192 |
193 | If you're working with a remote database and have access to the `cockroach` binary, try importing the CSV file as follows:
194 |
195 | ```sh
196 | cockroach nodelocal upload ./csvs/person/person.csv imports/person.csv \
197 | --url "postgres://root@localhost:26257?sslmode=disable"
198 | ```
199 |
200 | Then import the file as follows:
201 |
202 | ```sql
203 | IMPORT INTO person ("id", "full_name", "date_of_birth", "user_type", "favourite_animal")
204 | CSV DATA (
205 | 'nodelocal://1/imports/person.csv'
206 | ) WITH skip = '1';
207 | ```
208 |
209 | ### Tables
210 |
211 | Table elements instruct dg to generate data for a single table and output it as a csv file. Here are the configuration options for a table:
212 |
213 | ```yaml
214 | tables:
215 | - name: person
216 | unique_columns: [col_a, col_b]
217 | count: 10
218 | columns: ...
219 | ```
220 |
221 | This config generates 10 random rows for the person table. Here's a breakdown of the fields:
222 |
223 | | Field Name | Optional | Description |
224 | | -------------- | -------- | ---------------------------------------------------------------------------------------------------------------------------- |
225 | | name | No | Name of the table. Must be unique. |
226 | | unique_columns | Yes | Removes duplicates from the table based on the column names provided |
227 | | count | Yes | If provided, will determine the number of rows created. If not provided, will be calculated by the current table size. |
228 | | suppress | Yes | If `true` the table won't be written to a CSV. Useful when you need to generate intermediate tables to combine data locally. |
229 | | columns | No | A collection of columns to generate for the table. |
230 |
231 | #### Processors
232 |
233 | dg takes its configuration from a config file that is parsed in the form of an object containing arrays of objects; `tables` and `inputs`. Each object in the `tables` array represents a CSV file to be generated for a named table and contains a collection of columns to generate data for.
234 |
235 | ##### gen
236 |
237 | Generate a random value for the column. Here's an example:
238 |
239 | ```yaml
240 | - name: sku
241 | type: gen
242 | processor:
243 | value: SKU${uint16}
244 | format: "%05d"
245 | ```
246 |
247 | This configuration will generate a random left-padded `uint16` with a prefix of "SKU" for a column called "sku". `value` contains zero or more function placeholders that can be used to generate data. A list of available functions can be found [here](https://github.com/codingconcepts/dg#functions).
248 |
249 | Generate a pattern-based value for the column. Here's an example:
250 |
251 | ```yaml
252 | - name: phone
253 | type: gen
254 | processor:
255 | pattern: \d{3}-\d{3}-\d{4}
256 | ```
257 |
258 | This configuration will generate a US-format phone number, like 123-456-7890.
259 |
260 | ##### const
261 |
262 | Provide a constant set of values for a column. Here's an example:
263 |
264 | ```yaml
265 | - name: options
266 | type: const
267 | processor:
268 | values: [bed_breakfast, bed]
269 | ```
270 |
271 | This configuration will create a column containing two rows.
272 |
273 | ##### set
274 |
275 | Select a value from a given set. Here's an example:
276 |
277 | ```yaml
278 | - name: user_type
279 | type: set
280 | processor:
281 | values: [admin, regular, read-only]
282 | ```
283 |
284 | This configuration will select between the values "admin", "regular", and "read-only"; each with an equal probability of being selected.
285 |
286 | Items in a set can also be given a weight, which will affect their likelihood of being selected. Here's an example:
287 |
288 | ```yaml
289 | - name: favourite_animal
290 | type: set
291 | processor:
292 | values: [rabbit, dog, cat]
293 | weights: [10, 60, 30]
294 | ```
295 |
296 | This configuration will select between the values "rabbit", "dog", and "cat"; each with different probabilities of being selected. Rabbits will be selected approximately 10% of the time, dogs 60%, and cats 30%. The weights don't have to total 100, however; you can use whichever numbers make the most sense to you.
297 |
298 | ##### inc
299 |
300 | Generates an incrementing number. Here's an example:
301 |
302 | ```yaml
303 | - name: id
304 | type: inc
305 | processor:
306 | start: 1
307 | format: "P%03d"
308 | ```
309 |
310 | This configuration will generate left-padded ids starting from 1, and format them with a prefix of "P".
311 |
312 | ##### ref
313 |
314 | References a value from a previously generated table. Here's an example:
315 |
316 | ```yaml
317 | - name: ptype
318 | type: ref
319 | processor:
320 | table: person_type
321 | column: id
322 | ```
323 |
324 | This configuration will choose a random id from the person_type table and create a `ptype` column to store the values.
325 |
326 | Use the `ref` type if you need to reference another table but don't need to generate a new row for _every_ instance of the referenced column.
327 |
328 | ##### each
329 |
330 | Creates a row for each value in another table. If multiple `each` columns are provided, a Cartesian product of both columns will be generated.
331 |
332 | Here's an example of one `each` column:
333 |
334 | ```yaml
335 | - name: person
336 | count: 3
337 | columns:
338 | - name: id
339 | type: gen
340 | processor:
341 | value: ${uuid}
342 |
343 | # person
344 | #
345 | # id
346 | # c40819f8-2c76-44dd-8c44-5eef6a0f2695
347 | # 58f42be2-6cc9-4a8c-b702-c72ab1decfea
348 | # ccbc2244-667b-4bb5-a5cd-a1e9626a90f9
349 |
350 | - name: pet
351 | columns:
352 | - name: person_id
353 | type: each
354 | processor:
355 | table: person
356 | column: id
357 | - name: name
358 | type: gen
359 | processor:
360 | value: ${first_name}
361 | # pet
362 | #
363 | # person_id name
364 | # c40819f8-2c76-44dd-8c44-5eef6a0f2695 Carlo
365 | # 58f42be2-6cc9-4a8c-b702-c72ab1decfea Armando
366 | # ccbc2244-667b-4bb5-a5cd-a1e9626a90f9 Kailey
367 | ```
368 |
369 | Here's an example of two `each` columns:
370 |
371 | ```yaml
372 | - name: person
373 | count: 3
374 | columns:
375 | - name: id
376 | type: gen
377 | processor:
378 | value: ${uuid}
379 |
380 | # person
381 | #
382 | # id
383 | # c40819f8-2c76-44dd-8c44-5eef6a0f2695
384 | # 58f42be2-6cc9-4a8c-b702-c72ab1decfea
385 | # ccbc2244-667b-4bb5-a5cd-a1e9626a90f9
386 |
387 | - name: event
388 | count: 3
389 | columns:
390 | - name: id
391 | type: gen
392 | processor:
393 | value: ${uuid}
394 |
395 | # event
396 | #
397 | # id
398 | # 39faeb54-67d1-46db-a38b-825b41bfe919
399 | # 7be981a9-679b-432a-8a0f-4a0267170c68
400 | # 9954f321-8040-4cd7-96e6-248d03ee9266
401 |
402 | - name: person_event
403 | columns:
404 | - name: person_id
405 | type: each
406 | processor:
407 | table: person
408 | column: id
409 | - name: event_id
410 | type: each
411 | processor:
412 | table: event
413 | column: id
414 | # person_event
415 | #
416 | # person_id event_id
417 | # c40819f8-2c76-44dd-8c44-5eef6a0f2695 39faeb54-67d1-46db-a38b-825b41bfe919
418 | # c40819f8-2c76-44dd-8c44-5eef6a0f2695 7be981a9-679b-432a-8a0f-4a0267170c68
419 | # c40819f8-2c76-44dd-8c44-5eef6a0f2695 9954f321-8040-4cd7-96e6-248d03ee9266
420 | # 58f42be2-6cc9-4a8c-b702-c72ab1decfea 39faeb54-67d1-46db-a38b-825b41bfe919
421 | # 58f42be2-6cc9-4a8c-b702-c72ab1decfea 7be981a9-679b-432a-8a0f-4a0267170c68
422 | # 58f42be2-6cc9-4a8c-b702-c72ab1decfea 9954f321-8040-4cd7-96e6-248d03ee9266
423 | # ccbc2244-667b-4bb5-a5cd-a1e9626a90f9 39faeb54-67d1-46db-a38b-825b41bfe919
424 | # ccbc2244-667b-4bb5-a5cd-a1e9626a90f9 7be981a9-679b-432a-8a0f-4a0267170c68
425 | # ccbc2244-667b-4bb5-a5cd-a1e9626a90f9 9954f321-8040-4cd7-96e6-248d03ee9266
426 | ```
427 |
428 | Use the `each` type if you need to reference another table and need to generate a new row for _every_ instance of the referenced column.
429 |
430 | ##### range
431 |
432 | Generates data within a given range. Note that a number of factors determine how this generator will behave. The step (and hence, number of rows) will be generated in the following priority order:
433 |
434 | 1. If an `each` generator is being used, step will be derived from that
435 | 1. If a `count` is provided, step will be derived from that
436 | 1. Otherwise, `step` will be used
437 |
438 | Here's an example that generates monotonically increasing ids for a table, starting from 1:
439 |
440 | ```yaml
441 | - name: users
442 | count: 10000
443 | columns:
444 | - name: id
445 | type: range
446 | processor:
447 | type: int
448 | from: 1
449 | step: 1
450 | ```
451 |
452 | Here's an example that generates all dates between `2020-01-01` and `2023-01-01` at daily intervals:
453 |
454 | ```yaml
455 | - name: event
456 | columns:
457 | - name: date
458 | type: range
459 | processor:
460 | type: date
461 | from: 2020-01-01
462 | to: 2023-01-01
463 | step: 24h
464 | format: 2006-01-02
465 | ```
466 |
467 | Here's an example that generates 10 dates between `2020-01-01` and `2023-01-01`:
468 |
469 | ```yaml
470 | - name: event
471 | count: 10
472 | columns:
473 | - name: date
474 | type: range
475 | processor:
476 | type: date
477 | from: 2020-01-01
478 | to: 2023-01-01
479 | format: 2006-01-02
480 | step: 24h # Ignored due to table count.
481 | ```
482 |
483 | Here's an example that generates 20 dates (one for every row found from an `each` generator) between `2020-01-01` and `2023-01-01`:
484 |
485 | ```yaml
486 | - name: person
487 | count: 20
488 | columns:
489 | - name: id
490 | type: gen
491 | processor:
492 | value: ${uuid}
493 |
494 | - name: event
495 | count: 10 # Ignored due to resulting count from "each" generator.
496 | columns:
497 | - name: person_id
498 | type: each
499 | processor:
500 | table: person
501 | column: id
502 |
503 | - name: date
504 | type: range
505 | processor:
506 | type: date
507 | from: 2020-01-01
508 | to: 2023-01-01
509 | format: 2006-01-02
510 | ```
511 |
512 | The range generator currently supports the following data types:
513 |
514 | - `date` - Generate dates between a from and to value
515 | - `int` - Generate integers between a from and to value
516 |
517 | ##### match
518 |
519 | Generates data by matching data in another table. In this example, we'll assume there's a CSV file for the `significant_event` input that generates the following table:
520 |
521 | | date | event |
522 | | ---------- | ----- |
523 | | 2023-01-10 | abc |
524 | | 2023-01-11 | |
525 | | 2023-01-12 | def |
526 |
527 | ```yaml
528 | inputs:
529 | - name: significant_event
530 | type: csv
531 | source:
532 | file_name: significant_dates.csv
533 |
534 | tables:
535 | - name: events
536 | columns:
537 | - name: timeline_date
538 | type: range
539 | processor:
540 | type: date
541 | from: 2023-01-09
542 | to: 2023-01-13
543 | format: 2006-01-02
544 | step: 24h
545 | - name: timeline_event
546 | type: match
547 | processor:
548 | source_table: significant_event
549 | source_column: date
550 | source_value: events
551 | match_column: timeline_date
552 | ```
553 |
554 | dg will match rows in the significant_event table with rows in the events table based on the match between `significant_event.date` and `events.timeline_date`, and take the value from the `significant_event.event` column where there's a match (otherwise leaving `NULL`). This will result in the following `events` table being generated:
555 |
556 | | timeline_date | timeline_event |
557 | | ------------- | -------------- |
558 | | 2023-01-09 | |
559 | | 2023-01-10 | abc |
560 | | 2023-01-11 | |
561 | | 2023-01-12 | def |
562 | | 2023-01-13 | |
563 |
564 | ### Inputs
565 |
566 | dg takes its configuration from a config file that is parsed in the form of an object containing arrays of objects; `tables` and `inputs`. Each object in the `inputs` array represents a data source from which a table can be created. Tables created via inputs will not result in output CSVs.
567 |
568 | ##### csv
569 |
570 | Reads in a CSV file as a table that can be referenced from other tables. Here's an example:
571 |
572 | ```yaml
573 | - name: significant_event
574 | type: csv
575 | source:
576 | file_name: significant_dates.csv
577 | ```
578 |
579 | This configuration will read from a file called significant_dates.csv and create a table from its contents. Note that the `file_name` should be relative to the config directory, so if your CSV file is in the same directory as your config file, just include the file name.
580 |
581 | ### Functions
582 |
583 | | Name | Type | Example |
584 | | ------------------------------ | --------- | --------------------------------------------------------------------------------------------------------- |
585 | | ${ach_account} | string | 586981797546 |
586 | | ${ach_routing} | string | 441478502 |
587 | | ${adjective_demonstrative} | string | there |
588 | | ${adjective_descriptive} | string | eager |
589 | | ${adjective_indefinite} | string | several |
590 | | ${adjective_interrogative} | string | whose |
591 | | ${adjective_possessive} | string | her |
592 | | ${adjective_proper} | string | Iraqi |
593 | | ${adjective_quantitative} | string | sufficient |
594 | | ${adjective} | string | double |
595 | | ${adverb_degree} | string | far |
596 | | ${adverb_frequency_definite} | string | daily |
597 | | ${adverb_frequency_indefinite} | string | always |
598 | | ${adverb_manner} | string | unexpectedly |
599 | | ${adverb_place} | string | here |
600 | | ${adverb_time_definite} | string | yesterday |
601 | | ${adverb_time_indefinite} | string | just |
602 | | ${adverb} | string | far |
603 | | ${animal_type} | string | mammals |
604 | | ${animal} | string | ape |
605 | | ${app_author} | string | RedLaser |
606 | | ${app_name} | string | SlateBlueweek |
607 | | ${app_version} | string | 3.2.10 |
608 | | ${bitcoin_address} | string | 16YmZ5ol5aXKjilZT2c2nIeHpbq |
609 | | ${bitcoin_private_key} | string | 5JzwyfrpHRoiA59Y1Pd9yLq52cQrAXxSNK4QrGrRUxkak5Howhe |
610 | | ${bool} | bool | true |
611 | | ${breakfast} | string | Awesome orange chocolate muffins |
612 | | ${bs} | string | leading-edge |
613 | | ${car_fuel_type} | string | LPG |
614 | | ${car_maker} | string | Seat |
615 | | ${car_model} | string | Camry Solara Convertible |
616 | | ${car_transmission_type} | string | Manual |
617 | | ${car_type} | string | Passenger car mini |
618 | | ${chrome_user_agent} | string | Mozilla/5.0 (X11; Linux i686) AppleWebKit/5310 (KHTML, like Gecko) Chrome/37.0.882.0 Mobile Safari/5310 |
619 | | ${city} | string | Memphis |
620 | | ${color} | string | DarkBlue |
621 | | ${company_suffix} | string | LLC |
622 | | ${company} | string | PlanetEcosystems |
623 | | ${connective_casual} | string | an effect of |
624 | | ${connective_complaint} | string | i.e. |
625 | | ${connective_examplify} | string | for example |
626 | | ${connective_listing} | string | next |
627 | | ${connective_time} | string | soon |
628 | | ${connective} | string | for instance |
629 | | ${country_abr} | string | VU |
630 | | ${country} | string | Eswatini |
631 | | ${credit_card_cvv} | string | 315 |
632 | | ${credit_card_exp} | string | 06/28 |
633 | | ${credit_card_type} | string | Mastercard |
634 | | ${currency_long} | string | Mozambique Metical |
635 | | ${currency_short} | string | SCR |
636 | | ${date} | time.Time | 2005-01-25 22:17:55.371781952 +0000 UTC |
637 | | ${day} | int | 27 |
638 | | ${dessert} | string | Chocolate coconut dream bars |
639 | | ${dinner} | string | Creole potato salad |
640 | | ${domain_name} | string | centralb2c.net |
641 | | ${domain_suffix} | string | com |
642 | | ${email} | string | ethanlebsack@lynch.name |
643 | | ${emoji} | string | ♻️ |
644 | | ${file_extension} | string | csv |
645 | | ${file_mime_type} | string | image/vasa |
646 | | ${firefox_user_agent} | string | Mozilla/5.0 (X11; Linux x86_64; rv:6.0) Gecko/1951-07-21 Firefox/37.0 |
647 | | ${first_name} | string | Kailee |
648 | | ${flipacoin} | string | Tails |
649 | | ${float32} | float32 | 2.7906555e+38 |
650 | | ${float64} | float64 | 4.314310154193861e+307 |
651 | | ${fruit} | string | Eggplant |
652 | | ${gender} | string | female |
653 | | ${hexcolor} | string | #6daf06 |
654 | | ${hobby} | string | Bowling |
655 | | ${hour} | int | 18 |
656 | | ${http_method} | string | DELETE |
657 | | ${http_status_code_simple} | int | 404 |
658 | | ${http_status_code} | int | 503 |
659 | | ${http_version} | string | HTTP/1.1 |
660 | | ${int16} | int16 | 18940 |
661 | | ${int32} | int32 | 2129368442 |
662 | | ${int64} | int64 | 5051946056392951363 |
663 | | ${int8} | int8 | 110 |
664 | | ${ipv4_address} | string | 191.131.155.85 |
665 | | ${ipv6_address} | string | 1642:94b:52d8:3a4e:38bc:4d87:846e:9c83 |
666 | | ${job_descriptor} | string | Senior |
667 | | ${job_level} | string | Identity |
668 | | ${job_title} | string | Executive |
669 | | ${language_abbreviation} | string | kn |
670 | | ${language} | string | Bengali |
671 | | ${last_name} | string | Friesen |
672 | | ${latitude} | float64 | 45.919913 |
673 | | ${longitude} | float64 | -110.313125 |
674 | | ${lunch} | string | Sweet and sour pork balls |
675 | | ${mac_address} | string | bd:e8:ce:66:da:5b |
676 | | ${minute} | int | 23 |
677 | | ${month_string} | string | April |
678 | | ${month} | int | 10 |
679 | | ${name_prefix} | string | Ms. |
680 | | ${name_suffix} | string | I |
681 | | ${name} | string | Paxton Schumm |
682 | | ${nanosecond} | int | 349669923 |
683 | | ${nicecolors} | []string | [#490a3d #bd1550 #e97f02 #f8ca00 #8a9b0f] |
684 | | ${noun_abstract} | string | timing |
685 | | ${noun_collective_animal} | string | brace |
686 | | ${noun_collective_people} | string | mob |
687 | | ${noun_collective_thing} | string | orchard |
688 | | ${noun_common} | string | problem |
689 | | ${noun_concrete} | string | town |
690 | | ${noun_countable} | string | cat |
691 | | ${noun_uncountable} | string | wisdom |
692 | | ${noun} | string | case |
693 | | ${opera_user_agent} | string | Opera/10.10 (Windows NT 5.01; en-US) Presto/2.11.165 Version/13.00 |
694 | | ${password} | string | 1k0vWN 9Z | 4f={B YPRda4ys. |
695 | | ${pet_name} | string | Bernadette |
696 | | ${phone_formatted} | string | (476)455-2253 |
697 | | ${phone} | string | 2692528685 |
698 | | ${phrase} | string | I'm straight |
699 | | ${preposition_compound} | string | ahead of |
700 | | ${preposition_double} | string | next to |
701 | | ${preposition_simple} | string | at |
702 | | ${preposition} | string | outside of |
703 | | ${programming_language} | string | PL/SQL |
704 | | ${pronoun_demonstrative} | string | those |
705 | | ${pronoun_interrogative} | string | whom |
706 | | ${pronoun_object} | string | us |
707 | | ${pronoun_personal} | string | I |
708 | | ${pronoun_possessive} | string | mine |
709 | | ${pronoun_reflective} | string | yourself |
710 | | ${pronoun_relative} | string | whom |
711 | | ${pronoun} | string | those |
712 | | ${quote} | string | "Raw denim tilde cronut mlkshk photo booth kickstarter." - Gunnar Rice |
713 | | ${rgbcolor} | []int | [152 74 172] |
714 | | ${safari_user_agent} | string | Mozilla/5.0 (Windows; U; Windows 95) AppleWebKit/536.41.5 (KHTML, like Gecko) Version/5.2 Safari/536.41.5 |
715 | | ${safecolor} | string | gray |
716 | | ${second} | int | 58 |
717 | | ${snack} | string | Crispy fried chicken spring rolls |
718 | | ${ssn} | string | 783135577 |
719 | | ${state_abr} | string | AL |
720 | | ${state} | string | Kentucky |
721 | | ${street_name} | string | Way |
722 | | ${street_number} | string | 6234 |
723 | | ${street_prefix} | string | Port |
724 | | ${street_suffix} | string | stad |
725 | | ${street} | string | 11083 Lake Fall mouth |
726 | | ${time_zone_abv} | string | ADT |
727 | | ${time_zone_full} | string | (UTC-02:00) Coordinated Universal Time-02 |
728 | | ${time_zone_offset} | float32 | 3 |
729 | | ${time_zone_region} | string | Asia/Aqtau |
730 | | ${time_zone} | string | Mountain Standard Time (Mexico) |
731 | | ${uint128_hex} | string | 0xcd50930d5bc0f2e8fa36205e3d7bd7b2 |
732 | | ${uint16_hex} | string | 0x7c80 |
733 | | ${uint16} | uint16 | 25076 |
734 | | ${uint256_hex} | string | 0x61334b8c51fa841bf9a3f1f0ac3750cd1b51ca2046b0fb75627ac73001f0c5aa |
735 | | ${uint32_hex} | string | 0xfe208664 |
736 | | ${uint32} | uint32 | 783098878 |
737 | | ${uint64_hex} | string | 0xc8b91dc44e631956 |
738 | | ${uint64} | uint64 | 5722659847801560283 |
739 | | ${uint8_hex} | string | 0x65 |
740 | | ${uint8} | uint8 | 192 |
741 | | ${url} | string | https://www.leadcutting-edge.net/productize |
742 | | ${user_agent} | string | Opera/10.64 (Windows NT 5.2; en-US) Presto/2.13.295 Version/10.00 |
743 | | ${username} | string | Gutmann2845 |
744 | | ${uuid} | string | e6e34ff4-1def-41e5-9afb-f697a51c0359 |
745 | | ${vegetable} | string | Tomato |
746 | | ${verb_action} | string | knit |
747 | | ${verb_helping} | string | did |
748 | | ${verb_linking} | string | has |
749 | | ${verb} | string | be |
750 | | ${weekday} | string | Tuesday |
751 | | ${word} | string | month |
752 | | ${year} | int | 1962 |
753 | | ${zip} | string | 45618 |
754 |
755 | ### Building releases locally
756 |
757 | ```
758 | $ VERSION=0.1.0 make release
759 | ```
760 |
761 | ### Thanks
762 |
763 | Thanks to the maintainers of the following fantastic packages, whose code this tool makes use of:
764 |
765 | - [samber/lo](https://github.com/samber/lo)
766 | - [brianvoe/gofakeit](https://github.com/brianvoe/gofakeit)
767 | - [go-yaml/yaml](https://github.com/go-yaml/yaml)
768 | - [stretchr/testify](https://github.com/stretchr/testify)
769 |
770 | ### Todos
771 |
772 | - Improve code coverage
773 | - Write file after generating, then only keep columns that other tables need
774 | - Support for range without a table count (e.g. the following results in zero rows unless a count is provided)
775 |
776 | ```yaml
777 | - name: bet_types
778 | count: 3
779 | columns:
780 | - name: id
781 | type: range
782 | processor:
783 | type: int
784 | from: 1
785 | step: 1
786 | - name: description
787 | type: const
788 | processor:
789 | values: [Win, Lose, Draw]
790 | ```
791 |
--------------------------------------------------------------------------------
/assets/cover.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/codingconcepts/dg/e0a02910aab9dabf73a19c087be2a730c5d96b1c/assets/cover.png
--------------------------------------------------------------------------------
/assets/cover_grey.svg:
--------------------------------------------------------------------------------
1 |
62 |
--------------------------------------------------------------------------------
/dg.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/csv"
5 | "flag"
6 | "fmt"
7 | "log"
8 | "os"
9 | "path"
10 | "runtime/pprof"
11 | "strings"
12 | "text/template"
13 | "time"
14 |
15 | "github.com/codingconcepts/dg/internal/pkg/generator"
16 | "github.com/codingconcepts/dg/internal/pkg/model"
17 | "github.com/codingconcepts/dg/internal/pkg/source"
18 | "github.com/codingconcepts/dg/internal/pkg/ui"
19 | "github.com/codingconcepts/dg/internal/pkg/web"
20 | "github.com/samber/lo"
21 | )
22 |
23 | var (
24 | version string
25 | )
26 |
27 | func main() {
28 | log.SetFlags(0)
29 |
30 | configPath := flag.String("c", "", "the absolute or relative path to the config file")
31 | outputDir := flag.String("o", ".", "the absolute or relative path to the output dir")
32 | createImports := flag.String("i", "", "write import statements to file")
33 | cpuprofile := flag.String("cpuprofile", "", "write cpu profile to file")
34 | versionFlag := flag.Bool("version", false, "display the current version number")
35 | port := flag.Int("p", 0, "port to serve files from (omit to generate without serving)")
36 | flag.Parse()
37 |
38 | if *cpuprofile != "" {
39 | defer launchProfiler(*cpuprofile)()
40 | }
41 |
42 | if *versionFlag {
43 | fmt.Println(version)
44 | return
45 | }
46 |
47 | if *configPath == "" {
48 | flag.Usage()
49 | os.Exit(2)
50 | }
51 |
52 | tt := ui.TimeTracker(os.Stdout, realClock{}, 40)
53 | defer tt(time.Now(), "done")
54 |
55 | c, err := loadConfig(*configPath, tt)
56 | if err != nil {
57 | log.Fatalf("error loading config: %v", err)
58 | }
59 |
60 | files := make(map[string]model.CSVFile)
61 |
62 | if err = loadInputs(c, path.Dir(*configPath), tt, files); err != nil {
63 | log.Fatalf("error loading inputs: %v", err)
64 | }
65 |
66 | if err = generateTables(c, tt, files); err != nil {
67 | log.Fatalf("error generating tables: %v", err)
68 | }
69 |
70 | if err = removeSuppressedColumns(c, tt, files); err != nil {
71 | log.Fatalf("error removing supressed columns: %v", err)
72 | }
73 |
74 | if err := writeFiles(*outputDir, files, tt); err != nil {
75 | log.Fatalf("error writing csv files: %v", err)
76 | }
77 |
78 | if *createImports != "" {
79 | if err := writeImports(*outputDir, *createImports, c, files, tt); err != nil {
80 | log.Fatalf("error writing import statements: %v", err)
81 | }
82 | }
83 |
84 | if *port == 0 {
85 | return
86 | }
87 |
88 | log.Fatal(web.Serve(*outputDir, *port))
89 | }
90 |
91 | func loadConfig(filename string, tt ui.TimerFunc) (model.Config, error) {
92 | defer tt(time.Now(), "loaded config file")
93 |
94 | file, err := os.Open(filename)
95 | if err != nil {
96 | return model.Config{}, fmt.Errorf("opening file: %w", err)
97 | }
98 | defer file.Close()
99 |
100 | return model.LoadConfig(file)
101 | }
102 |
103 | func loadInputs(c model.Config, configDir string, tt ui.TimerFunc, files map[string]model.CSVFile) error {
104 | defer tt(time.Now(), "loaded data sources")
105 |
106 | for _, input := range c.Inputs {
107 | if err := loadInput(input, configDir, tt, files); err != nil {
108 | return fmt.Errorf("loading input for %q: %w", input.Name, err)
109 | }
110 | }
111 |
112 | return nil
113 | }
114 |
115 | func loadInput(input model.Input, configDir string, tt ui.TimerFunc, files map[string]model.CSVFile) error {
116 | defer tt(time.Now(), fmt.Sprintf("loaded data source: %s", input.Name))
117 |
118 | switch input.Type {
119 | case "csv":
120 | var s model.SourceCSV
121 | if err := input.Source.UnmarshalFunc(&s); err != nil {
122 | return fmt.Errorf("parsing csv source for %s: %w", input.Name, err)
123 | }
124 |
125 | if err := source.LoadCSVSource(input.Name, configDir, s, files); err != nil {
126 | return fmt.Errorf("loading csv for %s: %w", input.Name, err)
127 | }
128 | }
129 |
130 | return nil
131 | }
132 |
133 | func generateTables(c model.Config, tt ui.TimerFunc, files map[string]model.CSVFile) error {
134 | defer tt(time.Now(), "generated all tables")
135 |
136 | for _, table := range c.Tables {
137 | if err := generateTable(table, files, tt); err != nil {
138 | return fmt.Errorf("generating csv file for %q: %w", table.Name, err)
139 | }
140 | }
141 |
142 | return nil
143 | }
144 |
145 | func generateTable(t model.Table, files map[string]model.CSVFile, tt ui.TimerFunc) error {
146 | defer tt(time.Now(), fmt.Sprintf("generated table: %s", t.Name))
147 |
148 | // Create the Cartesian product of any each types first.
149 | var eg generator.EachGenerator
150 | if err := eg.Generate(t, files); err != nil {
151 | return fmt.Errorf("generating each columns: %w", err)
152 | }
153 |
154 | // Create any const columns next.
155 | var cg generator.ConstGenerator
156 | if err := cg.Generate(t, files); err != nil {
157 | return fmt.Errorf("generating const columns: %w", err)
158 | }
159 |
160 | for _, col := range t.Columns {
161 | switch col.Type {
162 | case "ref":
163 | var g generator.RefGenerator
164 | if err := col.Generator.UnmarshalFunc(&g); err != nil {
165 | return fmt.Errorf("parsing ref process for %s.%s: %w", t.Name, col.Name, err)
166 | }
167 | if err := g.Generate(t, col, files); err != nil {
168 | return fmt.Errorf("running ref process for %s.%s: %w", t.Name, col.Name, err)
169 | }
170 |
171 | case "gen":
172 | var g generator.GenGenerator
173 | if err := col.Generator.UnmarshalFunc(&g); err != nil {
174 | return fmt.Errorf("parsing each process for %s: %w", col.Name, err)
175 | }
176 | if err := g.Generate(t, col, files); err != nil {
177 | return fmt.Errorf("running gen process for %s.%s: %w", t.Name, col.Name, err)
178 | }
179 |
180 | case "set":
181 | var g generator.SetGenerator
182 | if err := col.Generator.UnmarshalFunc(&g); err != nil {
183 | return fmt.Errorf("parsing set process for %s.%s: %w", t.Name, col.Name, err)
184 | }
185 | if err := g.Generate(t, col, files); err != nil {
186 | return fmt.Errorf("running set process for %s.%s: %w", t.Name, col.Name, err)
187 | }
188 |
189 | // case "const":
190 | // var g generator.ConstGenerator
191 | // if err := col.Generator.UnmarshalFunc(&g); err != nil {
192 | // return fmt.Errorf("parsing const process for %s.%s: %w", t.Name, col.Name, err)
193 | // }
194 | // if err := g.Generate(t, col, files); err != nil {
195 | // return fmt.Errorf("running const process for %s.%s: %w", t.Name, col.Name, err)
196 | // }
197 |
198 | case "inc":
199 | var g generator.IncGenerator
200 | if err := col.Generator.UnmarshalFunc(&g); err != nil {
201 | return fmt.Errorf("parsing each process for %s: %w", col.Name, err)
202 | }
203 | if err := g.Generate(t, col, files); err != nil {
204 | return fmt.Errorf("running inc process for %s.%s: %w", t.Name, col.Name, err)
205 | }
206 |
207 | case "range":
208 | var g generator.RangeGenerator
209 | if err := col.Generator.UnmarshalFunc(&g); err != nil {
210 | return fmt.Errorf("parsing range process for %s: %w", col.Name, err)
211 | }
212 | if err := g.Generate(t, col, files); err != nil {
213 | return fmt.Errorf("running range process for %s.%s: %w", t.Name, col.Name, err)
214 | }
215 |
216 | case "match":
217 | var g generator.MatchGenerator
218 | if err := col.Generator.UnmarshalFunc(&g); err != nil {
219 | return fmt.Errorf("parsing match process for %s: %w", col.Name, err)
220 | }
221 | if err := g.Generate(t, col, files); err != nil {
222 | return fmt.Errorf("running match process for %s.%s: %w", t.Name, col.Name, err)
223 | }
224 | }
225 | }
226 |
227 | file, ok := files[t.Name]
228 | if !ok {
229 | return fmt.Errorf("missing table: %q", t.Name)
230 | }
231 |
232 | if len(file.UniqueColumns) > 0 {
233 | file.Lines = generator.Transpose(file.Lines)
234 | file.Lines = file.Unique()
235 | file.Lines = generator.Transpose(file.Lines)
236 | }
237 | files[t.Name] = file
238 |
239 | return nil
240 | }
241 |
242 | func removeSuppressedColumns(c model.Config, tt ui.TimerFunc, files map[string]model.CSVFile) error {
243 | defer tt(time.Now(), "removed suppressed columns")
244 |
245 | for _, table := range c.Tables {
246 | for _, column := range table.Columns {
247 | if !column.Suppress {
248 | continue
249 | }
250 |
251 | file, ok := files[table.Name]
252 | if !ok {
253 | return fmt.Errorf("missing table: %q", table.Name)
254 | }
255 |
256 | // Remove suppressed column from header.
257 | var headerIndex int
258 | file.Header = lo.Reject(file.Header, func(v string, i int) bool {
259 | if v == column.Name {
260 | headerIndex = i
261 | return true
262 | }
263 | return false
264 | })
265 |
266 | // Remove suppressed column from lines.
267 | file.Lines = append(file.Lines[:headerIndex], file.Lines[headerIndex+1:]...)
268 |
269 | files[table.Name] = file
270 | }
271 | }
272 |
273 | return nil
274 | }
275 |
276 | func writeFiles(outputDir string, cfs map[string]model.CSVFile, tt ui.TimerFunc) error {
277 | defer tt(time.Now(), "wrote all csvs")
278 |
279 | if err := os.MkdirAll(outputDir, os.ModePerm); err != nil {
280 | return fmt.Errorf("creating output directory: %w", err)
281 | }
282 |
283 | for name, file := range cfs {
284 | if !file.Output {
285 | continue
286 | }
287 |
288 | if err := writeFile(outputDir, name, file, tt); err != nil {
289 | return fmt.Errorf("writing file %q: %w", file.Name, err)
290 | }
291 | }
292 |
293 | return nil
294 | }
295 |
296 | func writeFile(outputDir, name string, cf model.CSVFile, tt ui.TimerFunc) error {
297 | defer tt(time.Now(), fmt.Sprintf("wrote csv: %s", name))
298 |
299 | fullPath := path.Join(outputDir, fmt.Sprintf("%s.csv", name))
300 | file, err := os.Create(fullPath)
301 | if err != nil {
302 | return fmt.Errorf("creating csv file %q: %w", name, err)
303 | }
304 | defer file.Close()
305 |
306 | writer := csv.NewWriter(file)
307 | if err = writer.Write(cf.Header); err != nil {
308 | return fmt.Errorf("writing csv header for %q: %w", name, err)
309 | }
310 |
311 | cf.Lines = generator.Transpose(cf.Lines)
312 |
313 | if err = writer.WriteAll(cf.Lines); err != nil {
314 | return fmt.Errorf("writing csv lines for %q: %w", name, err)
315 | }
316 |
317 | writer.Flush()
318 | return nil
319 | }
320 |
321 | func writeImports(outputDir, name string, c model.Config, files map[string]model.CSVFile, tt ui.TimerFunc) error {
322 | defer tt(time.Now(), fmt.Sprintf("wrote imports: %s", name))
323 |
324 | importTmpl := template.Must(template.New("import").
325 | Funcs(template.FuncMap{"join": strings.Join}).
326 | Parse(`IMPORT INTO {{.Name}} (
327 | {{ join .Header ", " }}
328 | )
329 | CSV DATA (
330 | '.../{{.Name}}.csv'
331 | )
332 | WITH skip='1', nullif = '', allow_quoted_null;
333 |
334 | `),
335 | )
336 |
337 | fullPath := path.Join(outputDir, name)
338 | file, err := os.Create(fullPath)
339 | if err != nil {
340 | return fmt.Errorf("creating csv file %q: %w", name, err)
341 | }
342 | defer file.Close()
343 |
344 | // Iterate through the tables in the config file, so the imports are in the right order.
345 | for _, table := range c.Tables {
346 | csv := files[table.Name]
347 | if !csv.Output {
348 | continue
349 | }
350 |
351 | if err := importTmpl.Execute(file, csv); err != nil {
352 | return fmt.Errorf("writing import statement for %q: %w", name, err)
353 | }
354 | }
355 |
356 | return nil
357 | }
358 |
// launchProfiler starts CPU profiling, writing the profile to the file at
// cpuprofile, and returns a function that stops profiling and closes the
// file.
//
// The process exits via log.Fatalf if the file cannot be created or if
// profiling cannot be started.
func launchProfiler(cpuprofile string) func() {
	f, err := os.Create(cpuprofile)
	if err != nil {
		log.Fatalf("creating file for profiler: %v", err)
	}

	if err := pprof.StartCPUProfile(f); err != nil {
		f.Close()
		log.Fatalf("starting cpu profile: %v", err)
	}

	return func() {
		pprof.StopCPUProfile()

		// The profile has been fully written by now, so release the file.
		if err := f.Close(); err != nil {
			log.Printf("closing cpu profile file: %v", err)
		}
	}
}
370 |
// realClock is a clock backed by the standard library's time package.
type realClock struct{}

// Now reports the current time via time.Now.
func (rc realClock) Now() time.Time {
	now := time.Now()
	return now
}

// Since reports the time elapsed since t via time.Since.
func (rc realClock) Since(t time.Time) time.Duration {
	elapsed := time.Since(t)
	return elapsed
}
380 |
--------------------------------------------------------------------------------
/examples/const_test/config.yaml:
--------------------------------------------------------------------------------
1 | tables:
2 | - name: one
3 | columns:
4 | - name: c1
5 | type: const
6 | processor:
7 | values: [a, b, c]
8 |
9 | - name: two
10 | columns:
11 | - name: c1
12 | type: const
13 | processor:
14 | values: [a, b, c, d, e]
15 |
16 | - name: c2
17 | type: const
18 | processor:
19 | values: [a, b]
20 |
21 | - name: three
22 | columns:
23 | - name: c1
24 | type: const
25 | processor:
26 | values: [a, b]
27 |
28 | - name: c2
29 | type: const
30 | processor:
31 | values: [a, b, c, d, e]
--------------------------------------------------------------------------------
/examples/each_match_test/config.yaml:
--------------------------------------------------------------------------------
1 | inputs:
2 |
3 | - name: market
4 | type: csv
5 | source:
6 | file_name: market.csv
7 |
8 | tables:
9 |
10 | - name: product
11 | count: 1
12 | unique_columns: [name]
13 | columns:
14 | - name: id
15 | type: gen
16 | processor:
17 | value: ${uuid}
18 | - name: name
19 | type: gen
20 | processor:
21 | value: ${adjective} ${adverb} ${noun}
22 |
23 | - name: market_product
24 | columns:
25 | - name: id
26 | type: gen
27 | processor:
28 | value: ${uuid}
29 | - name: product_id
30 | type: each
31 | processor:
32 | table: product
33 | column: id
34 | - name: market
35 | type: each
36 | processor:
37 | table: market
38 | column: code
39 | - name: region
40 | type: match
41 | processor:
42 | source_table: market
43 | source_column: code
44 | source_value: region
45 | match_column: market
46 |
--------------------------------------------------------------------------------
/examples/each_match_test/market.csv:
--------------------------------------------------------------------------------
1 | code,region
2 | us,us-east-1
3 | uk,eu-west-1
4 |
--------------------------------------------------------------------------------
/examples/input_test/config.yaml:
--------------------------------------------------------------------------------
1 | inputs:
2 | - name: significant_event
3 | type: csv
4 | source:
5 | file_name: significant_dates.csv
6 |
7 | tables:
8 | - name: events
9 | columns:
10 | - name: timeline_date
11 | type: range
12 | processor:
13 | type: date
14 | from: 1885-01-01
15 | to: 1985-10-26
16 | format: 2006-01-02
17 | step: 24h
18 | - name: timeline_event
19 | type: match
20 | processor:
21 | source_table: significant_event
22 | source_column: date
23 | source_value: events
24 | match_column: timeline_date
--------------------------------------------------------------------------------
/examples/input_test/significant_dates.csv:
--------------------------------------------------------------------------------
1 | date,events
2 | 1885-01-01,"Buford Tannen presumably hires an unknown Blacksmith to shoe his horse, but does not pay him."
3 | 1885-07-04,"Hill Valley's Fourth of July Celebrations held in the Palace Saloon, and Chester, the bartender gives additional drinks such as Whiskey to the locals."
4 | 1885-08-29,"There is a meeting of the townspeople over who will pick up the new schoolteacher on September 4. No one volunteers."
5 | 1885-09-02,"Native American Horseback-archers are chased near Hill Valley by the United States Cavalry."
6 | 1885-09-03,"Stinky Lomax is hanged in Haysville, California, for an unknown crime and Marshall Strickland is a witness of the hanging."
7 | 1885-09-04,"A train pulls into Hill Valley Station, carrying the new clock for the Hill Valley Courthouse[10] - and Clara Clayton (who had been commissioned to be the new teacher at the schoolhouse near Carson Spur, outside Hill Valley) aboard as a passenger. With no one to meet Clara upon her arrival, she rents two horses and a wagon buckboard from Joe Statler, then sets out to find the schoolhouse on her own.\n\nA snake spooks the horses pulling Clara's wagon. They then ride madly into Shonash Ravine, killing her instantly. The ravine where she dies is renamed Clayton Ravine in her memory."
8 | 1885-09-05,"The Hill Valley Festival. As part of Hill Valley’s anniversary festivities, the new courthouse clock is started at exactly 8:00 p.m. PST. Photographs with the clock are offered to townspeople wishing to pose with the new timepiece. The clock will remain in faithful service to Hill Valley for the next seven decades."
9 | 1885-09-06,"Buford Tannen and his gang, rob the Pine City Stage"
10 | 1885-09-08,"Hill Valley holds a funeral for Clara Clayton."
11 | 1885-12-01,"Shonash Ravine is renamed Clayton Ravine."
12 | 1917-04-06,"The United States declares war on Germany. Due to hostility against German-Americans during World War I, Doc's father changes his family's name from Von Braun to Brown."
13 | 1930-05-31,"Clint Eastwood, Jr., American film actor, director, producer, and composer (and Marty's alter ego in 1885 Timeline 8) is born."
14 | 1937-03-27,"Biff Tannen is born."
15 | 1938-04-01,"George McFly is born to Arthur and Sylvia McFly."
16 | 1942-11-19,"Calvin Richard Klein, American fashion designer (and Marty's alter ego in 1955 Timeline 2) is born."
17 | 1955-11-05,"Doc Brown slips off his toilet whilst hanging a clock and has a vision of the flux capacitor.\n\nBiff Tannen and his gang pick on George McFly at Lou's Café, warning George to never go into the café again. The café's busboy, Goldie Wilson, gives George advice on standing up for himself, but George ignores it.\n\nWhile 'birdwatching', i.e. spying on a girl (Lorraine Baines) changing through her window, George McFly slips and falls out of a tree into the street right in front of Sam Baines's car. After hitting him with the car, Baines takes him in, and his daughter Lorraine takes care of George. The two fall in love."
18 | 1955-11-07,"On the first day of school after George was hit by Sam Baines's car, Lorraine is infatuated with George as a result of the Florence Nightingale effect. With his attentions focused on Lorraine, George stops writing science fiction stories and having missed Science Fiction Theatre on the 5th (due to his injury), apparently has no problem missing the show scheduled for Saturday the 12th - the night of the Enchantment Under the Sea dance. George then asks Lorraine to the dance, and she accepts. Biff, meanwhile, apparently leaves them alone for awhile."
19 | 1955-11-08,"Biff and his gang stop by Lou's Cafe to make sure that George McFly isn't there, then drive off, perhaps commenting on a manure truck that is parked nearby."
20 | 1955-11-12,"Doc continues to think about the vision he had had a week earlier about the flux capacitor. If Doc is conducting an experiment, it is probably in his home rather than in Courthouse Square.\n\nLorraine, accompanied by her friend Babs, goes to Ruth's Frock Shop in downtown Hill Valley to pick up a dress she had made for the dance later that night. There is nobody else present.\n\nThe Enchantment Under the Sea dance takes place at the Hill Valley High School gym. Marvin Berry and the Starlighters play a set of music, taking a break at 9 o'clock.\n\nBiff and his gang spend the evening drinking, but avoid going to the dance.\n\nDuring Marvin Berry and the Starlighters' second (and final for the night) set, George and Lorraine share their first kiss on the dance floor. It is at that moment that Lorraine realizes that she is going to spend the rest of her life with George.\n\n10:04 p.m.: The Hill Valley Courthouse is struck by lightning, stopping its clock after 70 years, 2 months, 7 days, 2 hours and 4 minutes of continuous ticking. Despite repeated attempts over the intervening years, the clock is never successfully repaired; in fact, most Hill Valley residents voice a desire for its preservation as is to memorialize the unusual occurrence."
21 | 1956-06-20,"George McFly and Lorraine Baines McFly graduate from Hill Valley High School."
22 | 1958-03-31,"Chuck Berry releases the single Johnny B. Goode."
23 | 1958-03-27,"Biff Tannen turns 21, the legal gambling age."
24 | 1958-12-07,"George McFly and Lorraine Baines get married."
25 | 1962-08-01,"Doc Brown's mansion is destroyed by fire, except for the detached garage (which Doc moves into)."
26 | 1967-01-18,"A Fistful of Dollars, starring Clint Eastwood, is released to US cinemas."
27 | 1968-06-09,"Marty McFly is born to George and Lorraine McFly."
28 | 1968-08-06,"Douglas J. Needles is born."
29 | 1968-10-29,"Jennifer Parker is born."
30 | 1983-05-22,"Ronald Reagan announces he will run for a second term in office. Although a newspaper headline, seen in Timeline 2, mentions that Doc Brown is commended and wins an award, it is unknown whether this happened in the original timeline (because in 1985, Dr. Brown is described by Stanford S. Strickland as 'a real nutcase')."
31 | 1985-10-12,"Libyan terrorist steal plutonium from the local power plant so Doc can build them a nuclear weapon.[22] It is dismissed by the FBI as a 'simple clerical error'."
32 | 1985-10-25,"8:18 a.m. Opening titles of 'Back to the Future': all the clocks in Doc's lab are set 25 minutes slow, displayed incorrectly as 7:53 a.m.\n\n8:25 a.m. Doc asks Marty to meet him at Twin Pines Mall at 1:15 a.m. next morning.\n\nMarty arrives to school late then fails his audition with his band, The Pinheads, later after school.\n\nMarty admires a black Toyota Hilux 4X4 on a flatbed truck at the Texaco gas station.\n\nA woman asks Marty for a donation to preserve the non-functioning clock tower, handing him a flyer containing specific date and time details about the lightning strike to the clock. Marty puts the flyer in his pocket.\n\nBiff borrows George's car. He crashes it while drinking and driving (at the same time!), and the two get into an argument about whether or not George knew about a blind spot.\n\nLorraine's brother Joey fails to make parole again."
33 | 1985-10-26,"12:28 a.m. Doc calls Marty to stop by at his garage to pick up his JVC camcorder.\n\n1:16 a.m. Marty arrives at Twin Pines Mall.\n\n1:18 a.m. Doc demonstrates Temporal Experiment #1.\n\n1:20 a.m. Temporal Experiment #1 proves to be a success. Einstein becomes the world's first time traveler by departing one minute into the future. (This time jump does not create a new timeline; timelines are only created by going back in time. Einstein merely moves one minute forward on Timeline 1. The Doc states this is the case with timelines in the blackboard scene in Part II.)\n\n1:21 a.m. Einstein arrives at one minute into the future.\n\n1:33 a.m. Emmett Brown is shot dead by Libyan terrorists.\n\n1:35 a.m. Marty flees from the Libyans in the DeLorean and accidentally departs 1985 for 1955 by accelerating to 88 m.p.h. His arrival there brings Timeline 2 into existence, and Timeline 1 fades from existence."
--------------------------------------------------------------------------------
/examples/many_to_many/config.yaml:
--------------------------------------------------------------------------------
1 | tables:
2 | # Create data for a person table.
3 | - name: person
4 | count: 10000
5 | columns:
6 | - name: id
7 | type: gen
8 | processor:
9 | value: ${uuid}
10 |
11 | # Create data for an event table.
12 | - name: event
13 | count: 100
14 | columns:
15 | - name: id
16 | type: gen
17 | processor:
18 | value: ${uuid}
19 |
20 | # Create data for a person_type table, with 5 rows.
21 | - name: person_type
22 | count: 5
23 | columns:
24 | - name: id
25 | type: gen
26 | processor:
27 | value: ${uuid}
28 |
29 | - name: name
30 | type: gen
31 | processor:
32 | value: TYPE-${uint16}
33 | format: "%05d"
34 |
35 | # Create data for a person_event many-to-many resolver table between
36 | # the person and event tables.
37 | - name: person_event
38 | columns:
39 | - name: person_id
40 | type: each
41 | processor:
42 | table: person
43 | column: id
44 |
45 | - name: event_id
46 | type: each
47 | processor:
48 | table: event
49 | column: id
--------------------------------------------------------------------------------
/examples/many_to_many/create.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE "person" (
2 | "id" UUID PRIMARY KEY DEFAULT gen_random_uuid()
3 | );
4 |
5 | CREATE TABLE "event" (
6 | "id" UUID PRIMARY KEY DEFAULT gen_random_uuid()
7 | );
8 |
9 | CREATE TABLE "person_type" (
10 | "id" UUID PRIMARY KEY DEFAULT gen_random_uuid(),
11 | "name" STRING NOT NULL
12 | );
13 |
14 | CREATE TABLE "person_event" (
15 | "id" UUID PRIMARY KEY DEFAULT gen_random_uuid(),
16 | "person_type" UUID NOT NULL REFERENCES "person_type"("id"),
17 | "person_id" UUID NOT NULL REFERENCES "person"("id"),
18 | "event_id" UUID NOT NULL REFERENCES "event"("id")
19 | );
--------------------------------------------------------------------------------
/examples/many_to_many/insert.sql:
--------------------------------------------------------------------------------
1 | IMPORT INTO "person" (
2 | "id"
3 | )
4 | CSV DATA (
5 | 'http://localhost:3000/person.csv'
6 | )
7 | WITH
8 | skip='1',
9 | nullif = '',
10 | allow_quoted_null;
11 |
12 | IMPORT INTO "event" (
13 | "id"
14 | )
15 | CSV DATA (
16 | 'http://localhost:3000/event.csv'
17 | )
18 | WITH
19 | skip='1',
20 | nullif = '',
21 | allow_quoted_null;
22 |
23 | IMPORT INTO "person_type" (
24 | "id",
25 | "name"
26 | )
27 | CSV DATA (
28 | 'http://localhost:3000/person_type.csv'
29 | )
30 | WITH
31 | skip='1',
32 | nullif = '',
33 | allow_quoted_null;
34 |
35 | IMPORT INTO "person_event" (
36 | "person_id",
37 | "event_id",
38 | "id",
39 | "person_type"
40 | )
41 | CSV DATA (
42 | 'http://localhost:3000/person_event.csv'
43 | )
44 | WITH
45 | skip='1',
46 | nullif = '',
47 | allow_quoted_null;
--------------------------------------------------------------------------------
/examples/match_test/config.yaml:
--------------------------------------------------------------------------------
1 | inputs:
2 |
3 | - name: market
4 | type: csv
5 | source:
6 | file_name: market.csv
7 |
8 | tables:
9 |
10 | - name: market_product
11 | count: 10
12 | columns:
13 | - name: id
14 | type: gen
15 | processor:
16 | value: ${uuid}
17 | - name: market
18 | type: set
19 | processor:
20 | values: ["us", "in"]
21 | - name: region
22 | type: match
23 | processor:
24 | source_table: market
25 | source_column: code
26 | source_value: region
27 | match_column: market
28 |
--------------------------------------------------------------------------------
/examples/match_test/market.csv:
--------------------------------------------------------------------------------
1 | code,region
2 | us,us-east-1
3 | in,ap-south-1
--------------------------------------------------------------------------------
/examples/pattern_test/config.yaml:
--------------------------------------------------------------------------------
1 | tables:
2 | # Generate data for a person table using the pattern generator.
3 | - name: person
4 | count: 1000
5 | columns:
6 | - name: id
7 | type: gen
8 | processor:
9 | value: ${uuid}
10 | - name: mobile
11 | type: gen
12 | processor:
13 | pattern: \d{3}-\d{3}-\d{4}
--------------------------------------------------------------------------------
/examples/person/config.yaml:
--------------------------------------------------------------------------------
1 | tables:
2 | # Generate data for a person table, showing off a couple of column generators.
3 | - name: person
4 | count: 100000
5 | columns:
6 | - name: id
7 | type: gen
8 | processor:
9 | value: ${uuid}
10 | - name: full_name
11 | type: gen
12 | processor:
13 | value: ${name_prefix} ${first_name} ${last_name}
14 | - name: date_of_birth
15 | type: gen
16 | processor:
17 | value: ${date}
18 | format: 2006-01-02
19 | - name: user_type
20 | type: set
21 | processor:
22 | values: [admin, regular, read-only]
23 | - name: favourite_animal
24 | type: set
25 | processor:
26 | values: [rabbit, dog, cat]
27 | weights: [10, 60, 30]
--------------------------------------------------------------------------------
/examples/person/create.sql:
--------------------------------------------------------------------------------
1 | CREATE TYPE person_type AS ENUM ('admin', 'regular', 'read-only');
2 | CREATE TYPE animal_type AS ENUM ('rabbit', 'dog', 'cat');
3 |
4 | CREATE TABLE person (
5 | "id" UUID PRIMARY KEY,
6 | "full_name" STRING NOT NULL,
7 | "date_of_birth" DATE NOT NULL,
8 | "user_type" person_type NOT NULL,
9 | "favourite_animal" animal_type NOT NULL
10 | );
--------------------------------------------------------------------------------
/examples/person/insert.sql:
--------------------------------------------------------------------------------
1 | IMPORT INTO "person"(
2 | "id",
3 | "full_name",
4 | "date_of_birth",
5 | "user_type",
6 | "favourite_animal"
7 | )
8 | CSV DATA (
9 | 'http://localhost:3000/person.csv'
10 | )
11 | WITH
12 | skip='1',
13 | nullif = '',
14 | allow_quoted_null;
--------------------------------------------------------------------------------
/examples/range_test/config.yaml:
--------------------------------------------------------------------------------
1 | tables:
2 | - name: auto_incrementing_id
3 | count: 20
4 | columns:
5 | - name: id
6 | type: range
7 | processor:
8 | type: int
9 | from: 1
10 |
11 | - name: bet_types
12 | columns:
13 | - name: id
14 | type: range
15 | processor:
16 | type: int
17 | from: 1
18 | step: 1
19 | - name: description
20 | type: const
21 | processor:
22 | values: [Win, Lose, Draw]
23 |
24 | - name: previous_table
25 | count: 20
26 | columns:
27 | - name: id
28 | type: gen
29 | processor:
30 | value: ${uuid}
31 |
32 | - name: each_test
33 | count: 10 # Ignored due to count from "each" generator.
34 | columns:
35 | - name: id
36 | type: each
37 | processor:
38 | table: previous_table
39 | column: id
40 | - name: should_have_20
41 | type: range
42 | processor:
43 | type: date
44 | from: 2020-01-01
45 | to: 2023-01-01
46 | format: 2006-01-02
47 | step: 1h # Ignored due to count from "each" generator.
48 |
49 | - name: count_test
50 | count: 10
51 | columns:
52 | - name: should_have_10
53 | type: range
54 | processor:
55 | type: date
56 | from: 2020-01-01
57 | to: 2023-01-01
58 | format: 2006-01-02
59 | step: 1h # Ignored due to count value.
60 |
61 | - name: step_test
62 | columns:
63 | - name: should_have_37
64 | type: range
65 | processor:
66 | type: date
67 | from: 2020-01-01
68 | to: 2023-01-01
69 | format: 2006-01-02
70 | step: 730h # Used: with no count set, the step decides how many rows are generated.
--------------------------------------------------------------------------------
/examples/supress_column/config.yaml:
--------------------------------------------------------------------------------
1 | tables:
2 |
3 | - name: example
4 | count: 1
5 | columns:
6 |
7 | - name: one
8 | type: gen
9 | processor:
10 | value: ${uint8}
11 |
12 | - name: two
13 | suppress: true
14 | type: gen
15 | processor:
16 | value: ${uint64}
--------------------------------------------------------------------------------
/examples/unique_test/config.yaml:
--------------------------------------------------------------------------------
1 | tables:
2 | - name: person
3 | unique_columns: [col_a, col_b]
4 | count: 10
5 | columns:
6 |
7 | - name: col_a
8 | type: set
9 | processor:
10 | values: [a, b, c]
11 |
12 | - name: col_b
13 | type: set
14 | processor:
15 | values: [d, e, f]
16 |
17 | - name: col_c
18 | type: set
19 | processor:
20 | values: [g, h, i]
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/codingconcepts/dg
2 |
3 | go 1.20
4 |
5 | require (
6 | github.com/brianvoe/gofakeit/v6 v6.22.0
7 | github.com/samber/lo v1.38.1
8 | github.com/stretchr/testify v1.8.4
9 | gopkg.in/yaml.v3 v3.0.1
10 | )
11 |
12 | require (
13 | github.com/davecgh/go-spew v1.1.1 // indirect
14 | github.com/lucasjones/reggen v0.0.0-20200904144131-37ba4fa293bb // indirect
15 | github.com/pmezard/go-difflib v1.0.0 // indirect
16 | golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect
17 | )
18 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/brianvoe/gofakeit/v6 v6.22.0 h1:BzOsDot1o3cufTfOk+fWKE9nFYojyDV+XHdCWL2+uyE=
2 | github.com/brianvoe/gofakeit/v6 v6.22.0/go.mod h1:Ow6qC71xtwm79anlwKRlWZW6zVq9D2XHE4QSSMP/rU8=
3 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
4 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
5 | github.com/lucasjones/reggen v0.0.0-20200904144131-37ba4fa293bb h1:w1g9wNDIE/pHSTmAaUhv4TZQuPBS6GV3mMz5hkgziIU=
6 | github.com/lucasjones/reggen v0.0.0-20200904144131-37ba4fa293bb/go.mod h1:5ELEyG+X8f+meRWHuqUOewBOhvHkl7M76pdGEansxW4=
7 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
8 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
9 | github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM=
10 | github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
11 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
12 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
13 | golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM=
14 | golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE=
15 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
16 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
17 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
18 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
19 |
--------------------------------------------------------------------------------
/internal/pkg/generator/array.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "github.com/samber/lo"
5 | )
6 |
// CartesianProduct builds every combination that takes exactly one element
// from each of the given arrays. Combinations are ordered so that the first
// array varies fastest and the last array varies slowest. Calling it with no
// arrays, or with any empty array, yields an empty (non-nil) result.
func CartesianProduct(a ...[]string) [][]string {
	if len(a) == 0 {
		return [][]string{}
	}

	// The final number of combinations is the product of all input lengths.
	total := 1
	for _, arr := range a {
		total *= len(arr)
	}

	// Seed with a single empty combination that each pass extends by one element.
	combos := make([][]string, 0, total)
	combos = append(combos, []string{})

	for idx := range a {
		next := make([][]string, 0, total)
		for _, element := range a[idx] {
			for _, prefix := range combos {
				// Copy the prefix so combinations never share backing storage.
				extended := make([]string, 0, len(prefix)+1)
				extended = append(extended, prefix...)
				extended = append(extended, element)
				next = append(next, extended)
			}
		}
		combos = next
	}

	return combos
}
38 |
39 | // Transpose a multi-dimensional array.
40 | func Transpose(m [][]string) [][]string {
41 | max := lo.MaxBy(m, func(a, b []string) bool {
42 | return len(a) > len(b)
43 | })
44 |
45 | r := make([][]string, len(max))
46 |
47 | for x := range r {
48 | r[x] = make([]string, len(m))
49 | }
50 |
51 | for y, s := range m {
52 | for x, e := range s {
53 | r[x][y] = e
54 | }
55 | }
56 | return r
57 | }
58 |
--------------------------------------------------------------------------------
/internal/pkg/generator/array_test.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "strings"
5 | "testing"
6 |
7 | "github.com/stretchr/testify/assert"
8 | )
9 |
10 | func TestCartesianProduct(t *testing.T) {
11 | cases := []struct {
12 | name string
13 | input [][]string
14 | output [][]string
15 | }{
16 | {
17 | name: "single input",
18 | input: [][]string{
19 | {"a", "b", "c"},
20 | },
21 | output: [][]string{
22 | {"a"}, {"b"}, {"c"},
23 | },
24 | },
25 | {
26 | name: "multiple input",
27 | input: [][]string{
28 | {"a", "b", "c"},
29 | {"d", "e", "f"},
30 | },
31 | output: [][]string{
32 | {"a", "d"},
33 | {"b", "d"},
34 | {"c", "d"},
35 | {"a", "e"},
36 | {"b", "e"},
37 | {"c", "e"},
38 | {"a", "f"},
39 | {"b", "f"},
40 | {"c", "f"},
41 | },
42 | },
43 | {
44 | name: "small array big array",
45 | input: [][]string{
46 | {"a", "b"},
47 | {"d", "e", "f"},
48 | },
49 | output: [][]string{
50 | {"a", "d"},
51 | {"b", "d"},
52 | {"a", "e"},
53 | {"b", "e"},
54 | {"a", "f"},
55 | {"b", "f"},
56 | },
57 | },
58 | {
59 | name: "big array small array",
60 | input: [][]string{
61 | {"a", "b", "c"},
62 | {"d", "e"},
63 | },
64 | output: [][]string{
65 | {"a", "d"},
66 | {"b", "d"},
67 | {"c", "d"},
68 | {"a", "e"},
69 | {"b", "e"},
70 | {"c", "e"},
71 | },
72 | },
73 | }
74 |
75 | for _, c := range cases {
76 | t.Run(c.name, func(t *testing.T) {
77 | actual := CartesianProduct(c.input...)
78 | assert.Equal(t, c.output, actual)
79 | })
80 | }
81 | }
82 |
83 | func TestTranspose(t *testing.T) {
84 | cases := []struct {
85 | name string
86 | input [][]string
87 | output [][]string
88 | }{
89 | {
90 | name: "single input",
91 | input: [][]string{
92 | {"a", "b", "c"},
93 | },
94 | output: [][]string{
95 | {"a"}, {"b"}, {"c"},
96 | },
97 | },
98 | {
99 | name: "multiple input",
100 | input: [][]string{
101 | {"a", "b", "c"},
102 | {"d", "e", "f"},
103 | },
104 | output: [][]string{
105 | {"a", "d"},
106 | {"b", "e"},
107 | {"c", "f"},
108 | },
109 | },
110 | {
111 | name: "first input bigger than second",
112 | input: [][]string{
113 | {"a", "b", "c", "1"},
114 | {"d", "e", "f"},
115 | },
116 | output: [][]string{
117 | {"a", "d"},
118 | {"b", "e"},
119 | {"c", "f"},
120 | {"1", ""},
121 | },
122 | },
123 | {
124 | name: "second input bigger than first",
125 | input: [][]string{
126 | {"a", "b", "c"},
127 | {"d", "e", "f", "2"},
128 | },
129 | output: [][]string{
130 | {"a", "d"},
131 | {"b", "e"},
132 | {"c", "f"},
133 | {"", "2"},
134 | },
135 | },
136 | }
137 |
138 | for _, c := range cases {
139 | t.Run(c.name, func(t *testing.T) {
140 | actual := Transpose(c.input)
141 | assert.Equal(t, c.output, actual)
142 | })
143 | }
144 | }
145 |
146 | func BenchmarkCartesianProduct(b *testing.B) {
147 | cases := []struct {
148 | name string
149 | input [][]string
150 | }{
151 | {
152 | name: "single array",
153 | input: [][]string{
154 | {"a", "b", "c"},
155 | },
156 | },
157 | {
158 | name: "two small arrays",
159 | input: [][]string{
160 | {"a", "b", "c"},
161 | {"d", "e", "f"},
162 | },
163 | },
164 | {
165 | name: "three small arrays",
166 | input: [][]string{
167 | {"a", "b", "c"},
168 | {"d", "e", "f"},
169 | {"g", "h", "i"},
170 | },
171 | },
172 | {
173 | name: "small array and big array",
174 | input: [][]string{
175 | {"a", "b", "c"},
176 | strings.Split(strings.Repeat("d", 1000), ""),
177 | },
178 | },
179 | {
180 | name: "big array and small array",
181 | input: [][]string{
182 | strings.Split(strings.Repeat("a", 1000), ""),
183 | {"d", "e", "f"},
184 | },
185 | },
186 | {
187 | name: "big arrays",
188 | input: [][]string{
189 | strings.Split(strings.Repeat("a", 1000), ""),
190 | strings.Split(strings.Repeat("d", 1000), ""),
191 | },
192 | },
193 | }
194 |
195 | for _, c := range cases {
196 | b.Run(c.name, func(b *testing.B) {
197 | for i := 0; i < b.N; i++ {
198 | CartesianProduct(c.input...)
199 | }
200 | })
201 | }
202 | }
203 |
204 | func BenchmarkTranspose(b *testing.B) {
205 | cases := []struct {
206 | name string
207 | input [][]string
208 | }{
209 | {
210 | name: "single array",
211 | input: [][]string{
212 | {"a", "b", "c"},
213 | },
214 | },
215 | {
216 | name: "multiple small arrays",
217 | input: [][]string{
218 | {"a", "b", "c"},
219 | {"d", "e", "f"},
220 | },
221 | },
222 | {
223 | name: "small array and big array",
224 | input: [][]string{
225 | {"a", "b", "c"},
226 | strings.Split(strings.Repeat("d", 1000), ""),
227 | },
228 | },
229 | {
230 | name: "big array and small array",
231 | input: [][]string{
232 | strings.Split(strings.Repeat("a", 1000), ""),
233 | {"d", "e", "f"},
234 | },
235 | },
236 | }
237 |
238 | for _, c := range cases {
239 | b.Run(c.name, func(b *testing.B) {
240 | for i := 0; i < b.N; i++ {
241 | CartesianProduct(c.input...)
242 | }
243 | })
244 | }
245 | }
246 |
--------------------------------------------------------------------------------
/internal/pkg/generator/common.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/codingconcepts/dg/internal/pkg/model"
7 | )
8 |
9 | // AddTable adds a column to a table in the given files map.
10 | func AddTable(table model.Table, column string, line []string, files map[string]model.CSVFile) {
11 | if _, ok := files[table.Name]; !ok {
12 | files[table.Name] = model.CSVFile{
13 | Name: table.Name,
14 | Output: !table.Suppress,
15 | UniqueColumns: table.UniqueColumns,
16 | }
17 | }
18 |
19 | add(files, table.Name, column, line)
20 | }
21 |
22 | // AddInput adds a column to a table in the given files map.
23 | func AddInput(table, column string, line []string, files map[string]model.CSVFile) {
24 | if _, ok := files[table]; !ok {
25 | files[table] = model.CSVFile{
26 | Name: table,
27 | Output: false,
28 | }
29 | }
30 |
31 | add(files, table, column, line)
32 | }
33 |
34 | func add(files map[string]model.CSVFile, table string, column string, line []string) {
35 | foundTable := files[table]
36 | foundTable.Header = append(foundTable.Header, column)
37 | foundTable.Lines = append(foundTable.Lines, line)
38 | files[table] = foundTable
39 | }
40 |
41 | func formatValue(fp FormatterProcessor, value any) string {
42 | format := fp.GetFormat()
43 | if format != "" {
44 | // Check if the value implements the formatter interface and use that first,
45 | // otherwise, just perform a simple string format.
46 | if f, ok := value.(Formatter); ok {
47 | return f.Format(format)
48 | } else {
49 | return fmt.Sprintf(format, value)
50 | }
51 | } else {
52 | return fmt.Sprintf("%v", value)
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/internal/pkg/generator/common_test.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "testing"
5 | "time"
6 |
7 | "github.com/codingconcepts/dg/internal/pkg/model"
8 |
9 | "github.com/stretchr/testify/assert"
10 | )
11 |
12 | func TestAddToFile(t *testing.T) {
13 | cases := []struct {
14 | name string
15 | table string
16 | column string
17 | line []string
18 | filesBefore map[string]model.CSVFile
19 | filesAfter map[string]model.CSVFile
20 | }{
21 | {
22 | name: "first column for table",
23 | table: "person",
24 | column: "id",
25 | line: []string{"a", "b", "c"},
26 | filesBefore: map[string]model.CSVFile{},
27 | filesAfter: map[string]model.CSVFile{
28 | "person": {
29 | Name: "person",
30 | Header: []string{"id"},
31 | Lines: [][]string{{"a", "b", "c"}},
32 | },
33 | },
34 | },
35 | {
36 | name: "second column for table",
37 | table: "person",
38 | column: "name",
39 | line: []string{"1", "2", "3"},
40 | filesBefore: map[string]model.CSVFile{
41 | "person": {
42 | Name: "person",
43 | Header: []string{"id"},
44 | Lines: [][]string{{"a", "b", "c"}},
45 | },
46 | },
47 | filesAfter: map[string]model.CSVFile{
48 | "person": {
49 | Name: "person",
50 | Header: []string{"id", "name"},
51 | Lines: [][]string{{"a", "b", "c"}, {"1", "2", "3"}},
52 | },
53 | },
54 | },
55 | }
56 |
57 | for _, c := range cases {
58 | t.Run(c.name, func(t *testing.T) {
59 | table := model.Table{
60 | Name: c.table,
61 | }
62 | AddTable(table, c.column, c.line, c.filesBefore)
63 |
64 | assert.Equal(t, c.filesAfter[c.table].Header, c.filesBefore[c.table].Header)
65 | assert.Equal(t, c.filesAfter[c.table].Lines, c.filesBefore[c.table].Lines)
66 | assert.Equal(t, c.filesAfter[c.table].Name, c.filesBefore[c.table].Name)
67 | })
68 | }
69 | }
70 |
71 | func TestFormatValue(t *testing.T) {
72 | cases := []struct {
73 | name string
74 | format string
75 | value any
76 | exp string
77 | }{
78 | {
79 | name: "no format",
80 | value: 1,
81 | exp: "1",
82 | },
83 | {
84 | name: "int format",
85 | value: 1,
86 | format: "PREFIX_%d_SUFFIX",
87 | exp: "PREFIX_1_SUFFIX",
88 | },
89 | {
90 | name: "time format",
91 | value: time.Date(2023, 1, 2, 3, 4, 5, 6, time.UTC),
92 | format: "2006-01-02T15:04:05Z07:00",
93 | exp: "2023-01-02T03:04:05Z",
94 | },
95 | }
96 |
97 | for _, c := range cases {
98 | t.Run(c.name, func(t *testing.T) {
99 | formatter := GenGenerator{Format: c.format}
100 | act := formatValue(formatter, c.value)
101 |
102 | assert.Equal(t, c.exp, act)
103 | })
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/internal/pkg/generator/const_generator.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "fmt"
5 | "sort"
6 |
7 | "github.com/codingconcepts/dg/internal/pkg/model"
8 | "github.com/samber/lo"
9 | )
10 |
// ConstGenerator holds the processor settings of a const column.
type ConstGenerator struct {
	// Values are the literal values written to the column, in order.
	Values []string `yaml:"values"`
}
15 |
16 | // Generate values for a column based on a series of provided values.
17 | func (g ConstGenerator) Generate(t model.Table, files map[string]model.CSVFile) error {
18 | cols := lo.Filter(t.Columns, func(c model.Column, _ int) bool {
19 | return c.Type == "const"
20 | })
21 |
22 | sortColumns(cols)
23 |
24 | for _, c := range cols {
25 | var cg ConstGenerator
26 | if err := c.Generator.UnmarshalFunc(&cg); err != nil {
27 | return fmt.Errorf("parsing const process for %s.%s: %w", t.Name, c.Name, err)
28 | }
29 | if err := cg.generate(t, c, files); err != nil {
30 | return fmt.Errorf("generating const columns: %w", err)
31 | }
32 | }
33 |
34 | return nil
35 | }
36 |
37 | func sortColumns(cols []model.Column) {
38 | sort.Slice(cols, func(i, j int) bool {
39 | var g1 ConstGenerator
40 | if err := cols[i].Generator.UnmarshalFunc(&g1); err != nil {
41 | return false
42 | }
43 |
44 | var g2 ConstGenerator
45 | if err := cols[j].Generator.UnmarshalFunc(&g2); err != nil {
46 | return false
47 | }
48 |
49 | return len(g1.Values) > len(g2.Values)
50 | })
51 | }
52 |
53 | func (g ConstGenerator) generate(t model.Table, c model.Column, files map[string]model.CSVFile) error {
54 | if len(g.Values) == 0 {
55 | return fmt.Errorf("no values provided for const generator")
56 | }
57 |
58 | count := len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool {
59 | return len(a) > len(b)
60 | }))
61 |
62 | if count == 0 {
63 | count = t.Count
64 | }
65 |
66 | // Repeat the values until they equal the count.
67 | if count > len(g.Values) {
68 | for i := 0; len(g.Values) < count; i++ {
69 | g.Values = append(g.Values, g.Values[i%len(g.Values)])
70 | }
71 | }
72 |
73 | AddTable(t, c.Name, g.Values, files)
74 | return nil
75 | }
76 |
--------------------------------------------------------------------------------
/internal/pkg/generator/const_generator_test.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/codingconcepts/dg/internal/pkg/model"
7 | "github.com/samber/lo"
8 | "github.com/stretchr/testify/assert"
9 | )
10 |
11 | func TestGenerateConstColumn(t *testing.T) {
12 | cases := []struct {
13 | name string
14 | tableCount int
15 | files map[string]model.CSVFile
16 | values []string
17 | exp []string
18 | expErr error
19 | }{
20 | {
21 | name: "first column in table",
22 | tableCount: 3,
23 | files: map[string]model.CSVFile{},
24 | values: []string{"a", "b", "c"},
25 | },
26 | {
27 | name: "less than current table size",
28 | files: map[string]model.CSVFile{
29 | "table": {
30 | Name: "table",
31 | Header: []string{"col_a", "col_b", "col_c"},
32 | Lines: [][]string{
33 | {"val_1", "val_2", "val_3"},
34 | {"val_1", "val_2", "val_3"},
35 | },
36 | },
37 | },
38 | values: []string{"a", "b"},
39 | exp: []string{"a", "b", "a"},
40 | },
41 | {
42 | name: "less than current table size with table count",
43 | tableCount: 10,
44 | files: map[string]model.CSVFile{
45 | "table": {
46 | Name: "table",
47 | Header: []string{"col_a", "col_b", "col_c"},
48 | Lines: [][]string{
49 | {"val_1", "val_2", "val_3"},
50 | {"val_1", "val_2", "val_3"},
51 | },
52 | },
53 | },
54 | values: []string{"a", "b"},
55 | exp: []string{"a", "b", "a"},
56 | },
57 | {
58 | name: "same as current table size",
59 | files: map[string]model.CSVFile{
60 | "table": {
61 | Name: "table",
62 | Header: []string{"col_a", "col_b", "col_c"},
63 | Lines: [][]string{
64 | {"val_1", "val_2", "val_3"},
65 | {"val_1", "val_2", "val_3"},
66 | },
67 | },
68 | },
69 | values: []string{"a", "b", "c"},
70 | },
71 | {
72 | name: "more than current table size",
73 | files: map[string]model.CSVFile{
74 | "table": {
75 | Name: "table",
76 | Header: []string{"col_a", "col_b", "col_c"},
77 | Lines: [][]string{
78 | {"val_1", "val_2", "val_3"},
79 | {"val_1", "val_2", "val_3"},
80 | },
81 | },
82 | },
83 | values: []string{"a", "b", "c", "d", "e"},
84 | },
85 | }
86 |
87 | for _, c := range cases {
88 | t.Run(c.name, func(t *testing.T) {
89 | g := ConstGenerator{
90 | Values: c.values,
91 | }
92 |
93 | table := model.Table{
94 | Name: "table",
95 | Count: c.tableCount,
96 | Columns: []model.Column{
97 | {Name: "col", Type: "const", Generator: model.ToRawMessage(t, g)},
98 | },
99 | }
100 |
101 | actErr := g.Generate(table, c.files)
102 | assert.Equal(t, c.expErr, actErr)
103 | if actErr != nil {
104 | return
105 | }
106 |
107 | exp := lo.Ternary(c.exp != nil, c.exp, c.values)
108 |
109 | assert.Equal(t, exp, c.files["table"].Lines[len(c.files["table"].Lines)-1])
110 | })
111 | }
112 | }
113 |
--------------------------------------------------------------------------------
/internal/pkg/generator/each_generator.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/codingconcepts/dg/internal/pkg/model"
7 |
8 | "github.com/samber/lo"
9 | )
10 |
// EachGenerator holds the processor settings of an each or ref column.
type EachGenerator struct {
	// Table is the name of the previously generated table to read from.
	Table string `yaml:"table"`
	// Column is the header name of the column to read within Table.
	Column string `yaml:"column"`
}
16 |
17 | // Generate looks for any each type columns for a table, and
18 | // returns their Cartesian product back into the given files map.
19 | func (g EachGenerator) Generate(t model.Table, files map[string]model.CSVFile) error {
20 | cols := lo.Filter(t.Columns, func(c model.Column, _ int) bool {
21 | return c.Type == "each"
22 | })
23 |
24 | if len(cols) == 0 {
25 | return nil
26 | }
27 |
28 | var preCartesian [][]string
29 | for _, col := range cols {
30 | var gCol EachGenerator
31 | if err := col.Generator.UnmarshalFunc(&gCol); err != nil {
32 | return fmt.Errorf("parsing each process for %s.%s: %w", t.Name, col.Name, err)
33 | }
34 |
35 | srcTable := files[gCol.Table]
36 | srcColumn := gCol.Column
37 | srcColumnIndex := lo.IndexOf(srcTable.Header, srcColumn)
38 |
39 | if len(srcTable.Lines)-1 < srcColumnIndex {
40 | return fmt.Errorf("column %q out of bounds for table %q", srcColumn, srcTable.Name)
41 | }
42 |
43 | preCartesian = append(preCartesian, srcTable.Lines[srcColumnIndex])
44 | }
45 |
46 | // Compute Cartesian product of all columns.
47 | cartesianColumns := Transpose(CartesianProduct(preCartesian...))
48 |
49 | // Add the header
50 | for i, col := range cartesianColumns {
51 | AddTable(t, cols[i].Name, col, files)
52 | }
53 |
54 | return nil
55 | }
56 |
--------------------------------------------------------------------------------
/internal/pkg/generator/each_generator_test.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/codingconcepts/dg/internal/pkg/model"
7 | "github.com/stretchr/testify/assert"
8 | )
9 |
10 | func TestGenerateEachColumn(t *testing.T) {
11 | table := model.Table{
12 | Name: "person_event",
13 | Columns: []model.Column{
14 | {
15 | Name: "person_id",
16 | Type: "each",
17 | Generator: model.ToRawMessage(t, EachGenerator{
18 | Table: "person",
19 | Column: "id",
20 | }),
21 | },
22 | {
23 | Name: "event_id",
24 | Type: "each",
25 | Generator: model.ToRawMessage(t, EachGenerator{
26 | Table: "event",
27 | Column: "id",
28 | }),
29 | },
30 | },
31 | }
32 |
33 | files := map[string]model.CSVFile{
34 | "person": {
35 | Name: "person",
36 | Header: []string{"id", "name"},
37 | Lines: [][]string{
38 | {"p-i-1", "p-i-2"},
39 | {"p-one", "p-two"},
40 | },
41 | },
42 | "event": {
43 | Name: "event",
44 | Header: []string{"id", "name"},
45 | Lines: [][]string{
46 | {"e-i-1", "e-i-2"},
47 | {"e-one", "e-two"},
48 | },
49 | },
50 | }
51 |
52 | g := EachGenerator{}
53 |
54 | err := g.Generate(table, files)
55 | assert.Nil(t, err)
56 |
57 | exp := model.CSVFile{
58 | Name: "person_event",
59 | Header: []string{"person_id", "event_id"},
60 | Lines: [][]string{
61 | {"p-i-1", "p-i-2", "p-i-1", "p-i-2"},
62 | {"e-i-1", "e-i-1", "e-i-2", "e-i-2"},
63 | },
64 | Output: true,
65 | }
66 | assert.Equal(t, exp, files["person_event"])
67 | }
68 |
--------------------------------------------------------------------------------
/internal/pkg/generator/formatter.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
// Formatter is implemented by values that can render themselves as a string
// when given a format string (time.Time is one such value).
type Formatter interface {
	// Format returns the receiver rendered according to the given format.
	Format(string) string
}
8 |
// FormatterProcessor is implemented by column processors that carry a format
// string to apply to the values they generate.
type FormatterProcessor interface {
	// GetFormat returns the processor's format string; it may be empty.
	GetFormat() string
}
14 |
--------------------------------------------------------------------------------
/internal/pkg/generator/gen_generator.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 |
7 | "github.com/codingconcepts/dg/internal/pkg/model"
8 | "github.com/codingconcepts/dg/internal/pkg/random"
9 | "github.com/lucasjones/reggen"
10 | "github.com/samber/lo"
11 | )
12 |
13 | // GenGenerator provides additional context to a gen column.
14 | type GenGenerator struct {
15 | Value string `yaml:"value"`
16 | Pattern string `yaml:"pattern"`
17 | NullPercentage int `yaml:"null_percentage"`
18 | Format string `yaml:"format"`
19 |
20 | patternGenerator *reggen.Generator
21 | }
22 |
23 | func (g GenGenerator) GetFormat() string {
24 | return g.Format
25 | }
26 |
27 | // Generate random data for a given column.
28 | func (g GenGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error {
29 | if g.Value == "" && g.Pattern == "" {
30 | return fmt.Errorf("gen must have either 'value' or 'pattern'")
31 | }
32 |
33 | if t.Count == 0 {
34 | t.Count = len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool {
35 | return len(a) > len(b)
36 | }))
37 | }
38 |
39 | if g.Pattern != "" {
40 | var err error
41 | if g.patternGenerator, err = reggen.NewGenerator(g.Pattern); err != nil {
42 | return fmt.Errorf("creating regex generator: %w", err)
43 | }
44 | }
45 |
46 | var line []string
47 | for i := 0; i < t.Count; i++ {
48 | s := g.generate()
49 | line = append(line, s)
50 | }
51 |
52 | AddTable(t, c.Name, line, files)
53 | return nil
54 | }
55 |
56 | func (pg GenGenerator) generate() string {
57 | r := random.Intn(100)
58 | if r < pg.NullPercentage {
59 | return ""
60 | }
61 |
62 | if pg.Pattern != "" {
63 | return pg.patternGenerator.Generate(255)
64 | }
65 |
66 | s := pg.Value
67 |
68 | // Look for quick single-replacements.
69 | if v, ok := replacements[s]; ok {
70 | return formatValue(pg, v())
71 | }
72 |
73 | // Process multipe-replacements.
74 | for k, v := range replacements {
75 | if strings.Contains(s, k) {
76 | valueStr := formatValue(pg, v())
77 | s = strings.ReplaceAll(s, k, valueStr)
78 | }
79 | }
80 |
81 | return s
82 | }
83 |
--------------------------------------------------------------------------------
/internal/pkg/generator/gen_generator_test.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "regexp"
5 | "strconv"
6 | "strings"
7 | "testing"
8 | "time"
9 |
10 | "github.com/codingconcepts/dg/internal/pkg/model"
11 | "github.com/lucasjones/reggen"
12 |
13 | "github.com/stretchr/testify/assert"
14 | )
15 |
16 | func TestGenerateGenColumn(t *testing.T) {
17 | cases := []struct {
18 | name string
19 | value string
20 | pattern string
21 | format string
22 | expShapeFunc func(val string) bool
23 | }{
24 | {
25 | name: "multiple space-delimited strings",
26 | value: "${first_name} ${last_name}",
27 | expShapeFunc: func(val string) bool {
28 | return len(strings.Split(val, " ")) == 2
29 | },
30 | },
31 | {
32 | name: "formatted date string",
33 | value: "${date}",
34 | format: "2006-01-02T15:04:05",
35 | expShapeFunc: func(val string) bool {
36 | _, err := time.Parse("2006-01-02T15:04:05", val)
37 | return err == nil
38 | },
39 | },
40 | {
41 | name: "integer",
42 | value: "${int64}",
43 | expShapeFunc: func(val string) bool {
44 | _, err := strconv.Atoi(val)
45 | if err != nil {
46 | t.Fatal(err)
47 | }
48 | return err == nil
49 | },
50 | },
51 | {
52 | name: "pattern",
53 | pattern: `[a-z]{3}-[A-Z]{3}-\d{3}`,
54 | expShapeFunc: func(val string) bool {
55 | re := regexp.MustCompile(`[a-z]{3}-[A-Z]{3}-\d{3}`)
56 | return re.MatchString(val)
57 | },
58 | },
59 | }
60 |
61 | for _, c := range cases {
62 | t.Run(c.name, func(t *testing.T) {
63 | table := model.Table{
64 | Name: "table",
65 | Count: 1,
66 | }
67 |
68 | column := model.Column{
69 | Name: "col",
70 | }
71 |
72 | g := GenGenerator{
73 | Value: c.value,
74 | Pattern: c.pattern,
75 | Format: c.format,
76 | }
77 |
78 | files := map[string]model.CSVFile{}
79 | err := g.Generate(table, column, files)
80 | assert.Nil(t, err)
81 | assert.True(t, c.expShapeFunc(files["table"].Lines[0][0]))
82 | })
83 | }
84 | }
85 |
86 | func BenchmarkGeneratePattern(b *testing.B) {
87 | pattern := `[a-z]{3}-[A-Z]{3}-\d{3}`
88 | patternGenerator, err := reggen.NewGenerator(pattern)
89 | assert.NoError(b, err)
90 |
91 | g := GenGenerator{
92 | Pattern: pattern,
93 | patternGenerator: patternGenerator,
94 | }
95 |
96 | for i := 0; i < b.N; i++ {
97 | g.generate()
98 | }
99 | }
100 |
--------------------------------------------------------------------------------
/internal/pkg/generator/inc_generator.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "github.com/codingconcepts/dg/internal/pkg/model"
5 | "github.com/samber/lo"
6 | )
7 |
// IncGenerator holds the processor settings of an inc column.
type IncGenerator struct {
	// Start is the first number written to the column.
	Start int `yaml:"start"`
	// Format is applied to each generated number.
	Format string `yaml:"format"`
}

// GetFormat returns the format string configured for the column, satisfying
// FormatterProcessor.
func (g IncGenerator) GetFormat() string {
	return g.Format
}
17 |
18 | // Generate an incrementing number value for a column.
19 | func (g IncGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error {
20 | if t.Count == 0 {
21 | t.Count = len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool {
22 | return len(a) > len(b)
23 | }))
24 | }
25 |
26 | var line []string
27 | for i := 0; i < t.Count; i++ {
28 | line = append(line, formatValue(g, g.Start+i))
29 | }
30 |
31 | AddTable(t, c.Name, line, files)
32 | return nil
33 | }
34 |
--------------------------------------------------------------------------------
/internal/pkg/generator/inc_generator_test.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/codingconcepts/dg/internal/pkg/model"
7 |
8 | "github.com/stretchr/testify/assert"
9 | )
10 |
11 | func TestGenerateIncColumn(t *testing.T) {
12 | cases := []struct {
13 | name string
14 | count int
15 | start int
16 | files map[string]model.CSVFile
17 | exp [][]string
18 | }{
19 | {
20 | name: "with count generates as many as specified by count",
21 | count: 10,
22 | start: 100,
23 | files: map[string]model.CSVFile{},
24 | exp: [][]string{
25 | {"100", "101", "102", "103", "104", "105", "106", "107", "108", "109"},
26 | },
27 | },
28 | {
29 | name: "without count generates as many as the max line",
30 | start: 200,
31 | files: map[string]model.CSVFile{
32 | "table": {
33 | Lines: [][]string{
34 | {"a", "b", "c"},
35 | {"a", "b", "c", "d", "e"},
36 | },
37 | },
38 | },
39 | exp: [][]string{
40 | {"a", "b", "c"},
41 | {"a", "b", "c", "d", "e"},
42 | {"200", "201", "202", "203", "204"},
43 | },
44 | },
45 | }
46 |
47 | for _, c := range cases {
48 | t.Run(c.name, func(t *testing.T) {
49 | table := model.Table{
50 | Name: "table",
51 | Count: c.count,
52 | }
53 |
54 | column := model.Column{
55 | Name: "id",
56 | }
57 |
58 | g := IncGenerator{
59 | Start: c.start,
60 | }
61 |
62 | err := g.Generate(table, column, c.files)
63 | assert.Nil(t, err)
64 | assert.Equal(t,
65 | []string([]string{"id"}),
66 | c.files["table"].Header,
67 | )
68 | assert.Equal(t,
69 | c.exp,
70 | c.files["table"].Lines,
71 | )
72 | })
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/internal/pkg/generator/match_generator.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/codingconcepts/dg/internal/pkg/model"
7 |
8 | "github.com/samber/lo"
9 | )
10 |
// MatchGenerator provides additional context to a match column.
type MatchGenerator struct {
	// SourceTable names the previously generated table to look values up in.
	SourceTable string `yaml:"source_table"`
	// SourceColumn is the SourceTable column whose values are compared with
	// the values of MatchColumn.
	SourceColumn string `yaml:"source_column"`
	// SourceValue is the SourceTable column whose value is copied into the
	// generated column when a match is found.
	SourceValue string `yaml:"source_value"`
	// MatchColumn is the column of the table being generated whose values are
	// looked up in SourceColumn.
	MatchColumn string `yaml:"match_column"`
}
18 |
19 | // Generate matches values from a previously generated table and inserts values
20 | // into a new table where match is found.
21 | func (g MatchGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error {
22 | sourceTable, ok := files[g.SourceTable]
23 | if !ok {
24 | return fmt.Errorf("missing source table %q for match lookup", g.SourceTable)
25 | }
26 |
27 | sourceColumnIndex := lo.IndexOf(sourceTable.Header, g.SourceColumn)
28 | sourceColumn := sourceTable.Lines[sourceColumnIndex]
29 |
30 | valueColumnIndex := lo.IndexOf(sourceTable.Header, g.SourceValue)
31 | valueColumn := sourceTable.Lines[valueColumnIndex]
32 |
33 | sourceMap := map[string]string{}
34 | for i := 0; i < len(sourceColumn); i++ {
35 | sourceMap[sourceColumn[i]] = valueColumn[i]
36 | }
37 |
38 | matchTable, ok := files[t.Name]
39 | if !ok {
40 | return fmt.Errorf("missing destination table %q for match lookup", t.Name)
41 | }
42 |
43 | // Use the match table headers to determine index, as the each processor
44 | // will re-order columns.
45 | _, matchColumnIndex, ok := lo.FindIndexOf(matchTable.Header, func(c string) bool {
46 | return c == g.MatchColumn
47 | })
48 | if !ok {
49 | return fmt.Errorf("missing match column %q in current table", g.MatchColumn)
50 | }
51 |
52 | matchColumn := matchTable.Lines[matchColumnIndex]
53 |
54 | lines := make([]string, len(matchColumn))
55 | for i, matchC := range matchColumn {
56 | if sourceValue, ok := sourceMap[matchC]; ok {
57 | lines[i] = sourceValue
58 | }
59 | }
60 |
61 | AddTable(t, c.Name, lines, files)
62 | return nil
63 | }
64 |
--------------------------------------------------------------------------------
/internal/pkg/generator/match_generator_test.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/codingconcepts/dg/internal/pkg/model"
7 |
8 | "github.com/samber/lo"
9 | "github.com/stretchr/testify/assert"
10 | )
11 |
12 | func TestGenerateMatchColumn(t *testing.T) {
13 | cases := []struct {
14 | name string
15 | srcTable *model.CSVFile
16 | srcTableName string
17 | srcColumnName string
18 | srcValueName string
19 | dstTable *model.CSVFile
20 | dstColumns []model.Column
21 | dstColumn model.Column
22 | matchColumn string
23 | expColumn []string
24 | expError error
25 | }{
26 | {
27 | name: "generates matching columns",
28 | srcTable: &model.CSVFile{
29 | Name: "significant_events",
30 | Header: []string{"date", "event"},
31 | Lines: [][]string{
32 | {"2023-01-01", "2023-01-03"},
33 | {"abc", "def"},
34 | },
35 | },
36 | srcTableName: "significant_events",
37 | srcColumnName: "date",
38 | srcValueName: "event",
39 | dstTable: &model.CSVFile{
40 | Name: "timeline",
41 | Header: []string{"timeline_date"},
42 | Lines: [][]string{
43 | {"2023-01-01", "2023-01-02", "2023-01-03"},
44 | },
45 | },
46 | dstColumns: []model.Column{
47 | {Name: "timeline_date"},
48 | },
49 | dstColumn: model.Column{
50 | Name: "timeline_event",
51 | },
52 | matchColumn: "timeline_date",
53 | expColumn: []string{"abc", "", "def"},
54 | },
55 | }
56 |
57 | for _, c := range cases {
58 | t.Run(c.name, func(t *testing.T) {
59 | table := model.Table{
60 | Name: c.dstTable.Name,
61 | Columns: c.dstColumns,
62 | }
63 |
64 | column := c.dstColumn
65 |
66 | g := MatchGenerator{
67 | SourceTable: c.srcTableName,
68 | SourceColumn: c.srcColumnName,
69 | SourceValue: c.srcValueName,
70 | MatchColumn: c.matchColumn,
71 | }
72 |
73 | files := map[string]model.CSVFile{}
74 | if c.srcTable != nil {
75 | files[c.srcTable.Name] = *c.srcTable
76 | }
77 | if c.dstTable != nil {
78 | files[c.dstTable.Name] = *c.dstTable
79 | }
80 |
81 | err := g.Generate(table, column, files)
82 | assert.Equal(t, c.expError, err)
83 | if err != nil {
84 | return
85 | }
86 |
87 | actColumnIndex := lo.IndexOf(files[c.dstTable.Name].Header, c.dstColumn.Name)
88 | assert.Equal(t, c.expColumn, files[c.dstTable.Name].Lines[actColumnIndex])
89 | })
90 | }
91 | }
92 |
93 | /*
94 | source table:
95 |
96 | date, event
97 | 2023-01-01, abc
98 | 2023-01-03, def
99 |
100 |
101 | dest table:
102 |
103 | timeline_date, timeline_event
104 | 2023-01-01
105 | 2023-01-02
106 | 2023-01-03
107 |
108 |
109 | outcome:
110 | timeline_date, timeline_event
111 | 2023-01-01, abc
112 | 2023-01-02
113 | 2023-01-03, def
114 | */
115 |
--------------------------------------------------------------------------------
/internal/pkg/generator/placeholder.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import "github.com/brianvoe/gofakeit/v6"
4 |
var (
	// replacements maps placeholder tokens of the form "${name}" to functions
	// that return the result of the corresponding gofakeit call. Every entry
	// returns its value as any, so callers receive whatever type gofakeit
	// produces for that token.
	// NOTE(review): presumably consumed by a generator that expands these
	// tokens inside a pattern; the caller is not in this file, confirm there.
	replacements = map[string]func() any{
		"${ach_account}": func() any { return gofakeit.AchAccount() },
		"${ach_routing}": func() any { return gofakeit.AchRouting() },
		"${adjective_demonstrative}": func() any { return gofakeit.AdjectiveDemonstrative() },
		"${adjective_descriptive}": func() any { return gofakeit.AdjectiveDescriptive() },
		"${adjective_indefinite}": func() any { return gofakeit.AdjectiveIndefinite() },
		"${adjective_interrogative}": func() any { return gofakeit.AdjectiveInterrogative() },
		"${adjective_possessive}": func() any { return gofakeit.AdjectivePossessive() },
		"${adjective_proper}": func() any { return gofakeit.AdjectiveProper() },
		"${adjective_quantitative}": func() any { return gofakeit.AdjectiveQuantitative() },
		"${adjective}": func() any { return gofakeit.Adjective() },
		"${adverb_degree}": func() any { return gofakeit.AdverbDegree() },
		"${adverb_frequency_definite}": func() any { return gofakeit.AdverbFrequencyDefinite() },
		"${adverb_frequency_indefinite}": func() any { return gofakeit.AdverbFrequencyIndefinite() },
		"${adverb_manner}": func() any { return gofakeit.AdverbManner() },
		"${adverb_place}": func() any { return gofakeit.AdverbPlace() },
		"${adverb_time_definite}": func() any { return gofakeit.AdverbTimeDefinite() },
		"${adverb_time_indefinite}": func() any { return gofakeit.AdverbTimeIndefinite() },
		"${adverb}": func() any { return gofakeit.Adverb() },
		"${animal_type}": func() any { return gofakeit.AnimalType() },
		"${animal}": func() any { return gofakeit.Animal() },
		"${app_author}": func() any { return gofakeit.AppAuthor() },
		"${app_name}": func() any { return gofakeit.AppName() },
		"${app_version}": func() any { return gofakeit.AppVersion() },
		"${bitcoin_address}": func() any { return gofakeit.BitcoinAddress() },
		"${bitcoin_private_key}": func() any { return gofakeit.BitcoinPrivateKey() },
		"${bool}": func() any { return gofakeit.Bool() },
		"${breakfast}": func() any { return gofakeit.Breakfast() },
		"${bs}": func() any { return gofakeit.BS() },
		"${car_fuel_type}": func() any { return gofakeit.CarFuelType() },
		"${car_maker}": func() any { return gofakeit.CarMaker() },
		"${car_model}": func() any { return gofakeit.CarModel() },
		"${car_transmission_type}": func() any { return gofakeit.CarTransmissionType() },
		"${car_type}": func() any { return gofakeit.CarType() },
		"${chrome_user_agent}": func() any { return gofakeit.ChromeUserAgent() },
		"${city}": func() any { return gofakeit.City() },
		"${color}": func() any { return gofakeit.Color() },
		"${company_suffix}": func() any { return gofakeit.CompanySuffix() },
		"${company}": func() any { return gofakeit.Company() },
		"${connective_casual}": func() any { return gofakeit.ConnectiveCasual() },
		"${connective_complaint}": func() any { return gofakeit.ConnectiveComplaint() },
		"${connective_examplify}": func() any { return gofakeit.ConnectiveExamplify() },
		"${connective_listing}": func() any { return gofakeit.ConnectiveListing() },
		"${connective_time}": func() any { return gofakeit.ConnectiveTime() },
		"${connective}": func() any { return gofakeit.Connective() },
		"${country_abr}": func() any { return gofakeit.CountryAbr() },
		"${country}": func() any { return gofakeit.Country() },
		"${credit_card_cvv}": func() any { return gofakeit.CreditCardCvv() },
		"${credit_card_exp}": func() any { return gofakeit.CreditCardExp() },
		"${credit_card_type}": func() any { return gofakeit.CreditCardType() },
		"${currency_long}": func() any { return gofakeit.CurrencyLong() },
		"${currency_short}": func() any { return gofakeit.CurrencyShort() },
		"${date}": func() any { return gofakeit.Date() },
		"${day}": func() any { return gofakeit.Day() },
		"${dessert}": func() any { return gofakeit.Dessert() },
		"${dinner}": func() any { return gofakeit.Dinner() },
		"${domain_name}": func() any { return gofakeit.DomainName() },
		"${domain_suffix}": func() any { return gofakeit.DomainSuffix() },
		"${email}": func() any { return gofakeit.Email() },
		"${emoji}": func() any { return gofakeit.Emoji() },
		"${file_extension}": func() any { return gofakeit.FileExtension() },
		"${file_mime_type}": func() any { return gofakeit.FileMimeType() },
		"${firefox_user_agent}": func() any { return gofakeit.FirefoxUserAgent() },
		"${first_name}": func() any { return gofakeit.FirstName() },
		"${flipacoin}": func() any { return gofakeit.FlipACoin() },
		"${float32}": func() any { return gofakeit.Float32() },
		"${float64}": func() any { return gofakeit.Float64() },
		"${fruit}": func() any { return gofakeit.Fruit() },
		"${gender}": func() any { return gofakeit.Gender() },
		"${hexcolor}": func() any { return gofakeit.HexColor() },
		"${hobby}": func() any { return gofakeit.Hobby() },
		"${hour}": func() any { return gofakeit.Hour() },
		"${http_method}": func() any { return gofakeit.HTTPMethod() },
		"${http_status_code_simple}": func() any { return gofakeit.HTTPStatusCodeSimple() },
		"${http_status_code}": func() any { return gofakeit.HTTPStatusCode() },
		"${http_version}": func() any { return gofakeit.HTTPVersion() },
		"${int16}": func() any { return gofakeit.Int16() },
		"${int32}": func() any { return gofakeit.Int32() },
		"${int64}": func() any { return gofakeit.Int64() },
		"${int8}": func() any { return gofakeit.Int8() },
		"${ipv4_address}": func() any { return gofakeit.IPv4Address() },
		"${ipv6_address}": func() any { return gofakeit.IPv6Address() },
		"${job_descriptor}": func() any { return gofakeit.JobDescriptor() },
		"${job_level}": func() any { return gofakeit.JobLevel() },
		"${job_title}": func() any { return gofakeit.JobTitle() },
		"${language_abbreviation}": func() any { return gofakeit.LanguageAbbreviation() },
		"${language}": func() any { return gofakeit.Language() },
		"${last_name}": func() any { return gofakeit.LastName() },
		"${latitude}": func() any { return gofakeit.Latitude() },
		"${longitude}": func() any { return gofakeit.Longitude() },
		"${lunch}": func() any { return gofakeit.Lunch() },
		"${mac_address}": func() any { return gofakeit.MacAddress() },
		"${minute}": func() any { return gofakeit.Minute() },
		"${month_string}": func() any { return gofakeit.MonthString() },
		"${month}": func() any { return gofakeit.Month() },
		"${name_prefix}": func() any { return gofakeit.NamePrefix() },
		"${name_suffix}": func() any { return gofakeit.NameSuffix() },
		"${name}": func() any { return gofakeit.Name() },
		"${nanosecond}": func() any { return gofakeit.NanoSecond() },
		"${nicecolors}": func() any { return gofakeit.NiceColors() },
		"${noun_abstract}": func() any { return gofakeit.NounAbstract() },
		"${noun_collective_animal}": func() any { return gofakeit.NounCollectiveAnimal() },
		"${noun_collective_people}": func() any { return gofakeit.NounCollectivePeople() },
		"${noun_collective_thing}": func() any { return gofakeit.NounCollectiveThing() },
		"${noun_common}": func() any { return gofakeit.NounCommon() },
		"${noun_concrete}": func() any { return gofakeit.NounConcrete() },
		"${noun_countable}": func() any { return gofakeit.NounCountable() },
		"${noun_uncountable}": func() any { return gofakeit.NounUncountable() },
		"${noun}": func() any { return gofakeit.Noun() },
		"${opera_user_agent}": func() any { return gofakeit.OperaUserAgent() },
		"${password}": func() any { return gofakeit.Password(true, true, true, true, true, 25) },
		"${pet_name}": func() any { return gofakeit.PetName() },
		"${phone_formatted}": func() any { return gofakeit.PhoneFormatted() },
		"${phone}": func() any { return gofakeit.Phone() },
		"${phrase}": func() any { return gofakeit.Phrase() },
		"${preposition_compound}": func() any { return gofakeit.PrepositionCompound() },
		"${preposition_double}": func() any { return gofakeit.PrepositionDouble() },
		"${preposition_simple}": func() any { return gofakeit.PrepositionSimple() },
		"${preposition}": func() any { return gofakeit.Preposition() },
		"${programming_language}": func() any { return gofakeit.ProgrammingLanguage() },
		"${pronoun_demonstrative}": func() any { return gofakeit.PronounDemonstrative() },
		"${pronoun_interrogative}": func() any { return gofakeit.PronounInterrogative() },
		"${pronoun_object}": func() any { return gofakeit.PronounObject() },
		"${pronoun_personal}": func() any { return gofakeit.PronounPersonal() },
		"${pronoun_possessive}": func() any { return gofakeit.PronounPossessive() },
		"${pronoun_reflective}": func() any { return gofakeit.PronounReflective() },
		"${pronoun_relative}": func() any { return gofakeit.PronounRelative() },
		"${pronoun}": func() any { return gofakeit.Pronoun() },
		"${quote}": func() any { return gofakeit.Quote() },
		"${rgbcolor}": func() any { return gofakeit.RGBColor() },
		"${safari_user_agent}": func() any { return gofakeit.SafariUserAgent() },
		"${safecolor}": func() any { return gofakeit.SafeColor() },
		"${second}": func() any { return gofakeit.Second() },
		"${snack}": func() any { return gofakeit.Snack() },
		"${ssn}": func() any { return gofakeit.SSN() },
		"${state_abr}": func() any { return gofakeit.StateAbr() },
		"${state}": func() any { return gofakeit.State() },
		"${street_name}": func() any { return gofakeit.StreetName() },
		"${street_number}": func() any { return gofakeit.StreetNumber() },
		"${street_prefix}": func() any { return gofakeit.StreetPrefix() },
		"${street_suffix}": func() any { return gofakeit.StreetSuffix() },
		"${street}": func() any { return gofakeit.Street() },
		"${time_zone_abv}": func() any { return gofakeit.TimeZoneAbv() },
		"${time_zone_full}": func() any { return gofakeit.TimeZoneFull() },
		"${time_zone_offset}": func() any { return gofakeit.TimeZoneOffset() },
		"${time_zone_region}": func() any { return gofakeit.TimeZoneRegion() },
		"${time_zone}": func() any { return gofakeit.TimeZone() },
		"${uint128_hex}": func() any { return gofakeit.HexUint128() },
		"${uint16_hex}": func() any { return gofakeit.HexUint16() },
		"${uint16}": func() any { return gofakeit.Uint16() },
		"${uint256_hex}": func() any { return gofakeit.HexUint256() },
		"${uint32_hex}": func() any { return gofakeit.HexUint32() },
		"${uint32}": func() any { return gofakeit.Uint32() },
		"${uint64_hex}": func() any { return gofakeit.HexUint64() },
		"${uint64}": func() any { return gofakeit.Uint64() },
		"${uint8_hex}": func() any { return gofakeit.HexUint8() },
		"${uint8}": func() any { return gofakeit.Uint8() },
		"${url}": func() any { return gofakeit.URL() },
		"${user_agent}": func() any { return gofakeit.UserAgent() },
		"${username}": func() any { return gofakeit.Username() },
		"${uuid}": func() any { return gofakeit.UUID() },
		"${vegetable}": func() any { return gofakeit.Vegetable() },
		"${verb_action}": func() any { return gofakeit.VerbAction() },
		"${verb_helping}": func() any { return gofakeit.VerbHelping() },
		"${verb_linking}": func() any { return gofakeit.VerbLinking() },
		"${verb}": func() any { return gofakeit.Verb() },
		"${weekday}": func() any { return gofakeit.WeekDay() },
		"${word}": func() any { return gofakeit.Word() },
		"${year}": func() any { return gofakeit.Year() },
		"${zip}": func() any { return gofakeit.Zip() },
	}
)
178 |
--------------------------------------------------------------------------------
/internal/pkg/generator/range_generator.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "fmt"
5 | "strconv"
6 | "time"
7 |
8 | "github.com/codingconcepts/dg/internal/pkg/model"
9 | "github.com/samber/lo"
10 | )
11 |
// RangeGenerator provides additional context to a range column.
type RangeGenerator struct {
	// Type selects the kind of range to generate: "date" or "int". Any other
	// value makes Generate return an error.
	Type string `yaml:"type"`
	// From is the first value of the range. It is parsed with Format for date
	// ranges and as a base-10 integer for int ranges.
	From string `yaml:"from"`
	// To is the end of the range. Int ranges may leave it empty, in which
	// case it defaults to From + count - 1.
	To string `yaml:"to"`
	// Step is the gap between values, read only when no count is available:
	// a time.ParseDuration string for date ranges, an integer for int ranges.
	Step string `yaml:"step"`
	// Format is the time layout used to parse From and To and to render the
	// generated dates. Int ranges do not read it.
	Format string `yaml:"format"`
}
20 |
21 | // Generate sequential data between a given start and end range.
22 | func (g RangeGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error {
23 | count := len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool {
24 | return len(a) > len(b)
25 | }))
26 |
27 | if count == 0 {
28 | count = t.Count
29 | }
30 |
31 | switch g.Type {
32 | case "date":
33 | lines, err := g.generateDateSlice(count)
34 | if err != nil {
35 | return fmt.Errorf("generating date slice: %w", err)
36 | }
37 |
38 | AddTable(t, c.Name, lines, files)
39 | return nil
40 |
41 | case "int":
42 | lines, err := g.generateIntSlice(count)
43 | if err != nil {
44 | return fmt.Errorf("generating int slice: %w", err)
45 | }
46 |
47 | AddTable(t, c.Name, lines, files)
48 | return nil
49 |
50 | default:
51 | return fmt.Errorf("%q is not a valid range type", g.Type)
52 | }
53 | }
54 |
55 | func (g RangeGenerator) generateDateSlice(count int) ([]string, error) {
56 | // Validate that we have everything we need.
57 | if count == 0 && g.Step == "" {
58 | return nil, fmt.Errorf("either a count or a step must be provided to a date range generator")
59 | }
60 |
61 | from, err := time.Parse(g.Format, g.From)
62 | if err != nil {
63 | return nil, fmt.Errorf("parsing from date: %w", err)
64 | }
65 |
66 | to, err := time.Parse(g.Format, g.To)
67 | if err != nil {
68 | return nil, fmt.Errorf("parsing to date: %w", err)
69 | }
70 |
71 | var step time.Duration
72 | if count > 0 {
73 | step = to.Sub(from) / time.Duration(count)
74 | } else {
75 | if step, err = time.ParseDuration(g.Step); err != nil {
76 | return nil, fmt.Errorf("parsing step: %w", err)
77 | }
78 | }
79 |
80 | var s []string
81 | for i := from; i.Before(to); i = i.Add(step) {
82 | s = append(s, i.Format(g.Format))
83 | }
84 |
85 | return s, nil
86 | }
87 |
88 | func (g RangeGenerator) generateIntSlice(count int) ([]string, error) {
89 | // Validate that we have everything we need.
90 | if count == 0 && g.Step == "" {
91 | return nil, fmt.Errorf("either a count or a step must be provided to an int range generator")
92 | }
93 |
94 | from, err := strconv.Atoi(g.From)
95 | if err != nil {
96 | return nil, fmt.Errorf("parsing from number: %w", err)
97 | }
98 |
99 | var to int
100 | if g.To == "" {
101 | to = from + count - 1
102 | } else {
103 | if to, err = strconv.Atoi(g.To); err != nil {
104 | return nil, fmt.Errorf("parsing to number: %w", err)
105 | }
106 | }
107 |
108 | var step int
109 | if count > 0 {
110 | step = (to - from) / (count - 1)
111 | } else {
112 | if step, err = strconv.Atoi(g.Step); err != nil {
113 | return nil, fmt.Errorf("parsing step number: %w", err)
114 | }
115 | }
116 |
117 | var s []string
118 | for i := from; i <= to; i += step {
119 | s = append(s, strconv.Itoa(i))
120 | }
121 |
122 | return s, nil
123 | }
124 |
--------------------------------------------------------------------------------
/internal/pkg/generator/range_generator_test.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/codingconcepts/dg/internal/pkg/model"
7 |
8 | "github.com/stretchr/testify/assert"
9 | )
10 |
11 | func TestGenerateRangeColumn(t *testing.T) {
12 | cases := []struct {
13 | name string
14 | files map[string]model.CSVFile
15 | rtype string
16 | count int
17 | from string
18 | to string
19 | step string
20 | format string
21 | expLines []string
22 | expErr error
23 | }{
24 | {
25 | name: "generates date range for existing table",
26 | files: map[string]model.CSVFile{
27 | "table": {
28 | Lines: [][]string{
29 | {"a"},
30 | {"a", "b"},
31 | {"a", "b", "c"},
32 | },
33 | },
34 | },
35 | rtype: "date",
36 | count: 5,
37 | from: "2023-01-01",
38 | to: "2023-02-01",
39 | step: "24h",
40 | format: "2006-01-02",
41 | expLines: []string{
42 | "2023-01-01",
43 | "2023-01-11",
44 | "2023-01-21",
45 | },
46 | },
47 | {
48 | name: "generates date range for count",
49 | files: map[string]model.CSVFile{},
50 | rtype: "date",
51 | count: 4,
52 | from: "2023-01-01",
53 | to: "2023-02-01",
54 | step: "24h",
55 | format: "2006-01-02",
56 | expLines: []string{
57 | "2023-01-01",
58 | "2023-01-08",
59 | "2023-01-16",
60 | "2023-01-24",
61 | },
62 | },
63 | {
64 | name: "generates date range for step",
65 | files: map[string]model.CSVFile{},
66 | rtype: "date",
67 | from: "2023-01-01",
68 | to: "2023-02-01",
69 | step: "72h",
70 | format: "2006-01-02",
71 | expLines: []string{
72 | "2023-01-01",
73 | "2023-01-04",
74 | "2023-01-07",
75 | "2023-01-10",
76 | "2023-01-13",
77 | "2023-01-16",
78 | "2023-01-19",
79 | "2023-01-22",
80 | "2023-01-25",
81 | "2023-01-28",
82 | "2023-01-31",
83 | },
84 | },
85 | {
86 | name: "generates date range for count",
87 | files: map[string]model.CSVFile{},
88 | rtype: "int",
89 | count: 10,
90 | from: "1",
91 | expLines: []string{
92 | "1",
93 | "2",
94 | "3",
95 | "4",
96 | "5",
97 | "6",
98 | "7",
99 | "8",
100 | "9",
101 | "10",
102 | },
103 | },
104 | {
105 | name: "generates int range for existing table",
106 | files: map[string]model.CSVFile{
107 | "table": {
108 | Lines: [][]string{
109 | {"a"},
110 | {"a", "b"},
111 | {"a", "b", "c"},
112 | },
113 | },
114 | },
115 | rtype: "int",
116 | count: 5,
117 | from: "1",
118 | to: "5",
119 | expLines: []string{
120 | "1",
121 | "3",
122 | "5",
123 | },
124 | },
125 | {
126 | name: "generates int range for count",
127 | files: map[string]model.CSVFile{},
128 | rtype: "int",
129 | count: 4,
130 | from: "10",
131 | to: "40",
132 | step: "10",
133 | expLines: []string{
134 | "10",
135 | "20",
136 | "30",
137 | "40",
138 | },
139 | },
140 | {
141 | name: "generates int range for const",
142 | files: map[string]model.CSVFile{},
143 | rtype: "int",
144 | count: 4,
145 | from: "1",
146 | step: "1",
147 | expLines: []string{
148 | "1",
149 | "2",
150 | "3",
151 | "4",
152 | },
153 | },
154 | }
155 |
156 | for _, c := range cases {
157 | t.Run(c.name, func(t *testing.T) {
158 | table := model.Table{
159 | Name: "table",
160 | Count: c.count,
161 | }
162 |
163 | column := model.Column{
164 | Name: "col",
165 | }
166 |
167 | g := RangeGenerator{
168 | Type: c.rtype,
169 | From: c.from,
170 | To: c.to,
171 | Step: c.step,
172 | Format: c.format,
173 | }
174 |
175 | files := c.files
176 |
177 | err := g.Generate(table, column, files)
178 | assert.Equal(t, c.expErr, err)
179 |
180 | if err != nil {
181 | return
182 | }
183 |
184 | assert.Equal(t, c.expLines, files["table"].Lines[len(files["table"].Lines)-1])
185 | })
186 | }
187 | }
188 |
189 | func TestGenerateDateSlice(t *testing.T) {
190 | cases := []struct {
191 | name string
192 | from string
193 | to string
194 | format string
195 | count int
196 | step string
197 | expSlice []string
198 | expError string
199 | }{
200 | {
201 | name: "no count or step",
202 | expError: "either a count or a step must be provided to a date range generator",
203 | },
204 | {
205 | name: "count",
206 | count: 10,
207 | from: "2023-01-01",
208 | to: "2023-01-10",
209 | format: "2006-01-02",
210 | expSlice: []string{
211 | "2023-01-01", "2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04", "2023-01-05", "2023-01-06", "2023-01-07", "2023-01-08", "2023-01-09",
212 | },
213 | },
214 | {
215 | name: "step",
216 | step: "24h",
217 | from: "2023-01-10",
218 | to: "2023-01-20",
219 | format: "2006-01-02",
220 | expSlice: []string{
221 | "2023-01-10", "2023-01-11", "2023-01-12", "2023-01-13", "2023-01-14", "2023-01-15", "2023-01-16", "2023-01-17", "2023-01-18", "2023-01-19",
222 | },
223 | },
224 | {
225 | name: "invalid format",
226 | count: 10,
227 | from: "2023-01-01",
228 | to: "2023-01-10",
229 | format: "abc",
230 | expError: `parsing from date: parsing time "2023-01-01" as "abc": cannot parse "2023-01-01" as "abc"`,
231 | },
232 | {
233 | name: "invalid from date",
234 | count: 10,
235 | from: "abc",
236 | format: "2006-01-02",
237 |
238 | to: "2023-01-10",
239 | expError: `parsing from date: parsing time "abc" as "2006-01-02": cannot parse "abc" as "2006"`,
240 | },
241 | {
242 | name: "invalid to date",
243 | count: 10,
244 | from: "2023-01-01",
245 | to: "abc",
246 | format: "2006-01-02",
247 | expError: `parsing to date: parsing time "abc" as "2006-01-02": cannot parse "abc" as "2006"`,
248 | },
249 | {
250 | name: "invalid step",
251 | step: "abc",
252 | from: "2023-01-01",
253 | to: "2023-01-10",
254 | format: "2006-01-02",
255 | expError: `parsing step: time: invalid duration "abc"`,
256 | },
257 | }
258 |
259 | for _, c := range cases {
260 | t.Run(c.name, func(t *testing.T) {
261 | g := RangeGenerator{
262 | From: c.from,
263 | To: c.to,
264 | Format: c.format,
265 | Step: c.step,
266 | }
267 |
268 | actSlice, actErr := g.generateDateSlice(c.count)
269 | if c.expError != "" {
270 | assert.Equal(t, c.expError, actErr.Error())
271 | return
272 | }
273 |
274 | assert.Equal(t, c.expSlice, actSlice)
275 | })
276 | }
277 | }
278 |
--------------------------------------------------------------------------------
/internal/pkg/generator/ref_generator.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/codingconcepts/dg/internal/pkg/model"
7 | "github.com/codingconcepts/dg/internal/pkg/random"
8 |
9 | "github.com/samber/lo"
10 | )
11 |
// RefGenerator provides additional context to a ref column.
type RefGenerator struct {
	// Table names the previously generated table to take values from.
	Table string `yaml:"table"`
	// Column is the header of the Table column whose values are picked at
	// random for each generated row.
	Column string `yaml:"column"`
}
17 |
18 | // Generate looks to previously generated table data and references that when generating data
19 | // for the given table.
20 | func (g RefGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error {
21 | if t.Count == 0 {
22 | t.Count = len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool {
23 | return len(a) > len(b)
24 | }))
25 | }
26 |
27 | table, ok := files[g.Table]
28 | if !ok {
29 | return fmt.Errorf("missing table %q for ref lookup", g.Table)
30 | }
31 |
32 | colIndex := lo.IndexOf(table.Header, g.Column)
33 | column := table.Lines[colIndex]
34 |
35 | var line []string
36 | for i := 0; i < t.Count; i++ {
37 | line = append(line, column[random.Intn(len(column))])
38 | }
39 |
40 | AddTable(t, c.Name, line, files)
41 | return nil
42 | }
43 |
--------------------------------------------------------------------------------
/internal/pkg/generator/ref_generator_test.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/codingconcepts/dg/internal/pkg/model"
7 |
8 | "github.com/stretchr/testify/assert"
9 | )
10 |
11 | func TestGenerateRefColumn(t *testing.T) {
12 | table := model.Table{
13 | Name: "pet",
14 | Count: 2,
15 | }
16 |
17 | column := model.Column{
18 | Name: "person_id",
19 | }
20 |
21 | g := RefGenerator{
22 | Table: "person",
23 | Column: "id",
24 | }
25 |
26 | files := map[string]model.CSVFile{
27 | "person": {
28 | Header: []string{"id"},
29 | Lines: [][]string{{"ce9af887-37eb-4e08-9790-4f481b0fa594"}},
30 | },
31 | }
32 | err := g.Generate(table, column, files)
33 | assert.Nil(t, err)
34 | assert.Equal(t, "ce9af887-37eb-4e08-9790-4f481b0fa594", files["pet"].Lines[0][0])
35 | assert.Equal(t, "ce9af887-37eb-4e08-9790-4f481b0fa594", files["pet"].Lines[0][1])
36 | }
37 |
--------------------------------------------------------------------------------
/internal/pkg/generator/set_generator.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/codingconcepts/dg/internal/pkg/model"
7 | "github.com/codingconcepts/dg/internal/pkg/random"
8 | "github.com/samber/lo"
9 | )
10 |
// SetGenerator provides additional context to a set column.
type SetGenerator struct {
	// Values holds the candidate values to choose between; Generate returns
	// an error when it is empty.
	Values []string `yaml:"values"`
	// Weights optionally gives the weight of the value at the same index in
	// Values. When any weights are supplied there must be exactly one per
	// value; when none are supplied values are picked via random.Intn.
	Weights []int `yaml:"weights"`
}
16 |
17 | // Generate selects between a set of values for a given table.
18 | func (g SetGenerator) Generate(t model.Table, c model.Column, files map[string]model.CSVFile) error {
19 | if len(g.Values) == 0 {
20 | return fmt.Errorf("no values provided for set generator")
21 | }
22 |
23 | count := len(lo.MaxBy(files[t.Name].Lines, func(a, b []string) bool {
24 | return len(a) > len(b)
25 | }))
26 |
27 | if count == 0 {
28 | count = t.Count
29 | }
30 |
31 | var line []string
32 | if len(g.Weights) > 0 {
33 | items, err := g.buildWeightedItems()
34 | if err != nil {
35 | return fmt.Errorf("making weighted items collection: %w", err)
36 | }
37 |
38 | for i := 0; i < count; i++ {
39 | line = append(line, items.choose())
40 | }
41 | } else {
42 | for i := 0; i < count; i++ {
43 | line = append(line, g.Values[random.Intn(len(g.Values))])
44 | }
45 | }
46 |
47 | AddTable(t, c.Name, line, files)
48 | return nil
49 | }
50 |
51 | func (g SetGenerator) buildWeightedItems() (weightedItems, error) {
52 | if len(g.Values) != len(g.Weights) {
53 | return weightedItems{}, fmt.Errorf("set values and weights need to be the same")
54 | }
55 |
56 | weightedItems := make([]weightedItem, len(g.Values))
57 | for i, v := range g.Values {
58 | weightedItems = append(weightedItems, weightedItem{
59 | Value: v,
60 | Weight: g.Weights[i],
61 | })
62 | }
63 |
64 | return makeWeightedItems(weightedItems), nil
65 | }
66 |
--------------------------------------------------------------------------------
/internal/pkg/generator/set_generator_test.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/codingconcepts/dg/internal/pkg/model"
7 |
8 | "github.com/stretchr/testify/assert"
9 | )
10 |
11 | func TestGenerateSetColumn(t *testing.T) {
12 | table := model.Table{
13 | Name: "table",
14 | Count: 10,
15 | }
16 |
17 | column := model.Column{
18 | Name: "id",
19 | }
20 |
21 | g := SetGenerator{
22 | Values: []string{"a", "b", "c"},
23 | Weights: []int{0, 1, 0},
24 | }
25 |
26 | files := map[string]model.CSVFile{}
27 |
28 | err := g.Generate(table, column, files)
29 | assert.Nil(t, err)
30 | assert.Equal(t,
31 | []string([]string{"id"}),
32 | files["table"].Header,
33 | )
34 | assert.Equal(t,
35 | [][]string{{"b", "b", "b", "b", "b", "b", "b", "b", "b", "b"}},
36 | files["table"].Lines,
37 | )
38 | }
39 |
--------------------------------------------------------------------------------
/internal/pkg/generator/weighted_random.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "github.com/brianvoe/gofakeit/v6"
5 | "github.com/samber/lo"
6 | )
7 |
// weightedItem is a single candidate value together with its weight.
type weightedItem struct {
	Value  string // the string associated with this item
	Weight int    // this item's weight
}
12 |
13 | type weightedItems struct {
14 | items []weightedItem
15 | totalWeight int
16 | }
17 |
18 | func makeWeightedItems(items []weightedItem) weightedItems {
19 | wi := weightedItems{
20 | items: items,
21 | }
22 |
23 | wi.totalWeight = lo.SumBy(items, func(wi weightedItem) int {
24 | return wi.Weight
25 | })
26 |
27 | return wi
28 | }
29 |
30 | func (wi weightedItems) choose() string {
31 | randomWeight := gofakeit.IntRange(1, wi.totalWeight)
32 | for _, i := range wi.items {
33 | randomWeight -= i.Weight
34 | if randomWeight <= 0 {
35 | return i.Value
36 | }
37 | }
38 |
39 | return ""
40 | }
41 |
--------------------------------------------------------------------------------
/internal/pkg/generator/weighted_random_test.go:
--------------------------------------------------------------------------------
1 | package generator
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | func TestMakeWeightedItems(t *testing.T) {
10 | items := makeWeightedItems(
11 | []weightedItem{
12 | {Value: "a", Weight: 10},
13 | {Value: "b", Weight: 20},
14 | {Value: "c", Weight: 30},
15 | },
16 | )
17 |
18 | assert.Equal(t, 60, items.totalWeight)
19 | }
20 |
21 | func TestChoose(t *testing.T) {
22 | cases := []struct {
23 | name string
24 | items []weightedItem
25 | exp []string
26 | }{
27 | {
28 | name: "3 items 1 has all the weight",
29 | items: []weightedItem{
30 | {Value: "a", Weight: 100},
31 | {Value: "b", Weight: 0},
32 | {Value: "c", Weight: 0},
33 | },
34 | exp: []string{"a", "a", "a", "a", "a", "a", "a", "a", "a", "a"},
35 | },
36 | }
37 |
38 | for _, c := range cases {
39 | t.Run(c.name, func(t *testing.T) {
40 | items := makeWeightedItems(c.items)
41 |
42 | var act []string
43 | for i := 0; i < 10; i++ {
44 | act = append(act, items.choose())
45 | }
46 |
47 | assert.Equal(t, c.exp, act)
48 | })
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/internal/pkg/model/config.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import (
4 | "fmt"
5 | "io"
6 |
7 | "gopkg.in/yaml.v3"
8 | )
9 |
10 | // Config represents the entire contents of a config file.
11 | type Config struct {
12 | Tables []Table `yaml:"tables"`
13 | Inputs []Input `yaml:"inputs"`
14 | }
15 |
16 | // Table represents the instructions to create one CSV file.
17 | type Table struct {
18 | Name string `yaml:"name"`
19 | Count int `yaml:"count"`
20 | Suppress bool `yaml:"suppress"`
21 | UniqueColumns []string `yaml:"unique_columns"`
22 | Columns []Column `yaml:"columns"`
23 | }
24 |
25 | // Column represents the instructions to populate one CSV file column.
26 | type Column struct {
27 | Name string `yaml:"name"`
28 | Type string `yaml:"type"`
29 | Suppress bool `yaml:"suppress"`
30 | Generator RawMessage `yaml:"processor"`
31 | }
32 |
33 | // Input represents a data source provided by the user.
34 | type Input struct {
35 | Name string `yaml:"name"`
36 | Type string `yaml:"type"`
37 | Source RawMessage `yaml:"source"`
38 | }
39 |
40 | // Load config from a file
41 | func LoadConfig(r io.Reader) (Config, error) {
42 | var c Config
43 | if err := yaml.NewDecoder(r).Decode(&c); err != nil {
44 | return Config{}, fmt.Errorf("parsing file: %w", err)
45 | }
46 |
47 | return c, nil
48 | }
49 |
--------------------------------------------------------------------------------
/internal/pkg/model/config_test.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import (
4 | "strings"
5 | "testing"
6 |
7 | "github.com/stretchr/testify/assert"
8 | )
9 |
// TestLoadConfig decodes a YAML document holding one input and one table
// with LoadConfig and compares the result, field by field, with a Config
// built by hand.
10 | func TestLoadConfig(t *testing.T) {
11 | y := `
12 | inputs:
13 | - name: my_data
14 | type: csv
15 | source:
16 | file_name: my_data.csv
17 |
18 | tables:
19 | - name: person
20 | count: 100
21 | columns:
22 | - name: id
23 | type: inc
24 | processor:
25 | start: 1
26 | format: "P%03d"
27 | `
28 |
29 | config, err := LoadConfig(strings.NewReader(y))
30 | assert.Nil(t, err)
31 |
// The expected RawMessage fields are produced with ToRawMessage so that
// they can be unmarshalled the same way as the decoded ones further down.
32 | exp := Config{
33 | Inputs: []Input{
34 | {
35 | Name: "my_data",
36 | Type: "csv",
37 | Source: ToRawMessage(t, SourceCSV{
38 | FileName: "my_data.csv",
39 | }),
40 | },
41 | },
42 | Tables: []Table{
43 | {
44 | Name: "person",
45 | Count: 100,
46 | Columns: []Column{
47 | {
48 | Name: "id",
49 | Type: "inc",
50 | Generator: ToRawMessage(t, map[string]any{
51 | "start": 1,
52 | "format": "P%03d",
53 | }),
54 | },
55 | },
56 | },
57 | },
58 | }
59 |
// Plain fields of the input are compared directly.
60 | assert.Equal(t, exp.Inputs[0].Name, config.Inputs[0].Name)
61 | assert.Equal(t, exp.Inputs[0].Type, config.Inputs[0].Type)
62 |
// The Source fields are RawMessage values; both sides are unmarshalled
// into SourceCSV and the decoded values are compared instead.
63 | var expSource SourceCSV
64 | assert.Nil(t, exp.Inputs[0].Source.UnmarshalFunc(&expSource))
65 |
66 | var actSource SourceCSV
67 | assert.Nil(t, config.Inputs[0].Source.UnmarshalFunc(&actSource))
68 |
69 | assert.Equal(t, expSource, actSource)
70 |
// Plain fields of the table and of its first column.
71 | assert.Equal(t, exp.Tables[0].Name, config.Tables[0].Name)
72 | assert.Equal(t, exp.Tables[0].Count, config.Tables[0].Count)
73 | assert.Equal(t, exp.Tables[0].Columns[0].Name, config.Tables[0].Columns[0].Name)
74 | assert.Equal(t, exp.Tables[0].Columns[0].Type, config.Tables[0].Columns[0].Type)
75 |
// The column's Generator is handled like Source above: both sides are
// unmarshalled into a map before being compared.
76 | var expProcessor map[string]any
77 | assert.Nil(t, exp.Tables[0].Columns[0].Generator.UnmarshalFunc(&expProcessor))
78 |
79 | var actProcessor map[string]any
80 | assert.Nil(t, config.Tables[0].Columns[0].Generator.UnmarshalFunc(&actProcessor))
81 |
82 | assert.Equal(t, expProcessor, actProcessor)
83 | }
84 |
--------------------------------------------------------------------------------
/internal/pkg/model/csv_file.go:
--------------------------------------------------------------------------------
1 | package model
2 |
import (
	"strconv"

	"github.com/samber/lo"
)
6 |
// CSVFile holds the content of a CSV file.
type CSVFile struct {
	// Name is the name given to the file.
	Name string

	// Header holds the column names.
	Header []string

	// Lines holds the file's data.
	Lines [][]string

	// UniqueColumns names the header columns that Unique uses to tell
	// duplicate lines apart.
	UniqueColumns []string

	// Output is a flag attached to the file.
	// NOTE(review): presumably marks whether the file is written out;
	// confirm against the callers.
	Output bool
}
15 |
16 | // Unique removes any duplicates from the CSVFile's lines.
17 | func (c *CSVFile) Unique() [][]string {
18 | uniqueColumnIndexes := uniqueIndexes(c.Header, c.UniqueColumns)
19 |
20 | uniqueValues := map[string]struct{}{}
21 | var uniqueLines [][]string
22 |
23 | for i := 0; i < len(c.Lines); i++ {
24 | key := uniqueKey(uniqueColumnIndexes, c.Lines[i])
25 |
26 | if _, ok := uniqueValues[key]; !ok {
27 | uniqueLines = append(uniqueLines, c.Lines[i])
28 | uniqueValues[key] = struct{}{}
29 | }
30 | }
31 |
32 | return uniqueLines
33 | }
34 |
35 | func uniqueIndexes(header, uniqueColumns []string) []int {
36 | indexes := []int{}
37 |
38 | for i, h := range header {
39 | if lo.Contains(uniqueColumns, h) {
40 | indexes = append(indexes, i)
41 | }
42 | }
43 |
44 | return indexes
45 | }
46 |
// uniqueKey builds the de-duplication key for line. A column whose
// position appears in indexes contributes its quoted value; every other
// column contributes a "-" placeholder.
//
// Values are quoted so that neighbouring columns cannot run together.
// Without a boundary, {"ab", "c"} and {"a", "bc"} would produce the same
// key and one of the two lines would be discarded as a duplicate.
func uniqueKey(indexes []int, line []string) string {
	var key []byte

	for i, col := range line {
		if containsIndex(indexes, i) {
			key = strconv.AppendQuote(key, col)
		} else {
			key = append(key, '-')
		}
	}

	return string(key)
}

// containsIndex reports whether target is one of indexes.
func containsIndex(indexes []int, target int) bool {
	for _, idx := range indexes {
		if idx == target {
			return true
		}
	}

	return false
}
60 |
--------------------------------------------------------------------------------
/internal/pkg/model/csv_file_test.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import (
4 | "testing"
5 |
6 | "github.com/stretchr/testify/assert"
7 | )
8 |
9 | func TestUnique(t *testing.T) {
10 | cases := []struct {
11 | name string
12 | uniqueColumns []string
13 | exp [][]string
14 | }{
15 | {
16 | name: "1 column unique",
17 | uniqueColumns: []string{"col_1"},
18 | exp: [][]string{
19 | {"a", "d", "g"},
20 | {"b", "d", "g"},
21 | {"c", "d", "g"},
22 | },
23 | },
24 | {
25 | name: "2 column unique",
26 | uniqueColumns: []string{"col_1", "col_2"},
27 | exp: [][]string{
28 | {"a", "d", "g"},
29 | {"b", "d", "g"},
30 | {"c", "d", "g"},
31 | {"a", "e", "g"},
32 | {"b", "e", "g"},
33 | {"c", "e", "g"},
34 | {"a", "f", "g"},
35 | {"b", "f", "g"},
36 | {"c", "f", "g"},
37 | },
38 | },
39 | {
40 | name: "3 column unique",
41 | uniqueColumns: []string{"col_1", "col_2", "col_3"},
42 | exp: [][]string{
43 | {"a", "d", "g"},
44 | {"b", "d", "g"},
45 | {"c", "d", "g"},
46 | {"a", "e", "g"},
47 | {"b", "e", "g"},
48 | {"c", "e", "g"},
49 | {"a", "f", "g"},
50 | {"b", "f", "g"},
51 | {"c", "f", "g"},
52 | {"a", "d", "h"},
53 | {"b", "d", "h"},
54 | {"c", "d", "h"},
55 | {"a", "e", "h"},
56 | {"b", "e", "h"},
57 | {"c", "e", "h"},
58 | {"a", "f", "h"},
59 | {"b", "f", "h"},
60 | {"c", "f", "h"},
61 | {"a", "d", "i"},
62 | {"b", "d", "i"},
63 | {"c", "d", "i"},
64 | {"a", "e", "i"},
65 | {"b", "e", "i"},
66 | {"c", "e", "i"},
67 | {"a", "f", "i"},
68 | {"b", "f", "i"},
69 | {"c", "f", "i"},
70 | },
71 | },
72 | }
73 |
74 | for _, c := range cases {
75 | t.Run(c.name, func(t *testing.T) {
76 | file := CSVFile{
77 | Header: []string{"col_1", "col_2", "col_3"},
78 | UniqueColumns: c.uniqueColumns,
79 | Lines: [][]string{
80 | {"a", "d", "g"},
81 | {"b", "d", "g"},
82 | {"c", "d", "g"},
83 | {"a", "e", "g"},
84 | {"b", "e", "g"},
85 | {"c", "e", "g"},
86 | {"a", "f", "g"},
87 | {"b", "f", "g"},
88 | {"c", "f", "g"},
89 | {"a", "d", "h"},
90 | {"b", "d", "h"},
91 | {"c", "d", "h"},
92 | {"a", "e", "h"},
93 | {"b", "e", "h"},
94 | {"c", "e", "h"},
95 | {"a", "f", "h"},
96 | {"b", "f", "h"},
97 | {"c", "f", "h"},
98 | {"a", "d", "i"},
99 | {"b", "d", "i"},
100 | {"c", "d", "i"},
101 | {"a", "e", "i"},
102 | {"b", "e", "i"},
103 | {"c", "e", "i"},
104 | {"a", "f", "i"},
105 | {"b", "f", "i"},
106 | {"c", "f", "i"},
107 | },
108 | }
109 |
110 | act := file.Unique()
111 |
112 | assert.Equal(t, c.exp, act)
113 | })
114 | }
115 | }
116 |
--------------------------------------------------------------------------------
/internal/pkg/model/raw_message.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import (
4 | "bytes"
5 | "testing"
6 |
7 | "gopkg.in/yaml.v3"
8 | )
9 |
// RawMessage postpones the decoding of a YAML value, in the way
// json.RawMessage does for JSON.
type RawMessage struct {
	// UnmarshalFunc is the decode callback stored by UnmarshalYAML. Call
	// it with a pointer to the destination value to decode the message.
	UnmarshalFunc func(any) error
}
14 |
15 | func (msg *RawMessage) UnmarshalYAML(unmarshal func(interface{}) error) error {
16 | msg.UnmarshalFunc = unmarshal
17 | return nil
18 | }
19 |
20 | // ToRawMessage converts an object into a model.RawMessage for testing purposes.
21 | func ToRawMessage(t *testing.T, v any) RawMessage {
22 | buf := &bytes.Buffer{}
23 | if err := yaml.NewEncoder(buf).Encode(v); err != nil {
24 | t.Fatalf("error encoding to yaml: %v", err)
25 | }
26 |
27 | var rawMessage RawMessage
28 | if err := yaml.NewDecoder(buf).Decode(&rawMessage); err != nil {
29 | t.Fatalf("error decoding from yaml: %v", err)
30 | }
31 |
32 | return rawMessage
33 | }
34 |
--------------------------------------------------------------------------------
/internal/pkg/model/raw_message_test.go:
--------------------------------------------------------------------------------
1 | package model
2 |
3 | import (
4 | "log"
5 | "strings"
6 | "testing"
7 |
8 | "github.com/stretchr/testify/assert"
9 | "gopkg.in/yaml.v3"
10 | )
11 |
12 | func TestRawMessageUnmarshal(t *testing.T) {
13 | type test struct {
14 | R RawMessage `yaml:"r"`
15 | }
16 |
17 | y := `r: hello raw message`
18 |
19 | var tst test
20 | if err := yaml.NewDecoder(strings.NewReader(y)).Decode(&tst); err != nil {
21 | t.Fatalf("error decoding yaml: %v", err)
22 | }
23 |
24 | var s string
25 | if err := tst.R.UnmarshalFunc(&s); err != nil {
26 | log.Fatalf("error decoding yaml: %v", err)
27 | }
28 |
29 | assert.Equal(t, "hello raw message", s)
30 | }
31 |
--------------------------------------------------------------------------------
/internal/pkg/model/source.go:
--------------------------------------------------------------------------------
1 | package model
2 |
// SourceCSV describes a CSV file used as a data source.
type SourceCSV struct {
	// FileName is read from the "file_name" key.
	FileName string `yaml:"file_name"`
}
7 |
--------------------------------------------------------------------------------
/internal/pkg/random/xorshift.go:
--------------------------------------------------------------------------------
1 | package random
2 |
3 | import "time"
4 |
5 | var (
6 | r = newSplitMix64(time.Now().UnixNano())
7 | )
8 |
// splitMix64 holds the 64-bit state of a generator.
type splitMix64 struct {
	s uint64 // current state
}
12 |
13 | func newSplitMix64(seed int64) *splitMix64 {
14 | return &splitMix64{
15 | s: uint64(seed),
16 | }
17 | }
18 |
19 | // Intn returns a non-negative pseudo-random int.
20 | func Intn(n int) int {
21 | return int(r.uint64()&(1<<63-1)) % n
22 |
23 | }
24 |
25 | func (x *splitMix64) uint64() uint64 {
26 | x.s = x.s + uint64(0x9E3779B97F4A7C15)
27 | z := x.s
28 | z = (z ^ (z >> 30)) * uint64(0xBF58476D1CE4E5B9)
29 | z = (z ^ (z >> 27)) * uint64(0x94D049BB133111EB)
30 | return z ^ (z >> 31)
31 | }
32 |
--------------------------------------------------------------------------------
/internal/pkg/source/csv_source.go:
--------------------------------------------------------------------------------
1 | package source
2 |
3 | import (
4 | "encoding/csv"
5 | "fmt"
6 | "io"
7 | "os"
8 | "path"
9 |
10 | "github.com/codingconcepts/dg/internal/pkg/generator"
11 | "github.com/codingconcepts/dg/internal/pkg/model"
12 | )
13 |
14 | // LoadCSVSource loads a CSV file from disk and adds it as a table to files.
15 | func LoadCSVSource(table, configDir string, s model.SourceCSV, files map[string]model.CSVFile) (err error) {
16 | fullPath := path.Join(configDir, s.FileName)
17 | file, err := os.Open(fullPath)
18 | if err != nil {
19 | return fmt.Errorf("opening csv file: %w", err)
20 | }
21 | defer func() {
22 | if ferr := file.Close(); ferr != nil {
23 | err = ferr
24 | }
25 | }()
26 |
27 | return processCSVSource(file, table, files)
28 | }
29 |
30 | func processCSVSource(file io.Reader, table string, files map[string]model.CSVFile) error {
31 | reader := csv.NewReader(file)
32 | rows, err := reader.ReadAll()
33 | if err != nil {
34 | return fmt.Errorf("reading csv file: %w", err)
35 | }
36 |
37 | headers := rows[0]
38 | columns := generator.Transpose(rows[1:])
39 |
40 | for i, column := range columns {
41 | generator.AddInput(table, headers[i], column, files)
42 | }
43 |
44 | return nil
45 | }
46 |
--------------------------------------------------------------------------------
/internal/pkg/source/csv_source_test.go:
--------------------------------------------------------------------------------
1 | package source
2 |
3 | import (
4 | "os"
5 | "path"
6 | "testing"
7 |
8 | "github.com/codingconcepts/dg/internal/pkg/model"
9 | "github.com/stretchr/testify/assert"
10 | )
11 |
12 | func TestLoadCSVSource(t *testing.T) {
13 | filePath := path.Join(t.TempDir(), "load_test.csv")
14 | assert.NoError(t, os.WriteFile(filePath, []byte("col_a,col_b,col_c\nA,B,C\n1,2,3"), os.ModePerm))
15 |
16 | table := "input"
17 | files := make(map[string]model.CSVFile)
18 | s := model.SourceCSV{FileName: "load_test.csv"}
19 |
20 | assert.NoError(t, LoadCSVSource(table, path.Dir(filePath), s, files))
21 |
22 | expCSVFile := model.CSVFile{
23 | Name: "input",
24 | Header: []string{"col_a", "col_b", "col_c"},
25 | Lines: [][]string{
26 | {"A", "1"},
27 | {"B", "2"},
28 | {"C", "3"}},
29 | Output: false}
30 |
31 | assert.Equal(t, expCSVFile, files["input"])
32 | }
33 |
--------------------------------------------------------------------------------
/internal/pkg/ui/time_tracker.go:
--------------------------------------------------------------------------------
1 | package ui
2 |
3 | import (
4 | "fmt"
5 | "io"
6 | "strings"
7 | "time"
8 | )
9 |
// Clock reports how much time has passed since a given instant. It is an
// interface so that tests can substitute a fixed duration.
type Clock interface {
	// Since returns the time elapsed since the given instant.
	Since(time.Time) time.Duration
}
14 |
// TimerFunc reports how long an event took, given its start time and a
// message describing it. It is meant to be deferred at the start of the
// event being timed.
type TimerFunc func(time.Time, string)
18 |
19 | // TimeTracker returns a new TimerFunc, given a maxMessageLen, which
20 | // determines at which point messages should start to get truncated.
21 | func TimeTracker(w io.Writer, c Clock, maxMessageLen int) TimerFunc {
22 | return func(start time.Time, msg string) {
23 | elapsed := c.Since(start)
24 | switch {
25 | case elapsed > time.Second:
26 | elapsed = elapsed.Round(time.Second)
27 | case elapsed > time.Millisecond:
28 | elapsed = elapsed.Round(time.Millisecond)
29 | default:
30 | elapsed = elapsed.Round(time.Microsecond)
31 | }
32 |
33 | if len(msg) > maxMessageLen {
34 | msg = msg[:maxMessageLen-3] + "..."
35 | }
36 |
37 | padding := strings.Repeat(" ", maxMessageLen-len(msg))
38 | fmt.Fprintf(w, "%s %stook: %s\n", msg, padding, elapsed)
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/internal/pkg/ui/time_tracker_test.go:
--------------------------------------------------------------------------------
1 | package ui
2 |
3 | import (
4 | "bytes"
5 | "testing"
6 | "time"
7 |
8 | "github.com/stretchr/testify/assert"
9 | )
10 |
// mockClock is a clock for tests that always reports the same elapsed
// duration.
type mockClock struct {
	elapsed time.Duration // the duration handed back by Since
}
14 |
15 | func (c mockClock) Since(time.Time) time.Duration {
16 | return c.elapsed
17 | }
18 |
19 | func TestTimerFunc(t *testing.T) {
20 | cases := []struct {
21 | name string
22 | maxMsgLen int
23 | msg string
24 | elapsed time.Duration
25 | exp string
26 | }{
27 | {
28 | name: "microsecond scale",
29 | maxMsgLen: 1,
30 | msg: "a",
31 | elapsed: time.Nanosecond * 123500,
32 | exp: "a took: 124µs\n",
33 | },
34 | {
35 | name: "millisecond scale",
36 | maxMsgLen: 1,
37 | msg: "a",
38 | elapsed: time.Microsecond * 123500,
39 | exp: "a took: 124ms\n",
40 | },
41 | {
42 | name: "second scale",
43 | maxMsgLen: 1,
44 | msg: "a",
45 | elapsed: time.Millisecond * 123500,
46 | exp: "a took: 2m4s\n",
47 | },
48 | {
49 | name: "minute scale",
50 | maxMsgLen: 1,
51 | msg: "a",
52 | elapsed: time.Second * 123500,
53 | exp: "a took: 34h18m20s\n",
54 | },
55 | {
56 | name: "message same as truncate size",
57 | maxMsgLen: 10,
58 | msg: "aaaaaaaaaa",
59 | elapsed: 0,
60 | exp: "aaaaaaaaaa took: 0s\n",
61 | },
62 | {
63 | name: "message over truncate size",
64 | maxMsgLen: 10,
65 | msg: "aaaaaaaaaaa",
66 | elapsed: 0,
67 | exp: "aaaaaaa... took: 0s\n",
68 | },
69 | }
70 |
71 | for _, c := range cases {
72 | t.Run(c.name, func(t *testing.T) {
73 | clock := mockClock{
74 | elapsed: c.elapsed,
75 | }
76 |
77 | buf := new(bytes.Buffer)
78 | tt := TimeTracker(buf, clock, c.maxMsgLen)
79 |
80 | tt(time.Now(), c.msg)
81 |
82 | assert.Equal(t, c.exp, buf.String())
83 | })
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/internal/pkg/web/file_server.go:
--------------------------------------------------------------------------------
1 | package web
2 |
3 | import (
4 | "fmt"
5 | "log"
6 | "net/http"
7 | )
8 |
// Serve serves the files found in dir over HTTP on the given port.
//
// Note: This is a blocking call; it only returns once the server stops,
// with the error reported by http.ListenAndServe.
func Serve(dir string, port int) error {
	// Use a private mux instead of http.DefaultServeMux: registering "/"
	// on the shared default mux panics if Serve is called a second time,
	// and it would also expose any handler other packages registered on
	// it.
	mux := http.NewServeMux()
	mux.Handle("/", http.FileServer(http.Dir(dir)))

	addr := fmt.Sprintf(":%d", port)

	log.Printf("Serving files on %s", addr)
	return http.ListenAndServe(addr, mux)
}
21 |
--------------------------------------------------------------------------------