├── .github └── workflows │ ├── demo.yml │ ├── publish.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── setup.py ├── sqlite_generate ├── __init__.py ├── cli.py └── utils.py └── tests └── test_sqlite_generate.py /.github/workflows/demo.yml: -------------------------------------------------------------------------------- 1 | name: Deploy demo 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Check out repo 14 | uses: actions/checkout@v2 15 | - name: Set up Python 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: 3.8 19 | - uses: actions/cache@v2 20 | name: Configure pip caching 21 | with: 22 | path: ~/.cache/pip 23 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 24 | restore-keys: | 25 | ${{ runner.os }}-pip- 26 | - name: Install Python dependencies 27 | run: | 28 | pip install -e . 29 | pip install datasette datasette-publish-vercel 30 | - name: Build database 31 | run: sqlite-generate demo.db --seed seed --fts --columns=10 --fks=0,3 --pks=0,2 32 | - name: Deploy Datasette using Vercel 33 | env: 34 | VERCEL_TOKEN: ${{ secrets.VERCEL_TOKEN }} 35 | run: |- 36 | datasette publish vercel demo.db \ 37 | --token $VERCEL_TOKEN \ 38 | --project sqlite-generate \ 39 | --title "sqlite-generate demo" \ 40 | --about_url "https://github.com/simonw/sqlite-generate" \ 41 | --about "simonw/sqlite-generate" \ 42 | --install datasette-block-robots \ 43 | --install datasette-search-all \ 44 | --install datasette-graphql 45 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v2 12 | - name: Set up Python 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: '3.8' 16 | - uses: actions/cache@v2 17 | name: Configure pip caching 18 | with: 19 | path: ~/.cache/pip 20 | key: ${{ runner.os }}-publish-pip-${{ hashFiles('**/setup.py') }} 21 | restore-keys: | 22 | ${{ runner.os }}-publish-pip- 23 | - name: Install dependencies 24 | run: | 25 | pip install -e '.[test]' 26 | pip install setuptools wheel twine 27 | - name: Run tests 28 | run: pytest 29 | - name: Publish 30 | if: success() 31 | env: 32 | TWINE_USERNAME: __token__ 33 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 34 | run: | 35 | python setup.py sdist bdist_wheel 36 | twine upload dist/* 37 | 38 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push] 4 | 5 | jobs: 6 | test: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: [3.6, 3.7, 3.8] 11 | steps: 12 | - uses: actions/checkout@v2 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - uses: actions/cache@v2 18 | name: Configure pip caching 19 | with: 20 | path: ~/.cache/pip 21 | key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }} 22 | restore-keys: | 23 | ${{ runner.os }}-pip- 24 | - name: Install dependencies 25 | run: | 26 | pip install -e '.[test]' 27 | - name: Run tests 28 | run: | 29 | pytest 30 | 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | *.db 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | venv 7 | .eggs 8 | .pytest_cache 9 | *.egg-info 10 | .DS_Store 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sqlite-generate 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/sqlite-generate.svg)](https://pypi.org/project/sqlite-generate/) 4 | [![Changelog](https://img.shields.io/github/v/release/simonw/sqlite-generate?label=changelog)](https://github.com/simonw/sqlite-generate/releases) 5 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/sqlite-generate/blob/master/LICENSE) 6 | 7 | Tool for generating demo SQLite databases 8 | 9 | ## Installation 10 | 11 | Install this plugin using `pip`: 12 | 13 | $ pip install sqlite-generate 14 | 15 | ## Demo 16 | 17 | You can see a demo of the database generated using this command running in [Datasette](https://github.com/simonw/datasette) at https://sqlite-generate-demo.datasette.io/ 18 | 19 | The demo is generated using the following command: 20 | 21 | sqlite-generate demo.db --seed seed --fts --columns=10 --fks=0,3 --pks=0,2 22 | 23 | ## Usage 24 | 25 | To generate a SQLite database file called `data.db` with 10 randomly named tables in it, run the following: 26 | 27 | sqlite-generate data.db 28 | 29 | You can use the `--tables` option to generate a different number of tables: 30 | 31 | sqlite-generate data.db --tables 20 32 | 33 | You can run the command against the same database file multiple times to keep adding new tables, using different settings for each batch of generated tables. 34 | 35 | By default each table will contain a random number of rows between 0 and 200. You can customize this with the `--rows` option: 36 | 37 | sqlite-generate data.db --rows 20 38 | 39 | This will insert 20 rows into each table. 40 | 41 | sqlite-generate data.db --rows 500,2000 42 | 43 | This inserts a random number of rows between 500 and 2000 into each table. 44 | 45 | Each table will have 5 columns. You can change this using `--columns`: 46 | 47 | sqlite-generate data.db --columns 10 48 | 49 | `--columns` can also accept a range: 50 | 51 | sqlite-generate data.db --columns 5,15 52 | 53 | You can control the random number seed used with the `--seed` option. This will result in the exact same database file being created by multiple runs of the tool: 54 | 55 | sqlite-generate data.db --seed=myseed 56 | 57 | By default each table will contain between 0 and 2 foreign key columns to other tables. You can control this using the `--fks` option, with either a single number or a range: 58 | 59 | sqlite-generate data.db --columns=20 --fks=5,15 60 | 61 | Each table will have a single primary key column called `id`. You can use the `--pks=` option to change the number of primary key columns on each table. Drop it to 0 to generate [rowid tables](https://www.sqlite.org/rowidtable.html). Increase it above 1 to generate tables with compound primary keys. Or use a range to get a random selection of different primary key layouts: 62 | 63 | sqlite-generate data.db --pks=0,2 64 | 65 | To configure [SQLite full-text search](https://www.sqlite.org/fts5.html) for all columns of type text, use `--fts`: 66 | 67 | sqlite-generate data.db --fts 68 | 69 | This will use FTS5 by default. To use [FTS4](https://www.sqlite.org/fts3.html) instead, use `--fts4`. 70 | 71 | ## Development 72 | 73 | To contribute to this tool, first checkout the code. Then create a new virtual environment: 74 | 75 | cd sqlite-generate 76 | python -mvenv venv 77 | source venv/bin/activate 78 | 79 | Or if you are using `pipenv`: 80 | 81 | pipenv shell 82 | 83 | Now install the dependencies and tests: 84 | 85 | pip install -e '.[test]' 86 | 87 | To run the tests: 88 | 89 | pytest 90 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import os 3 | 4 | VERSION = "1.1.1" 5 | 6 | 7 | def get_long_description(): 8 | with open( 9 | os.path.join(os.path.dirname(os.path.abspath(__file__)), "README.md"), 10 | encoding="utf8", 11 | ) as fp: 12 | return fp.read() 13 | 14 | 15 | setup( 16 | name="sqlite-generate", 17 | description="Tool for generating demo SQLite databases", 18 | long_description=get_long_description(), 19 | long_description_content_type="text/markdown", 20 | author="Simon Willison", 21 | url="https://github.com/simonw/sqlite-generate", 22 | project_urls={ 23 | "Issues": "https://github.com/simonw/sqlite-generate/issues", 24 | "CI": "https://github.com/simonw/sqlite-generate/actions", 25 | "Changelog": "https://github.com/simonw/sqlite-generate/releases", 26 | }, 27 | license="Apache License, Version 2.0", 28 | version=VERSION, 29 | packages=["sqlite_generate"], 30 | entry_points=""" 31 | [console_scripts] 32 | sqlite-generate=sqlite_generate.cli:cli 33 | """, 34 | install_requires=["click", "Faker", "sqlite-utils"], 35 | extras_require={"test": ["pytest"]}, 36 | tests_require=["sqlite-generate[test]"], 37 | ) 38 | -------------------------------------------------------------------------------- /sqlite_generate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simonw/sqlite-generate/84267539f3874d7aee7f868827268f479b480423/sqlite_generate/__init__.py -------------------------------------------------------------------------------- /sqlite_generate/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | from faker import Faker 3 | import sqlite_utils 4 | from .utils import record_builder 5 | 6 | 7 | class IntRange(click.ParamType): 8 | name = "intrange" 9 | 10 | def convert(self, value, param, ctx): 11 | if not ( 12 | value.isdigit() 13 | or ( 14 | value.count(",") == 1 and all(bit.isdigit() for bit in value.split(",")) 15 | ) 16 | ): 17 | self.fail( 18 | "Use --{param}=low,high or --{param}=exact", 19 | format(param=param), 20 | param, 21 | ctx, 22 | ) 23 | if value.isdigit(): 24 | value_low = value_high = int(value) 25 | else: 26 | value_low, value_high = map(int, value.split(",")) 27 | return value_low, value_high 28 | 29 | 30 | int_range = IntRange() 31 | 32 | 33 | @click.command() 34 | @click.argument("db_path") 35 | @click.option("-t", "--tables", help="Number of tables to create", default=10) 36 | @click.option( 37 | "-r", 38 | "--rows", 39 | help="Number of rows to create per table", 40 | default="0,200", 41 | type=int_range, 42 | ) 43 | @click.option( 44 | "-c", 45 | "--columns", 46 | help="Number of columns to create per table", 47 | default="4", 48 | type=int_range, 49 | ) 50 | @click.option( 51 | "--pks", 52 | help="Number of primary key. columns per table", 53 | default="1", 54 | type=int_range, 55 | ) 56 | @click.option( 57 | "--fks", help="Number of foreign keys per table", default="0,2", type=int_range 58 | ) 59 | @click.option( 60 | "--fts", help="Configure full-text search (FTS5) against text columns", is_flag=True 61 | ) 62 | @click.option( 63 | "--fts4", 64 | help="Configure full-text search (FTS4) against text columns", 65 | is_flag=True, 66 | ) 67 | @click.option("--seed", help="Specify as seed for the random generator") 68 | @click.version_option() 69 | def cli(db_path, tables, rows, columns, pks, fks, fts, fts4, seed): 70 | "Tool for generating demo SQLite databases" 71 | db = sqlite_utils.Database(db_path) 72 | existing_tables = set(db.table_names()) 73 | fake = Faker() 74 | if seed: 75 | fake.seed_instance(seed) 76 | rows_low, rows_high = rows 77 | columns_low, columns_high = columns 78 | pks_low, pks_high = pks 79 | fks_low, fks_high = fks 80 | if fks_high > columns_high: 81 | fks_high = columns_high 82 | # Make a plan first, so we can update a progress bar 83 | plan = [fake.random.randint(rows_low, rows_high) for i in range(tables)] 84 | total_to_do = sum(plan) 85 | created_tables = [] 86 | with click.progressbar( 87 | length=total_to_do, show_pos=True, show_percent=True, label="Generating rows" 88 | ) as bar: 89 | for num_rows in plan: 90 | table_name = None 91 | while table_name is None or db[table_name].exists(): 92 | table_name = "_".join(fake.words()) 93 | column_defs, pks, generate = record_builder( 94 | fake, 95 | num_columns=fake.random.randint(columns_low, columns_high), 96 | num_pks=fake.random.randint(pks_low, pks_high), 97 | num_fks=fake.random.randint(fks_low, fks_high), 98 | ) 99 | with db.conn: 100 | db[table_name].create(column_defs, pk=pks[0] if len(pks) == 1 else pks) 101 | 102 | def yield_em(): 103 | for j in range(num_rows): 104 | yield generate() 105 | bar.update(1) 106 | 107 | db[table_name].insert_all(yield_em()) 108 | created_tables.append(table_name) 109 | 110 | # Last step: populate those foreign keys 111 | if fks_high: 112 | # Find all (table, column) pairs that end in _id 113 | fk_columns = [] 114 | for table_name in created_tables: 115 | table = db[table_name] 116 | for column in table.columns_dict: 117 | if column.endswith("_id"): 118 | fk_columns.append((table_name, column)) 119 | # Possible target tables are any table without a compound fk 120 | possible_target_tables = [ 121 | name for name in db.table_names() if len(db[name].pks) == 1 122 | ] 123 | if possible_target_tables: 124 | table_pks_cache = {} 125 | with click.progressbar( 126 | fk_columns, 127 | show_pos=True, 128 | show_percent=True, 129 | label="Populating foreign keys", 130 | ) as bar: 131 | for table_name, column in bar: 132 | other_table = fake.random.choice(possible_target_tables) 133 | other_pk = db[other_table].pks[0] 134 | db[table_name].add_foreign_key(column, other_table, other_pk) 135 | if other_table not in table_pks_cache: 136 | table_pks_cache[other_table] = [ 137 | r[0] 138 | for r in db.conn.execute( 139 | "select {} from [{}]".format(other_pk, other_table) 140 | ).fetchall() 141 | ] 142 | pks = db[table_name].pks 143 | with db.conn: 144 | # Loop through all primary keys 145 | row_pks = db.conn.execute( 146 | "select {} from {}".format(", ".join(pks), table_name) 147 | ).fetchall() 148 | for row_pk in row_pks: 149 | options = table_pks_cache[other_table] 150 | row_id = row_pk[0] if len(row_pk) == 1 else tuple(row_pk) 151 | db[table_name].update( 152 | row_id, 153 | { 154 | column: fake.random.choice(options) 155 | if options 156 | else None 157 | }, 158 | ) 159 | if fts or fts4: 160 | # Configure full-text search 161 | with click.progressbar( 162 | db.tables, 163 | show_pos=True, 164 | show_percent=True, 165 | label="Configuring FTS{}".format("4" if fts4 else ""), 166 | ) as bar: 167 | for table in bar: 168 | text_columns = [ 169 | key for key, value in table.columns_dict.items() if value is str 170 | ] 171 | table.enable_fts(text_columns, fts_version="FTS4" if fts4 else "FTS5") 172 | -------------------------------------------------------------------------------- /sqlite_generate/utils.py: -------------------------------------------------------------------------------- 1 | def record_builder(fake, num_columns=1, num_pks=1, num_fks=0): 2 | "Returns ({column:defs}, pks-tuple, generate-fn)" 3 | assert num_columns >= 1 4 | 5 | # Primary keys come first 6 | if num_pks == 1: 7 | columns = ["id"] 8 | else: 9 | columns = ["id_{}".format(i + 1) for i in range(num_pks)] 10 | column_defs = {column: int for column in columns} 11 | column_types = [None] * num_pks 12 | pks = columns[:] 13 | 14 | # If we have a second column, it's always name 15 | if num_columns > num_pks: 16 | columns.append("name") 17 | column_defs["name"] = str 18 | column_types.append((str, fake.name)) 19 | 20 | potential_column_types = ( 21 | (str, fake.name), 22 | (str, fake.address), 23 | (str, fake.email), 24 | (int, fake.unix_time), 25 | (str, fake.sha1), 26 | (str, fake.url), 27 | (str, fake.zipcode), 28 | (str, fake.text), 29 | (str, lambda: str(fake.date_this_century())), 30 | (float, fake.pyfloat), 31 | (int, fake.pyint), 32 | ) 33 | random_column_types = [] 34 | if num_columns > (num_pks + 1): 35 | random_column_types = fake.random.choices( 36 | potential_column_types, k=num_columns - 2 37 | ) 38 | # If we are generating foreign keys, randomly add those now 39 | column_is_fk = [False] * len(random_column_types) 40 | if num_fks: 41 | random_idxs = fake.random.sample(range(len(column_is_fk)), k=num_fks) 42 | for idx in random_idxs: 43 | column_is_fk[idx] = True 44 | random_column_types[idx] = (int, lambda: None) 45 | 46 | column_types.extend(random_column_types) 47 | random_column_names = [ 48 | fake.word() + ("_id" if column_is_fk[i] else "") 49 | for i, _ in enumerate(random_column_types) 50 | ] 51 | columns.extend(random_column_names) 52 | column_defs.update( 53 | { 54 | name: pair[0] 55 | for name, pair in zip(random_column_names, random_column_types) 56 | } 57 | ) 58 | 59 | pk_counters = {pk: 1 for pk in pks} 60 | 61 | def generate(): 62 | d = {} 63 | if pks: 64 | for pk in pks: 65 | d[pk] = pk_counters[pk] 66 | # Increment pk 67 | idx_to_increment = fake.random.choice(range(len(pks))) 68 | pk_counters[pks[idx_to_increment]] += 1 69 | # Reset any counters after the one we incremented 70 | for idx in range(idx_to_increment + 1, len(pks)): 71 | pk_counters[pks[idx]] = 1 72 | 73 | d.update( 74 | { 75 | name: pair[1]() 76 | for name, pair in zip(columns[len(pks) :], column_types[len(pks) :]) 77 | } 78 | ) 79 | return d 80 | 81 | return column_defs, pks, generate 82 | -------------------------------------------------------------------------------- /tests/test_sqlite_generate.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | import pytest 3 | from sqlite_generate.cli import cli 4 | import sqlite_utils 5 | 6 | 7 | def test_generate(): 8 | runner = CliRunner() 9 | with runner.isolated_filesystem(): 10 | result = runner.invoke(cli, ["data.db"], catch_exceptions=False) 11 | assert 0 == result.exit_code 12 | db = sqlite_utils.Database("data.db") 13 | assert 10 == len(db.table_names()) 14 | 15 | 16 | @pytest.mark.parametrize( 17 | "rows,low,high", [("--rows=20", 20, 20), ("--rows=5,500", 5, 500)] 18 | ) 19 | def test_rows(rows, low, high): 20 | runner = CliRunner() 21 | with runner.isolated_filesystem(): 22 | result = runner.invoke(cli, ["data.db", rows], catch_exceptions=False) 23 | assert 0 == result.exit_code, result.output 24 | db = sqlite_utils.Database("data.db") 25 | assert 10 == len(db.table_names()) 26 | for table in db.tables: 27 | assert low <= table.count <= high 28 | 29 | 30 | @pytest.mark.parametrize( 31 | "columns,low,high", [("--columns=10", 10, 10), ("--columns=5,50", 5, 50)] 32 | ) 33 | def test_columns(columns, low, high): 34 | runner = CliRunner() 35 | with runner.isolated_filesystem(): 36 | result = runner.invoke( 37 | cli, 38 | ["data.db", "--rows=1", columns, "--seed=seed"], 39 | catch_exceptions=False, 40 | ) 41 | assert 0 == result.exit_code, result.output 42 | db = sqlite_utils.Database("data.db") 43 | for table in db.tables: 44 | assert low <= len(table.columns) <= high 45 | 46 | 47 | @pytest.mark.parametrize("pks,low,high", [("--pks=2", 2, 2), ("--pks=1,3", 1, 3)]) 48 | def test_pks(pks, low, high): 49 | runner = CliRunner() 50 | with runner.isolated_filesystem(): 51 | result = runner.invoke( 52 | cli, 53 | ["data.db", "--rows=10", "--columns=10", "--fks=0", pks, "--seed=seed"], 54 | catch_exceptions=False, 55 | ) 56 | assert 0 == result.exit_code, result.output 57 | db = sqlite_utils.Database("data.db") 58 | for table in db.tables: 59 | assert low <= len(table.pks) <= high 60 | 61 | 62 | def test_pks_0(): 63 | runner = CliRunner() 64 | with runner.isolated_filesystem(): 65 | result = runner.invoke( 66 | cli, 67 | [ 68 | "data.db", 69 | "--rows=10", 70 | "--columns=10", 71 | "--fks=0", 72 | "--pks=0", 73 | "--seed=seed", 74 | ], 75 | catch_exceptions=False, 76 | ) 77 | assert 0 == result.exit_code, result.output 78 | db = sqlite_utils.Database("data.db") 79 | for table in db.tables: 80 | assert ["rowid"] == table.pks 81 | 82 | 83 | def test_seed(): 84 | runner = CliRunner() 85 | with runner.isolated_filesystem(): 86 | runner.invoke( 87 | cli, 88 | ["one.db", "--tables=1", "--rows=1", "--columns=2", "--seed=dog"], 89 | catch_exceptions=False, 90 | ) 91 | runner.invoke( 92 | cli, 93 | ["two.db", "--tables=1", "--rows=1", "--columns=2", "--seed=dog"], 94 | catch_exceptions=False, 95 | ) 96 | # Files should be identical 97 | assert open("one.db", "rb").read() == open("two.db", "rb").read() 98 | # With a different seed, files should differ: 99 | runner.invoke( 100 | cli, 101 | ["three.db", "--tables=1", "--rows=1", "--columns=2", "--seed=cat"], 102 | catch_exceptions=False, 103 | ) 104 | assert open("two.db", "rb").read() != open("three.db", "rb").read() 105 | 106 | 107 | def test_fks(): 108 | runner = CliRunner() 109 | with runner.isolated_filesystem(): 110 | result = runner.invoke( 111 | cli, 112 | [ 113 | "data.db", 114 | "--tables=2", 115 | "--rows=1", 116 | "--columns=5", 117 | "--fks=2", 118 | "--seed=seed", 119 | ], 120 | catch_exceptions=False, 121 | ) 122 | assert 0 == result.exit_code, result.output 123 | db = sqlite_utils.Database("data.db") 124 | # All tables should have columns ending in _id AND foreign keys 125 | for table in db.tables: 126 | assert table.foreign_keys 127 | fk_cols = [c for c in table.columns_dict if c.endswith("_id")] 128 | assert len(fk_cols) == 2 129 | 130 | 131 | def test_fks_multiple_runs(): 132 | runner = CliRunner() 133 | with runner.isolated_filesystem(): 134 | for i in range(2): 135 | result = runner.invoke( 136 | cli, 137 | [ 138 | "data.db", 139 | "--tables=2", 140 | "--rows=1", 141 | "--columns=5", 142 | "--fks=2", 143 | "--seed=seed{}".format(i), 144 | ], 145 | catch_exceptions=False, 146 | ) 147 | assert 0 == result.exit_code, result.output 148 | db = sqlite_utils.Database("data.db") 149 | # All tables should have columns ending in _id AND foreign keys 150 | for table in db.tables: 151 | assert table.foreign_keys 152 | fk_cols = [c for c in table.columns_dict if c.endswith("_id")] 153 | assert len(fk_cols) == 2 154 | 155 | 156 | def test_fks_against_empty_table(): 157 | runner = CliRunner() 158 | with runner.isolated_filesystem(): 159 | runner.invoke( 160 | cli, 161 | [ 162 | "data.db", 163 | "--tables=1", 164 | "--rows=0", 165 | "--columns=5", 166 | "--fks=0", 167 | "--seed=seed", 168 | ], 169 | catch_exceptions=False, 170 | ) 171 | # Run it again, with fks (this used to break) 172 | result = runner.invoke( 173 | cli, 174 | [ 175 | "data.db", 176 | "--tables=2", 177 | "--rows=1", 178 | "--columns=5", 179 | "--fks=2", 180 | "--seed=seed2", 181 | ], 182 | catch_exceptions=False, 183 | ) 184 | assert 0 == result.exit_code 185 | 186 | 187 | def test_fks_against_rowid(): 188 | runner = CliRunner() 189 | with runner.isolated_filesystem(): 190 | result = runner.invoke( 191 | cli, 192 | [ 193 | "data.db", 194 | "--tables=10", 195 | "--rows=10", 196 | "--columns=5", 197 | "--pks=0", 198 | "--fks=1", 199 | "--seed=seed", 200 | ], 201 | catch_exceptions=False, 202 | ) 203 | assert 0 == result.exit_code 204 | db = sqlite_utils.Database("data.db") 205 | assert 10 == len(db.tables) 206 | for table in db.tables: 207 | assert 1 == len(table.foreign_keys) 208 | 209 | 210 | def test_fks_against_compound_primary_keys(): 211 | # fks should only reference single key tables, not compound ones 212 | runner = CliRunner() 213 | with runner.isolated_filesystem(): 214 | result = runner.invoke( 215 | cli, 216 | [ 217 | "data.db", 218 | "--tables=2", 219 | "--rows=10", 220 | "--columns=5", 221 | "--pks=2", 222 | "--fks=0", 223 | "--seed=seed", 224 | ], 225 | catch_exceptions=False, 226 | ) 227 | assert 0 == result.exit_code 228 | db = sqlite_utils.Database("data.db") 229 | assert 2 == len(db.tables) 230 | # Every table should have two primary keys: 231 | for table in db.tables: 232 | assert 2 == len(table.pks) 233 | # Now try to use --fks and it should fail to add them silently: 234 | result = runner.invoke( 235 | cli, 236 | [ 237 | "data.db", 238 | "--tables=1", 239 | "--rows=10", 240 | "--columns=5", 241 | "--pks=2", 242 | "--fks=1", 243 | "--seed=seed", 244 | ], 245 | catch_exceptions=False, 246 | ) 247 | assert 0 == result.exit_code 248 | assert 3 == len(db.tables) 249 | # There should be no foreign keys still: 250 | for table in db.tables: 251 | assert not table.foreign_keys 252 | # Add two regular tables, with a single primary key 253 | result = runner.invoke( 254 | cli, 255 | ["data.db", "--tables=2", "--rows=10", "--seed=seed", "--fks=0",], 256 | catch_exceptions=False, 257 | ) 258 | assert 0 == result.exit_code 259 | assert 5 == len(db.tables) 260 | # Running this again SHOULD add foreign keys, because we have pk=1 tables now 261 | result = runner.invoke( 262 | cli, 263 | [ 264 | "data.db", 265 | "--tables=2", 266 | "--rows=10", 267 | "--columns=5", 268 | "--fks=1", 269 | "--seed=seed", 270 | ], 271 | catch_exceptions=False, 272 | ) 273 | assert 0 == result.exit_code 274 | assert 7 == len(db.tables) 275 | assert any(t for t in db.tables if t.foreign_keys) 276 | 277 | 278 | def test_fts(): 279 | runner = CliRunner() 280 | with runner.isolated_filesystem(): 281 | result = runner.invoke( 282 | cli, 283 | ["data.db", "--tables=5", "--fts", "--fks=0", "--seed=seed",], 284 | catch_exceptions=False, 285 | ) 286 | assert 0 == result.exit_code, result.output 287 | db = sqlite_utils.Database("data.db") 288 | table_names = db.table_names() 289 | assert 30 == len(table_names) 290 | for suffix in ("_fts", "_fts_config", "_fts_data", "_fts_idx", "_fts_docsize"): 291 | assert any(t for t in table_names if t.endswith(suffix)), suffix 292 | 293 | 294 | def test_fts4(): 295 | runner = CliRunner() 296 | with runner.isolated_filesystem(): 297 | result = runner.invoke( 298 | cli, 299 | ["data.db", "--tables=5", "--fts4", "--fks=0", "--seed=seed",], 300 | catch_exceptions=False, 301 | ) 302 | assert 0 == result.exit_code, result.output 303 | db = sqlite_utils.Database("data.db") 304 | table_names = db.table_names() 305 | assert 30 == len(table_names) 306 | for suffix in ( 307 | "_fts", 308 | "_fts_segments", 309 | "_fts_segdir", 310 | "_fts_docsize", 311 | "_fts_stat", 312 | ): 313 | assert any(t for t in table_names if t.endswith(suffix)) 314 | --------------------------------------------------------------------------------