├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── datasette_build ├── __init__.py ├── cli.py ├── core.py ├── datasette_plugin.py ├── default_formats.py ├── hookspecs.py └── sqlite_utils_plugin.py ├── pyproject.toml └── tests ├── demo ├── cities.csv ├── countries.tsv └── museums.json └── test_build.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | python-version: ["3.8", "3.9", "3.10", "3.11"] 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Set up Python ${{ matrix.python-version }} 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: ${{ matrix.python-version }} 19 | cache: pip 20 | cache-dependency-path: '**/pyproject.toml' 21 | - name: Install dependencies 22 | run: | 23 | pip install -e '.[test]' 24 | - name: Run tests 25 | run: | 26 | pytest 27 | deploy: 28 | runs-on: ubuntu-latest 29 | environment: release 30 | permissions: 31 | id-token: write 32 | needs: [test] 33 | steps: 34 | - uses: actions/checkout@v2 35 | - name: Set up Python 36 | uses: actions/setup-python@v5 37 | with: 38 | python-version: '3.11' 39 | cache: pip 40 | cache-dependency-path: '**/pyproject.toml' 41 | - name: Install dependencies 42 | run: | 43 | pip install setuptools wheel build 44 | - name: Build 45 | run: | 46 | python -m build 47 | - name: Publish 48 | uses: pypa/gh-action-pypi-publish@release/v1 49 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | workflow_dispatch: 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | python-version: ["3.7", "3.8", "3.9", "3.10"] 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Set up Python ${{ matrix.python-version }} 16 | if: hashFiles('setup.py') 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | cache: pip 21 | cache-dependency-path: '**/setup.py' 22 | - name: Install dependencies 23 | if: hashFiles('setup.py') 24 | run: | 25 | pip install -e '.[test]' 26 | - name: Run tests 27 | if: hashFiles('setup.py') 28 | run: | 29 | pytest 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | venv 6 | .eggs 7 | .pytest_cache 8 | *.egg-info 9 | .DS_Store 10 | .vscode 11 | dist 12 | build 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # datasette-build 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/datasette-build.svg)](https://pypi.org/project/datasette-build/) 4 | [![Changelog](https://img.shields.io/github/v/release/datasette/datasette-build?include_prereleases&label=changelog)](https://github.com/datasette/datasette-build/releases) 5 | [![Tests](https://github.com/datasette/datasette-build/actions/workflows/test.yml/badge.svg)](https://github.com/datasette/datasette-build/actions/workflows/test.yml) 6 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/datasette/datasette-build/blob/main/LICENSE) 7 | 8 | Build a directory full of files into a SQLite database 9 | 10 | > ⚠️ **Early alpha preview**. Everything about this tool is likely to change. 11 | 12 | ## Installation 13 | 14 | Install this tool using `pip` or `pipx`: 15 | ```bash 16 | pipx install datasette-build 17 | ``` 18 | This will provide the `datasette-build` CLI application. 19 | 20 | You can also install it as a Datasette plugin. First [install Datasette](https://docs.datasette.io/en/stable/installation.html), then run: 21 | ```bash 22 | datasette install datasette-build 23 | ``` 24 | This will provide a `datasette build ...` command that works the same as the `datasette-build` CLI application. 25 | 26 | Or you can install it as a plugin for [sqlite-utils](https://sqlite-utils.datasette.io/). With that installed, run this: 27 | ```bash 28 | sqlite-utils install datasette-build 29 | ``` 30 | Now you can access the tool as `sqlite-utils build ...` 31 | 32 | ## Usage 33 | 34 | The `datasette-build` (or `datasette build` or `sqlite-utils build`) command takes two arguments: a path to a SQLite database file and a path to a directory containing files to be loaded into that database: 35 | 36 | ```bash 37 | datasette-build mydatabase.db myfiles/ 38 | ``` 39 | The `myfiles/` folder can contain a mixture of CSV, TSV and JSON files. Each file will be loaded into a table in the `mydatabase.db` SQLite database. 40 | 41 | The database file will be created if it does not already exist. 42 | 43 | Consider a `myfiles/cities.csv` file like this: 44 | ```csv 45 | id,name,latitude,longitude,country 46 | nyc,New York City,40.7128,-74.006,US 47 | lon,London,51.5074,-0.1278,GB 48 | tok,Tokyo,35.6895,139.6917,JP 49 | par,Paris,48.8566,2.3522,FR 50 | ber,Berlin,52.52,13.405,DE 51 | syd,Sydney,-33.8688,151.2093,AU 52 | cai,Cairo,30.0444,31.2357,EG 53 | rio,Rio de Janeiro,-22.9068,-43.1729,BR 54 | mos,Moscow,55.7558,37.6173,RU 55 | mum,Mumbai,19.076,72.8777,IN 56 | ``` 57 | Since this has a `id` column the primary key for the table will be set to `id`. Without an `id` column the primary key will not be defined. 58 | 59 | A `myfiles/counties.tsv` file could look like this: 60 | ```tsv 61 | id name population 62 | US United States 331002651 63 | GB United Kingdom 67886011 64 | JP Japan 126476461 65 | FR France 65273511 66 | DE Germany 83783942 67 | AU Australia 25499884 68 | EG Egypt 102334404 69 | BR Brazil 212559417 70 | RU Russia 145934462 71 | IN India 1380004385 72 | ``` 73 | And a `myfiles/museums.json` file like this: 74 | ```json 75 | [ 76 | { 77 | "id": 1, 78 | "name": "Metropolitan Museum of Art", 79 | "city_id": "nyc" 80 | }, 81 | { 82 | "id": 2, 83 | "name": "British Museum", 84 | "city_id": "lon" 85 | } 86 | ] 87 | ``` 88 | Running `datasette-build mydatabase.db myfiles/` will create a SQLite database file containing three tables: `cities`, `counties` and `museums`. The schema will look like this: 89 | 90 | ```sql 91 | CREATE TABLE [museums] ( 92 | [id] INTEGER PRIMARY KEY, 93 | [name] TEXT, 94 | [city_id] TEXT 95 | ); 96 | CREATE TABLE "cities" ( 97 | [id] TEXT PRIMARY KEY, 98 | [name] TEXT, 99 | [latitude] FLOAT, 100 | [longitude] FLOAT, 101 | [country] TEXT 102 | ); 103 | CREATE TABLE "countries" ( 104 | [id] TEXT PRIMARY KEY, 105 | [name] TEXT, 106 | [population] INTEGER 107 | ); 108 | ``` 109 | 110 | ## Development 111 | 112 | To set up this plugin locally, first checkout the code. Then create a new virtual environment: 113 | ```bash 114 | cd datasette-build 115 | python3 -m venv venv 116 | source venv/bin/activate 117 | ``` 118 | Now install the dependencies and test dependencies: 119 | ```bash 120 | pip install -e '.[test]' 121 | ``` 122 | To run the tests: 123 | ```bash 124 | pytest 125 | ``` 126 | -------------------------------------------------------------------------------- /datasette_build/__init__.py: -------------------------------------------------------------------------------- 1 | from .hookspecs import hookimpl 2 | -------------------------------------------------------------------------------- /datasette_build/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | from datasette_build.core import get_formats 3 | import itertools 4 | import pathlib 5 | import sqlite_utils 6 | from sqlite_utils.utils import TypeTracker 7 | 8 | 9 | @click.command() 10 | @click.argument( 11 | "db_path", 12 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 13 | ) 14 | @click.argument( 15 | "directory_path", 16 | type=click.Path(file_okay=False, dir_okay=True, allow_dash=False), 17 | ) 18 | def cli(db_path, directory_path): 19 | """Build a directory full of files into a SQLite database""" 20 | formats = {} 21 | for format, ext in get_formats(): 22 | formats[ext] = format 23 | 24 | db = sqlite_utils.Database(db_path) 25 | 26 | # Iterate through every file and directory in directory_path\ 27 | path = pathlib.Path(directory_path) 28 | for child in path.iterdir(): 29 | if child.is_file() and not child.stem.startswith("."): 30 | # It's a file, act based on its extension 31 | extension = child.suffix.lstrip(".") 32 | if extension in formats: 33 | rows = formats[extension].parse_file(child.open()) 34 | first_row = next(rows) 35 | columns = list(first_row.keys()) 36 | rows = itertools.chain([first_row], rows) 37 | pk = None 38 | if "id" in columns: 39 | pk = "id" 40 | table_name = child.stem 41 | detect_types = getattr(formats[extension], "detect_types", False) 42 | tracker = None 43 | if detect_types: 44 | tracker = TypeTracker() 45 | rows = tracker.wrap(rows) 46 | db[table_name].insert_all(rows, pk=pk, replace=True) 47 | if tracker is not None: 48 | db[table_name].transform(types=tracker.types) 49 | row_count = db[table_name].count 50 | click.echo( 51 | "{} row{} in {}".format( 52 | row_count, "" if row_count == 1 else "s", table_name 53 | ), 54 | err=True, 55 | ) 56 | elif child.is_dir(): 57 | click.echo(" is_dir: " + str(child), err=True) 58 | 59 | 60 | if __name__ == "__main__": 61 | cli() 62 | -------------------------------------------------------------------------------- /datasette_build/core.py: -------------------------------------------------------------------------------- 1 | import pluggy 2 | import importlib 3 | from . import hookspecs 4 | 5 | pm = pluggy.PluginManager("dsbuild") 6 | pm.add_hookspecs(hookspecs) 7 | pm.load_setuptools_entrypoints("dsbuild") 8 | 9 | mod = importlib.import_module("datasette_build.default_formats") 10 | pm.register(mod) 11 | 12 | 13 | def get_formats(): 14 | formats = [] 15 | 16 | def register(format, extension=None): 17 | formats.append((format, extension)) 18 | 19 | pm.hook.register_formats(register=register) 20 | return formats 21 | -------------------------------------------------------------------------------- /datasette_build/datasette_plugin.py: -------------------------------------------------------------------------------- 1 | from datasette import hookimpl 2 | 3 | 4 | @hookimpl 5 | def register_commands(cli): 6 | from .cli import cli as build_cli 7 | 8 | cli.add_command(build_cli, name="build") 9 | -------------------------------------------------------------------------------- /datasette_build/default_formats.py: -------------------------------------------------------------------------------- 1 | from datasette_build import hookimpl 2 | import csv 3 | import json 4 | 5 | 6 | @hookimpl 7 | def register_formats(register): 8 | register(CsvFormat(), extension="csv") 9 | register(CsvFormat(dialect=csv.excel_tab), extension="tsv") 10 | register(JsonFormat(), extension="json") 11 | 12 | 13 | class CsvFormat: 14 | detect_types = True 15 | 16 | def __init__(self, dialect=csv.excel): 17 | self.dialect = dialect 18 | 19 | def parse_file(self, fp): 20 | yield from csv.DictReader(fp, dialect=self.dialect) 21 | 22 | 23 | class JsonFormat: 24 | def parse_file(self, fp): 25 | yield from json.load(fp) 26 | -------------------------------------------------------------------------------- /datasette_build/hookspecs.py: -------------------------------------------------------------------------------- 1 | from pluggy import HookimplMarker 2 | from pluggy import HookspecMarker 3 | 4 | hookspec = HookspecMarker("dsbuild") 5 | hookimpl = HookimplMarker("dsbuild") 6 | 7 | 8 | @hookspec 9 | def register_formats(register): 10 | "Register classes to handle different file formats" 11 | -------------------------------------------------------------------------------- /datasette_build/sqlite_utils_plugin.py: -------------------------------------------------------------------------------- 1 | from sqlite_utils import hookimpl 2 | 3 | 4 | @hookimpl 5 | def register_commands(cli): 6 | from .cli import cli as build_cli 7 | 8 | cli.add_command(build_cli, name="build") 9 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "datasette-build" 3 | version = "0.1a0" 4 | description = "Build a directory full of files into a SQLite database" 5 | readme = "README.md" 6 | authors = [{name = "Datasette"}] 7 | license = {text = "Apache-2.0"} 8 | classifiers=[ 9 | "Framework :: Datasette", 10 | "License :: OSI Approved :: Apache Software License" 11 | ] 12 | requires-python = ">=3.8" 13 | dependencies = [ 14 | "click", 15 | "sqlite-utils", 16 | "PyYAML", 17 | "pluggy" 18 | ] 19 | 20 | [project.urls] 21 | Homepage = "https://github.com/datasette/datasette-build" 22 | Changelog = "https://github.com/datasette/datasette-build/releases" 23 | Issues = "https://github.com/datasette/datasette-build/issues" 24 | CI = "https://github.com/datasette/datasette-build/actions" 25 | 26 | [project.scripts] 27 | datasette-build = "datasette_build.cli:cli" 28 | 29 | [project.entry-points.datasette] 30 | build = "datasette_build.datasette_plugin" 31 | 32 | [project.entry-points.sqlite_utils] 33 | build = "datasette_build.sqlite_utils_plugin" 34 | 35 | [project.optional-dependencies] 36 | test = ["pytest", "pytest-asyncio"] 37 | datasette = ["datasette"] 38 | 39 | [tool.pytest.ini_options] 40 | asyncio_mode = "strict" 41 | -------------------------------------------------------------------------------- /tests/demo/cities.csv: -------------------------------------------------------------------------------- 1 | id,name,latitude,longitude,country 2 | nyc,New York City,40.7128,-74.006,US 3 | lon,London,51.5074,-0.1278,GB 4 | tok,Tokyo,35.6895,139.6917,JP 5 | par,Paris,48.8566,2.3522,FR 6 | ber,Berlin,52.52,13.405,DE 7 | syd,Sydney,-33.8688,151.2093,AU 8 | cai,Cairo,30.0444,31.2357,EG 9 | rio,Rio de Janeiro,-22.9068,-43.1729,BR 10 | mos,Moscow,55.7558,37.6173,RU 11 | mum,Mumbai,19.076,72.8777,IN 12 | -------------------------------------------------------------------------------- /tests/demo/countries.tsv: -------------------------------------------------------------------------------- 1 | id name population 2 | US United States 331002651 3 | GB United Kingdom 67886011 4 | JP Japan 126476461 5 | FR France 65273511 6 | DE Germany 83783942 7 | AU Australia 25499884 8 | EG Egypt 102334404 9 | BR Brazil 212559417 10 | RU Russia 145934462 11 | IN India 1380004385 12 | -------------------------------------------------------------------------------- /tests/demo/museums.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": 1, 4 | "name": "Metropolitan Museum of Art", 5 | "city_id": "nyc" 6 | }, 7 | { 8 | "id": 2, 9 | "name": "British Museum", 10 | "city_id": "lon" 11 | }, 12 | { 13 | "id": 3, 14 | "name": "Tokyo National Museum", 15 | "city_id": "tok" 16 | }, 17 | { 18 | "id": 4, 19 | "name": "Louvre Museum", 20 | "city_id": "par" 21 | }, 22 | { 23 | "id": 5, 24 | "name": "Pergamon Museum", 25 | "city_id": "ber" 26 | }, 27 | { 28 | "id": 6, 29 | "name": "Art Gallery of New South Wales", 30 | "city_id": "syd" 31 | }, 32 | { 33 | "id": 7, 34 | "name": "Egyptian Museum", 35 | "city_id": "cai" 36 | }, 37 | { 38 | "id": 8, 39 | "name": "Museu Nacional", 40 | "city_id": "rio" 41 | }, 42 | { 43 | "id": 9, 44 | "name": "State Hermitage Museum", 45 | "city_id": "mos" 46 | }, 47 | { 48 | "id": 10, 49 | "name": "Chhatrapati Shivaji Maharaj Vastu Sangrahalaya", 50 | "city_id": "mum" 51 | } 52 | ] -------------------------------------------------------------------------------- /tests/test_build.py: -------------------------------------------------------------------------------- 1 | from datasette_build.cli import cli 2 | from click.testing import CliRunner 3 | import pathlib 4 | import pytest 5 | import sqlite_utils 6 | 7 | DEMO_DIR = pathlib.Path(__file__).parent / "demo" 8 | 9 | 10 | @pytest.fixture(scope="session") 11 | def demo_db(tmp_path_factory): 12 | tmpdir = tmp_path_factory.mktemp("built") 13 | db_path = str(tmpdir / "demo.db") 14 | runner = CliRunner(mix_stderr=False) 15 | result = runner.invoke( 16 | cli, [str(tmpdir / "demo.db"), str(DEMO_DIR)], catch_exceptions=False 17 | ) 18 | assert result.exit_code == 0 19 | expected_lines = [ 20 | "10 rows in museums", 21 | "10 rows in cities", 22 | "10 rows in countries", 23 | ] 24 | for line in expected_lines: 25 | assert line in result.stderr 26 | return sqlite_utils.Database(db_path) 27 | 28 | 29 | def test_demo_db_tables(demo_db): 30 | assert set(demo_db.table_names()) == {"cities", "countries", "museums"} 31 | 32 | 33 | def test_demo_db_tsv(demo_db): 34 | assert demo_db["countries"].count == 10 35 | assert demo_db["countries"].columns_dict == { 36 | "id": str, 37 | "name": str, 38 | "population": int, 39 | } 40 | # Spot check 41 | assert next(demo_db["countries"].rows_where("id = 'US'")) == { 42 | "id": "US", 43 | "name": "United States", 44 | "population": 331002651, 45 | } 46 | assert next(demo_db["countries"].rows_where("id = 'EG'")) == { 47 | "id": "EG", 48 | "name": "Egypt", 49 | "population": 102334404, 50 | } 51 | 52 | 53 | def test_demo_db_csv(demo_db): 54 | cities = demo_db["cities"] 55 | assert cities.count == 10 56 | assert cities.columns_dict == { 57 | "id": str, 58 | "name": str, 59 | "latitude": float, 60 | "longitude": float, 61 | "country": str, 62 | } 63 | # Spot check 64 | assert next(cities.rows_where("id = 'nyc'")) == { 65 | "id": "nyc", 66 | "name": "New York City", 67 | "latitude": 40.7128, 68 | "longitude": -74.006, 69 | "country": "US", 70 | } 71 | assert next(cities.rows_where("id = 'syd'")) == { 72 | "id": "syd", 73 | "name": "Sydney", 74 | "latitude": -33.8688, 75 | "longitude": 151.2093, 76 | "country": "AU", 77 | } 78 | 79 | 80 | def test_demo_db_json(demo_db): 81 | museums = demo_db["museums"] 82 | assert museums.count == 10 83 | assert museums.columns_dict == { 84 | "id": int, 85 | "name": str, 86 | "city_id": str, 87 | } 88 | # Spot check 89 | assert next(museums.rows_where("id = 1")) == { 90 | "id": 1, 91 | "name": "Metropolitan Museum of Art", 92 | "city_id": "nyc", 93 | } 94 | assert next(museums.rows_where("id = 10")) == { 95 | "id": 10, 96 | "name": "Chhatrapati Shivaji Maharaj Vastu Sangrahalaya", 97 | "city_id": "mum", 98 | } 99 | --------------------------------------------------------------------------------