├── db_build ├── __init__.py ├── sqlite_utils_plugin.py └── cli.py ├── tests ├── examples │ └── test.csv └── test_db_build.py ├── .github └── workflows │ ├── test.yml │ └── publish.yml ├── pyproject.toml └── README.md /db_build/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /db_build/sqlite_utils_plugin.py: -------------------------------------------------------------------------------- 1 | import sqlite_utils 2 | 3 | 4 | @sqlite_utils.hookimpl 5 | def register_commands(cli): 6 | from .cli import cli as db_build_cli 7 | 8 | cli.add_command(db_build_cli, name="build") 9 | -------------------------------------------------------------------------------- /tests/examples/test.csv: -------------------------------------------------------------------------------- 1 | county,precinct,office,district,party,candidate,votes 2 | Yolo,100001,President,,LIB,Gary Johnson,41 3 | Yolo,100001,President,,PAF,Gloria Estela La Riva,8 4 | Yolo,100001,Proposition 51,,,No,398 5 | Yolo,100001,Proposition 51,,,Yes,460 6 | Yolo,100001,State Assembly,7,DEM,Kevin McCarty,572 7 | Yolo,100001,State Assembly,7,REP,Ryan K. Brown,291 8 | 9 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request] 4 | 5 | permissions: 6 | contents: read 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | cache: pip 21 | cache-dependency-path: setup.py 22 | - name: Install dependencies 23 | run: | 24 | pip install -e '.[test,compile]' 25 | - name: Run tests 26 | run: | 27 | pytest 28 | 29 | -------------------------------------------------------------------------------- /tests/test_db_build.py: -------------------------------------------------------------------------------- 1 | from click.testing import CliRunner 2 | from db_build.cli import cli 3 | import pathlib 4 | import sqlite_utils 5 | 6 | examples = pathlib.Path(__file__).parent / "examples" 7 | 8 | 9 | def test_basic_csv(tmpdir): 10 | runner = CliRunner() 11 | db_path = str(tmpdir / "data.db") 12 | result = runner.invoke(cli, [str(db_path), str(examples / "test.csv")]) 13 | db = sqlite_utils.Database(db_path) 14 | assert db.schema == ( 15 | "CREATE TABLE [test] (\n" 16 | " [county] TEXT,\n" 17 | " [precinct] TEXT,\n" 18 | " [office] TEXT,\n" 19 | " [district] TEXT,\n" 20 | " [party] TEXT,\n" 21 | " [candidate] TEXT,\n" 22 | " [votes] TEXT\n" 23 | ");" 24 | ) 25 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "db-build" 3 | version = "0.1" 4 | description = "Tools for building SQLite databases from files and directories" 5 | readme = "README.md" 6 | authors = [{name = "Simon Willison"}] 7 | license = {text = "Apache-2.0"} 8 | classifiers = [ 9 | "License :: OSI Approved :: Apache Software License" 10 | ] 11 | dependencies = [ 12 | "click", 13 | "sqlite-utils", 14 | ] 15 | 16 | [project.scripts] 17 | db-build = "db_build.cli:cli" 18 | 19 | [project.urls] 20 | Homepage = "https://github.com/simonw/db-buld" 21 | Changelog = "https://github.com/simonw/db-buld/releases" 22 | Issues = "https://github.com/simonw/db-buld/issues" 23 | CI = "https://github.com/simonw/db-buld/actions" 24 | 25 | [project.entry-points.sqlite_utils] 26 | db_build = "db_build.sqlite_utils_plugin" 27 | 28 | [project.optional-dependencies] 29 | test = ["pytest"] 30 | -------------------------------------------------------------------------------- /db_build/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | import pathlib 3 | import sqlite_utils 4 | from sqlite_utils.utils import TypeTracker, rows_from_file 5 | 6 | 7 | @click.command() 8 | @click.argument( 9 | "database", 10 | type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 11 | required=True, 12 | ) 13 | @click.argument( 14 | "paths", 15 | type=click.Path(exists=True, file_okay=True, dir_okay=True, allow_dash=True), 16 | nargs=-1, 17 | ) 18 | def cli(database, paths): 19 | "Build a SQLite database from files and directories" 20 | db = sqlite_utils.Database(database) 21 | paths = [pathlib.Path(path) for path in paths] 22 | for path in paths: 23 | if path.suffix == ".csv": 24 | with path.open("rb") as fp: 25 | rows, _ = rows_from_file(fp) 26 | tracker = TypeTracker() 27 | db[path.stem].insert_all(tracker.wrap(rows)) 28 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python Package 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | test: 12 | runs-on: ubuntu-latest 13 | strategy: 14 | matrix: 15 | python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v4 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | cache: pip 23 | cache-dependency-path: setup.py 24 | - name: Install dependencies 25 | run: | 26 | pip install -e '.[test,compile]' 27 | - name: Run tests 28 | run: | 29 | pytest 30 | deploy: 31 | runs-on: ubuntu-latest 32 | needs: [test] 33 | steps: 34 | - uses: actions/checkout@v3 35 | - name: Set up Python 36 | uses: actions/setup-python@v4 37 | with: 38 | python-version: "3.11" 39 | cache: pip 40 | cache-dependency-path: setup.py 41 | - name: Install dependencies 42 | run: | 43 | pip install setuptools wheel twine build 44 | - name: Publish 45 | env: 46 | TWINE_USERNAME: __token__ 47 | TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} 48 | run: | 49 | python -m build 50 | twine upload dist/* 51 | 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # db-build 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/db-build.svg)](https://pypi.org/project/db-build/) 4 | [![Changelog](https://img.shields.io/github/v/release/simonw/db-build?include_prereleases&label=changelog)](https://github.com/simonw/db-build/releases) 5 | [![Tests](https://github.com/simonw/db-build/workflows/Test/badge.svg)](https://github.com/simonw/db-build/actions?query=workflow%3ATest) 6 | [![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://github.com/simonw/db-build/blob/main/LICENSE) 7 | 8 | Tools for building SQLite databases from files and directories 9 | 10 | ## Installation 11 | 12 | Install using `pip` or `pipx`: 13 | ```bash 14 | pip install db-build 15 | ``` 16 | If you have `sqlite-utils` installed as well, this will act as a plugin and add a `sqlite-utils build` command. 17 | 18 | ## Usage 19 | 20 | `db-build` can build databases from a number of different flat file formats. 21 | 22 | It is always called with a SQLite database as the first argument, which can be a file that does not exist yet. 23 | 24 | Any subsequent arguments will be treated as files or directories that should be loaded into that database. 25 | 26 | A simple initial example, adding all CSV files in the current directory: 27 | 28 | ```bash 29 | db-build data.db *.csv 30 | ``` 31 | 32 | ## Development 33 | 34 | To set up this plugin locally, first checkout the code. Then create a new virtual environment: 35 | ```bash 36 | cd db-build 37 | python3 -m venv venv 38 | source venv/bin/activate 39 | ``` 40 | Now install the dependencies and test dependencies: 41 | ```bash 42 | pip install -e '.[test]' 43 | ``` 44 | To run the tests: 45 | ```bash 46 | pytest 47 | ``` 48 | --------------------------------------------------------------------------------