├── binder
│   ├── runtime.txt
│   ├── postBuild
│   └── requirements.txt
├── dbcooper
│   ├── data
│   │   ├── __init__.py
│   │   └── lahman.py
│   ├── __init__.py
│   ├── base.py
│   ├── tests
│   │   ├── conftest.py
│   │   ├── test_example_schemas.py
│   │   └── helpers.py
│   ├── utils.py
│   ├── collect.py
│   ├── tables.py
│   ├── dbcooper.py
│   ├── finder.py
│   └── inspect.py
├── docs
│   ├── .gitignore
│   ├── api
│   │   ├── index.rst
│   │   └── example_class.rst
│   ├── index.rst
│   └── conf.py
├── MANIFEST.in
├── .env.dev
├── .pre-commit-config.yaml
├── docker-compose.yml
├── Makefile
├── LICENSE
├── examples
│   ├── backends.Rmd
│   └── lahman.Rmd
├── pyproject.toml
├── requirements
│   ├── 2022-01-01.txt
│   └── dev.txt
├── .gitignore
├── .github
│   └── workflows
│       └── ci.yml
├── README.Rmd
└── README.md

--------------------------------------------------------------------------------
/binder/runtime.txt:
--------------------------------------------------------------------------------
1 | python-3.9
2 | 
--------------------------------------------------------------------------------
/binder/postBuild:
--------------------------------------------------------------------------------
1 | set -e
2 | 
3 | pip install -e .
4 | 
--------------------------------------------------------------------------------
/dbcooper/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .lahman import lahman_sqlite
2 | 
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | # autosummary generated doc pages
2 | api/api_card
3 | 
--------------------------------------------------------------------------------
/docs/api/index.rst:
--------------------------------------------------------------------------------
1 | API
2 | =============
3 | 
4 | .. toctree::
5 |    :maxdepth: 2
6 | 
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | exclude .*
2 | prune .*
3 | prune requirements
4 | prune binder
5 | prune docs
6 | 
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to dbcooper's documentation!
2 | ====================================
3 | 
4 | .. toctree::
5 |    :maxdepth: 2
6 |    :caption: Contents:
7 | 
8 |    api/index
9 | 
10 | 
11 | 
12 | Indices and tables
13 | ==================
14 | 
15 | * :ref:`genindex`
16 | * :ref:`modindex`
17 | * :ref:`search`
18 | 
--------------------------------------------------------------------------------
/docs/api/example_class.rst:
--------------------------------------------------------------------------------
1 | Example Class
2 | =============
3 | 
4 | .. currentmodule:: dbcooper
5 | 
6 | Constructor
7 | -----------
8 | 
9 | .. autosummary::
10 |    :toctree: api_card
11 | 
12 |    ExampleClass
13 | 
14 | Methods
15 | -------
16 | 
17 | .. autosummary::
18 |    :toctree: api_card
19 | 
20 |    ExampleClass.show
21 | 
--------------------------------------------------------------------------------
/.env.dev:
--------------------------------------------------------------------------------
1 | SB_TEST_PGDATABASE=postgres
2 | SB_TEST_PGPORT=5432
3 | SB_TEST_PGUSER=postgres
4 | SB_TEST_PGPASSWORD=""
5 | SB_TEST_PGHOST=localhost
6 | 
7 | SB_TEST_BQ_DATABASE=ci
8 | SB_TEST_BQPROJECT=siuba-tests
9 | 
10 | SB_TEST_SNOWFLAKEDATABASE=DATASETS
11 | SB_TEST_SNOWFLAKEUSER="FILL_ME_IN"
12 | SB_TEST_SNOWFLAKEPASSWORD="FILL_ME_IN"
13 | SB_TEST_SNOWFLAKEHOST="FILL_ME_IN"
14 | SB_TEST_SNOWFLAKEOPTIONS="warehouse=COMPUTE_WH&role=CI_USER"
15 | 
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | exclude: ".*\\.csv"
2 | repos:
3 |   - repo: https://github.com/pre-commit/pre-commit-hooks
4 |     rev: v2.4.0
5 |     hooks:
6 |       - id: flake8
7 |         # line too long and line before binary operator (black is ok with these)
8 |         types:
9 |           - python
10 |       - id: trailing-whitespace
11 |       - id: end-of-file-fixer
12 |       - id: check-yaml
13 |         args: ["--unsafe"]
14 |       - id: check-added-large-files
15 |   - repo: https://github.com/psf/black
16 |     rev: 19.10b0
17 |     hooks:
18 |       - id: black
19 | 
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3.1'
2 | 
3 | services:
4 | 
5 |   db_mysql:
6 |     image: mysql
7 |     restart: always
8 |     environment:
9 |       MYSQL_ROOT_PASSWORD: ""
10 |       MYSQL_ALLOW_EMPTY_PASSWORD: 1
11 |       MYSQL_DATABASE: "public"
12 |     ports:
13 |       - 3307:3306
14 |     # by default, mysql rounds to 4 decimals, but tests require more precision
15 |     command: --div-precision-increment=30
16 | 
17 |   db:
18 |     image: postgres
19 |     restart: always
20 |     environment:
21 |       POSTGRES_PASSWORD: ""
22 |       POSTGRES_HOST_AUTH_METHOD: "trust"
23 |     ports:
24 |       - 5433:5432
25 | 
--------------------------------------------------------------------------------
/dbcooper/__init__.py:
--------------------------------------------------------------------------------
1 | from importlib.metadata import version as _v
2 | 
3 | # Set version -----------------------------------------------------------------
4 | 
5 | __version__ = _v("dbcooper")
6 | 
7 | del _v
8 | 
9 | # Main imports ----------------------------------------------------------------
10 | 
11 | from .dbcooper import DbCooper # noqa
12 | from .finder import TableFinder, AccessorBuilder, AccessorHierarchyBuilder
13 | from .tables import DbcDocumentedTable, DbcSimpleTable
14 | 
15 | __all__ = (
16 |     "DbCooper",
17 |     "TableFinder",
18 |     "AccessorBuilder",
19 |     "AccessorHierarchyBuilder",
20 |     "DbcDocumentedTable",
21 |     "DbcSimpleTable",
22 | )
--------------------------------------------------------------------------------
/dbcooper/base.py:
--------------------------------------------------------------------------------
1 | from dataclasses import dataclass, astuple
2 | 
3 | @dataclass(frozen=True)
4 | class TableName:
5 |     database: "str | None"
6 |     schema: "str | None"
7 |     table: "str"
8 | 
9 |     def to_tuple(self, exists=False):
10 |         tup = astuple(self)
11 | 
12 |         if exists:
13 |             return tuple(x for x in tup if x is not None)
14 | 
15 |         return tup
16 | 
17 |     def field_index_from_end(self, part):
18 |         # could derive from dataclasses.fields, but probably not worth it.
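        # (indexing from the end keeps the schema/table positions stable even
        # when a leading part, like the database, is dropped from the tuple,
        # as in to_tuple(exists=True))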
19 |         if part == "database":
20 |             return -3
21 |         elif part == "schema":
22 |             return -2
23 |         elif part == "table":
24 |             return -1
25 | 
26 |     def apply_maybe(self, f):
27 |         tup = self.to_tuple()
28 |         return self.__class__(*[f(x) if x is not None else x for x in tup])
29 | 
30 | @dataclass(frozen=True)
31 | class TableIdentity:
32 |     schema: "str | quoted_name | None"
33 |     table: "str | quoted_name"
34 | 
35 | 
--------------------------------------------------------------------------------
/dbcooper/data/lahman.py:
--------------------------------------------------------------------------------
1 | descriptions = {
2 |     "Salaries": {
3 |         "schema": "Player salaries, going back to 1985.",
4 |         "columns": {
5 |             "yearID": "Year.",
6 |             "teamID": "Team ID.",
7 |             "lgID": "League ID.",
8 |             "playerID": 'Player ID. See e.g. the "People" table for player info.',
9 |             "Salary": "Salary (in dollars).",
10 |         }
11 |     }
12 | }
13 | 
14 | def lahman_sqlite(engine=None, schema="lahman"):
15 |     from sqlalchemy import create_engine
16 |     if engine is None:
17 |         raise NotImplementedError()
18 |         #engine = create_engine("sqlite://")
19 | 
20 |     engine.execute("ATTACH ':memory:' AS %s" % schema)
21 |     load_tables_for_engine(engine, schema=schema)
22 |     #return engine
23 | 
24 | def load_tables_for_engine(engine, exclude=[], **kwargs):
25 |     import lahman
26 |     for name in lahman._accessors:
27 |         if name in exclude: continue
28 |         df = getattr(lahman, name)()
29 |         df.to_sql(name, engine, **kwargs)
30 | 
31 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | SPHINX_BUILDARGS=
2 | 
3 | .PHONY: requirements test
4 | 
5 | dev-start:
6 | 	docker-compose up -d
7 | 
8 | dev-stop:
9 | 	docker-compose down
10 | 
11 | test:
12 | 	pytest
13 | 
14 | requirements/dev.txt: setup.cfg
15 | 	@# allows you to do this...
16 | 	@# make requirements | tee > requirements/some_file.txt
17 | 	@pip-compile setup.cfg --rebuild --extra dev --output-file=- > $@
18 | 
19 | binder/requirements.txt: setup.cfg
20 | 	@pip-compile setup.cfg --rebuild --extra binder --output-file=- > $@
21 | 
22 | docs-build:
23 | 	cd docs && sphinx-build . ./_build/html $(SPHINX_BUILDARGS)
24 | 
25 | docs-watch:
26 | 	cd docs && sphinx-autobuild . ./_build/html $(SPHINX_BUILDARGS)
27 | 
28 | README.md: README.Rmd
29 | 	jupytext --from Rmd --to ipynb --output - $^ \
30 | 	| jupyter nbconvert \
31 | 	--stdin --to markdown \
32 | 	--execute \
33 | 	--ExecutePreprocessor.kernel_name='venv-dbcooper-py' \
34 | 	--TagRemovePreprocessor.remove_all_outputs_tags='hide-cell' \
35 | 	--TagRemovePreprocessor.remove_input_tags='hide-cell' \
36 | 	--output $@
37 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2022 Michael Chow
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/dbcooper/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | 
3 | from siuba.tests.helpers import SqlBackend, BigqueryBackend, CloudBackend
4 | from dbcooper.tests.helpers import create_examples
5 | 
6 | params_backend = [
7 |     pytest.param(lambda: SqlBackend("postgresql"), id = "postgresql", marks=pytest.mark.postgresql),
8 |     pytest.param(lambda: SqlBackend("mysql"), id = "mysql", marks=pytest.mark.mysql),
9 |     pytest.param(lambda: SqlBackend("sqlite"), id = "sqlite", marks=pytest.mark.sqlite),
10 |     pytest.param(lambda: SqlBackend("duckdb"), id = "duckdb", marks=pytest.mark.duckdb),
11 |     pytest.param(lambda: BigqueryBackend("bigquery"), id = "bigquery", marks=pytest.mark.bigquery),
12 |     pytest.param(lambda: CloudBackend("snowflake"), id = "snowflake", marks=pytest.mark.snowflake),
13 | ]
14 | 
15 | @pytest.fixture(params=params_backend, scope = "session")
16 | def backend(request):
17 |     backend = request.param()
18 |     if backend.name in ["snowflake", "bigquery"]:
19 |         # We can't easily set up and teardown new databases for cloud providers
20 |         # so rely on creating the data outside of tests
21 |         pass
22 |     else:
23 |         create_examples(backend.engine)
24 | 
25 |     return backend
26 | 
--------------------------------------------------------------------------------
/examples/backends.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | jupyter:
3 |   jupytext:
4 |     formats: ipynb,Rmd
5 |     text_representation:
6 |       extension: .Rmd
7 |       format_name: rmarkdown
8 |       format_version: '1.2'
9 |       jupytext_version: 1.13.7
10 |   kernelspec:
11 |     display_name: venv-dbcooper-py
12 |     language: python
13 |     name: venv-dbcooper-py
14 | ---
15 | 
16 | ```{python}
17 | from dotenv import load_dotenv
18 | from dbcooper.utils import SingleGeneric
19 | from sqlalchemy.schema import CreateSchema
20 | 
21 | load_dotenv()
22 | 
23 | from dbcooper.tests.helpers import create_examples
24 | ```
25 | 
26 | ```{python}
27 | from siuba.tests.helpers import CloudBackend, SqlBackend, BigqueryBackend
28 | 
29 | be_snow = CloudBackend("snowflake")
30 | be_sqlite = SqlBackend("sqlite")
31 | be_pg = SqlBackend("postgresql")
32 | be_mysql = SqlBackend("mysql")
33 | be_bq = BigqueryBackend("bigquery")
34 | ```
35 | 
36 | ```{python}
37 | #create_examples(be_snow.engine)
38 | ```
39 | 
40 | ```{python}
41 | from dbcooper.autotables import AutoTable
42 | from dbcooper.builder import TableFinder
43 | 
44 | #find_from_schema = TableFinder(exclude=(format_from_part="table")
45 | 
46 | tbl = AutoTable(be_snow.engine)#, find_from_schema)
47 | tbl._init()
48 | ```
49 | 
50 | ```{python}
51 | tbl.list()
52 | ```
53 | 
--------------------------------------------------------------------------------
/dbcooper/utils.py:
--------------------------------------------------------------------------------
1 | class SingleGeneric:
2 |     def __init__(self, name, dispatch_on_attr = "name"):
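        # (a tiny single-dispatch helper: implementations are registered under
        # a string key and chosen by an attribute of the first argument, its
        # .name by default, e.g. a sqlalchemy dialect; a registered default
        # acts as the fallback)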
3 | self.name = name 4 | self.registry = {} 5 | self.dispatch_on_attr = dispatch_on_attr 6 | self.default = None 7 | 8 | def __call__(self, dialect, *args, **kwargs): 9 | f_concrete = self.dispatch(dialect) 10 | 11 | return f_concrete(dialect, *args, **kwargs) 12 | 13 | def trait(self, obj): 14 | return getattr(obj, self.dispatch_on_attr) 15 | 16 | def dispatch(self, obj): 17 | type_str = self.trait(obj) 18 | 19 | try: 20 | f_concrete = self.registry[type_str] 21 | except KeyError: 22 | if self.default is not None: 23 | f_concrete = self.default 24 | else: 25 | raise NotImplementedError(f"Cannot dispatch on {type_str} and no default implementation.") 26 | 27 | return f_concrete 28 | 29 | def register(self, type_str, func=None): 30 | # allow it to function as a decorator 31 | if func is None: 32 | return lambda f: self.register(type_str, f) 33 | 34 | self.registry[type_str] = func 35 | 36 | return func 37 | 38 | def register_default(self, func): 39 | self.default = func 40 | 41 | def __repr__(self): 42 | return f"{type(self)}({self.name})" 43 | 44 | 45 | -------------------------------------------------------------------------------- /binder/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.8 3 | # To update, run: 4 | # 5 | # pip-compile --extra=binder --output-file=- setup.cfg 6 | # 7 | attrs==21.4.0 8 | # via jsonschema 9 | fastjsonschema==2.15.3 10 | # via nbformat 11 | importlib-resources==5.7.1 12 | # via 13 | # jsonschema 14 | # lahman 15 | jsonschema==4.6.0 16 | # via nbformat 17 | jupyter-core==4.10.0 18 | # via nbformat 19 | jupytext==1.13.8 20 | # via dbcooper (setup.cfg) 21 | lahman==0.0.1 22 | # via dbcooper (setup.cfg) 23 | markdown-it-py==2.1.0 24 | # via 25 | # jupytext 26 | # mdit-py-plugins 27 | mdit-py-plugins==0.3.0 28 | # via jupytext 29 | mdurl==0.1.1 30 | # via markdown-it-py 31 | nbformat==5.4.0 32 | # via jupytext 33 | numpy==1.22.4 34 | # via 35 | # pandas 36 | # siuba 37 | pandas==1.4.2 38 | # via 39 | # lahman 40 | # siuba 41 | pyrsistent==0.18.1 42 | # via jsonschema 43 | python-dateutil==2.8.2 44 | # via pandas 45 | pytz==2022.1 46 | # via pandas 47 | pyyaml==6.0 48 | # via 49 | # jupytext 50 | # siuba 51 | siuba==0.3.0 52 | # via dbcooper (setup.cfg) 53 | six==1.16.0 54 | # via python-dateutil 55 | sqlalchemy==1.4.37 56 | # via 57 | # dbcooper (setup.cfg) 58 | # siuba 59 | tabulate==0.8.9 60 | # via dbcooper (setup.cfg) 61 | toml==0.10.2 62 | # via jupytext 63 | traitlets==5.2.2.post1 64 | # via 65 | # jupyter-core 66 | # nbformat 67 | zipp==3.8.0 68 | # via importlib-resources 69 | -------------------------------------------------------------------------------- /examples/lahman.Rmd: -------------------------------------------------------------------------------- 1 | ```{python} 2 | # %load_ext autoreload 3 | ``` 4 | 5 | ```{python} 6 | from dbcooper import DbCooper 7 | from sqlalchemy import create_engine 8 | import lahman 9 | 10 | def load_tables_for_engine(engine, exclude=[], **kwargs): 11 | for name in lahman._accessors: 12 | if name in exclude: continue 13 | df = getattr(lahman, name)() 14 | df.to_sql(name, engine, **kwargs) 15 | ``` 16 | 17 | The example below shows 3 modes: 18 | 19 | * simple: table names are `_`. 20 | * formatted: table names are `
<table>`.
21 | * grouped: each schema is its own dictionary, accessed using `.<schema>.<table>
`. 22 | 23 | 24 | ## Sqlite (simple) 25 | 26 | ```{python} 27 | engine = create_engine("sqlite://") 28 | engine.execute("ATTACH ':memory:' AS lahman") 29 | load_tables_for_engine(engine, schema="lahman") 30 | ``` 31 | 32 | ```{python} 33 | tbl_flat = DbCooper(engine) 34 | ``` 35 | 36 | ```{python} 37 | tbl_flat.lahman_allstar_full() 38 | ``` 39 | 40 | ## Formatting table names 41 | 42 | ```{python} 43 | from dbcooper import AccessorBuilder 44 | 45 | # omits schema, and keeps only table name 46 | builder = AccessorBuilder(format_from_part="table") 47 | 48 | tbl_flat2 = DbCooper(engine, accessor_builder=builder) 49 | tbl_flat2.allstar_full() 50 | ``` 51 | 52 | ## Grouping tables by schema 53 | 54 | ```{python} 55 | from dbcooper import TableFinder 56 | from dbcooper.finder import AccessorHierarchyBuilder 57 | 58 | tbl_nested = DbCooper(engine, accessor_builder=AccessorHierarchyBuilder()) 59 | tbl_nested.lahman.allstar_full() 60 | ``` 61 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "wheel", "setuptools_scm>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools_scm] 6 | 7 | [tool.setuptools.packages.find] 8 | include = ["dbcooper"] 9 | 10 | [project] 11 | name = "dbcooper" 12 | description = "The dbcooper package turns a database connection into a collection of functions, handling logic for keeping track of connections and letting you take advantage of autocompletion when exploring a database." 13 | readme = "README.md" 14 | keywords = ["template", "packaging"] 15 | license.text = "MIT" 16 | authors = [ 17 | { name = "Michael Chow", email = "mc_al_github@fastmail.com" } 18 | ] 19 | dynamic = ["version"] 20 | classifiers = [ 21 | "Programming Language :: Python :: 3.8", 22 | "Programming Language :: Python :: 3.9", 23 | "Programming Language :: Python :: 3.10", 24 | ] 25 | dependencies = [ 26 | "sqlalchemy", 27 | "tabulate", 28 | ] 29 | requires-python = ">=3.10" 30 | 31 | [project.optional-dependencies] 32 | siuba = [ 33 | "siuba>=0.4.4", 34 | ] 35 | 36 | [dependency-groups] 37 | dev = [ 38 | "pip-tools", 39 | "importlib-resources", 40 | "ipykernel", 41 | "pydata-sphinx-theme", 42 | "pytest", 43 | "pytest-dotenv", 44 | "sqlalchemy-bigquery", 45 | "sphinx~=4.4.0", 46 | "snowflake-sqlalchemy", 47 | "psycopg2-binary", 48 | "pymysql", 49 | "jupytext", 50 | "numpy<2.0", 51 | "polars>=1.33.1", 52 | "duckdb<1.4.0", 53 | "siuba==0.4.5.dev1", 54 | "duckdb-engine>=0.17.0", 55 | "pyarrow>=21.0.0", 56 | ] 57 | 58 | binder = [ 59 | "jupytext", 60 | "lahman" 61 | ] 62 | 63 | [tool.pytest.ini_options] 64 | markers = [ 65 | "ex: a test runs against ExampleClass", 66 | "ex2: a test runs against ExampleClass2", 67 | ] 68 | testpaths = [ 69 | "dbcooper", 70 | ] 71 | 72 | [tool.ruff.lint] 73 | max-line-length = 90 74 | ignore = [ 75 | "E501", # line too long 76 | "W503", # line before binary operator 77 | ] 78 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. 
For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = "dbcooper" 21 | copyright = "2022, Michael Chow" 22 | author = "Michael Chow" 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = [ 31 | "sphinx.ext.autodoc", 32 | "sphinx.ext.autosummary", 33 | ] 34 | 35 | # Add any paths that contain templates here, relative to this directory. 36 | templates_path = ["_templates"] 37 | 38 | # List of patterns, relative to source directory, that match files and 39 | # directories to ignore when looking for source files. 40 | # This pattern also affects html_static_path and html_extra_path. 41 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 42 | 43 | 44 | # -- Options for HTML output ------------------------------------------------- 45 | 46 | # The theme to use for HTML and HTML Help pages. See the documentation for 47 | # a list of builtin themes. 48 | # 49 | html_theme = "pydata_sphinx_theme" 50 | 51 | # Add any paths that contain custom static files (such as style sheets) here, 52 | # relative to this directory. They are copied after the builtin static files, 53 | # so a file named "default.css" will overwrite the builtin "default.css". 
54 | html_static_path = ["_static"] 55 | -------------------------------------------------------------------------------- /requirements/2022-01-01.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.9 3 | # To update, run: 4 | # 5 | # pip-compile --extra=dev --output-file=- setup.cfg 6 | # 7 | alabaster==0.7.12 8 | # via sphinx 9 | attrs==21.4.0 10 | # via pytest 11 | babel==2.9.1 12 | # via sphinx 13 | beautifulsoup4==4.10.0 14 | # via pydata-sphinx-theme 15 | certifi==2021.10.8 16 | # via requests 17 | charset-normalizer==2.0.12 18 | # via requests 19 | docutils==0.17.1 20 | # via 21 | # pydata-sphinx-theme 22 | # sphinx 23 | idna==3.3 24 | # via requests 25 | imagesize==1.3.0 26 | # via sphinx 27 | importlib-metadata==4.11.2 28 | # via sphinx 29 | importlib-resources==5.4.0 30 | # via template-python-pkg (setup.cfg) 31 | iniconfig==1.1.1 32 | # via pytest 33 | jinja2==3.0.3 34 | # via sphinx 35 | markupsafe==2.1.0 36 | # via jinja2 37 | packaging==21.3 38 | # via 39 | # pytest 40 | # sphinx 41 | pluggy==1.0.0 42 | # via pytest 43 | py==1.11.0 44 | # via pytest 45 | pydata-sphinx-theme==0.8.0 46 | # via template-python-pkg (setup.cfg) 47 | pygments==2.11.2 48 | # via sphinx 49 | pyparsing==3.0.7 50 | # via packaging 51 | pytest==7.0.1 52 | # via 53 | # pytest-dotenv 54 | # template-python-pkg (setup.cfg) 55 | pytest-dotenv==0.5.2 56 | # via template-python-pkg (setup.cfg) 57 | python-dotenv==0.19.2 58 | # via pytest-dotenv 59 | pytz==2021.3 60 | # via babel 61 | pyyaml==6.0 62 | # via template-python-pkg (setup.cfg) 63 | requests==2.27.1 64 | # via sphinx 65 | snowballstemmer==2.2.0 66 | # via sphinx 67 | soupsieve==2.3.1 68 | # via beautifulsoup4 69 | sphinx==4.4.0 70 | # via pydata-sphinx-theme 71 | sphinxcontrib-applehelp==1.0.2 72 | # via sphinx 73 | sphinxcontrib-devhelp==1.0.2 74 | # via sphinx 75 | sphinxcontrib-htmlhelp==2.0.0 76 | # via sphinx 77 | sphinxcontrib-jsmath==1.0.1 78 | # via sphinx 79 | sphinxcontrib-qthelp==1.0.3 80 | # via sphinx 81 | sphinxcontrib-serializinghtml==1.1.5 82 | # via sphinx 83 | tomli==2.0.1 84 | # via pytest 85 | urllib3==1.26.8 86 | # via requests 87 | zipp==3.7.0 88 | # via 89 | # importlib-metadata 90 | # importlib-resources 91 | -------------------------------------------------------------------------------- /dbcooper/tests/test_example_schemas.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from polars import DataFrame as PlDataFrame 4 | from duckdb import DuckDBPyConnection 5 | 6 | from dbcooper import DbCooper 7 | from dbcooper.tests.helpers import EXAMPLE_SCHEMAS, EXAMPLE_DATA, assert_frame_sort_equal 8 | from dbcooper.tables import DbcSimpleTable 9 | from dbcooper.finder import TableFinder, AccessorBuilder 10 | from dbcooper.collect import to_polars, to_duckdb, name_to_tbl 11 | 12 | from siuba import collect 13 | 14 | @pytest.fixture 15 | def tbl(backend): 16 | if backend.name == "snowflake": 17 | # snowflake can't do reflection on schemas that aren't uppercase, see 18 | # see https://github.com/snowflakedb/snowflake-sqlalchemy/issues/276 19 | tbl = DbCooper(backend.engine, table_factory=DbcSimpleTable) 20 | elif backend.name == "duckdb": 21 | # tests currently assume database name isn't used in accessor 22 | tbl = DbCooper(backend.engine, accessor_builder=AccessorBuilder(format_from_part="schema")) 23 | else: 24 | tbl = DbCooper(backend.engine) 25 | 26 | tbl._init() 27 | return 
tbl 28 | 29 | 30 | 31 | def test_example_number_of_accessors(tbl): 32 | assert len(tbl._accessors) == len(EXAMPLE_SCHEMAS) 33 | 34 | 35 | def test_example_repr_exists(tbl): 36 | if tbl._engine.name == "snowflake": 37 | # see https://github.com/snowflakedb/snowflake-sqlalchemy/issues/276 38 | pytest.xfail() 39 | 40 | for (schema, table_name), attr_name in EXAMPLE_SCHEMAS.items(): 41 | table = getattr(tbl, attr_name) 42 | assert table_name in repr(table) 43 | 44 | 45 | def test_example_data_roundtrip_siuba(tbl): 46 | for (schema, table_name), attr_name in EXAMPLE_SCHEMAS.items(): 47 | table = getattr(tbl, attr_name) 48 | assert_frame_sort_equal(collect(table()), EXAMPLE_DATA) 49 | 50 | def test_to_polars(tbl): 51 | res = to_polars(tbl._engine, name_to_tbl(tbl._engine, "lower", "mai")) 52 | assert isinstance(res, PlDataFrame) 53 | 54 | def test_to_duckdb(tbl): 55 | if tbl._engine.name != "duckdb": 56 | pytest.skip("to_duckdb only works with duckdb engines") 57 | 58 | res = to_duckdb(tbl._engine, name_to_tbl(tbl._engine, "lower", "mai")) 59 | assert isinstance(res, DuckDBPyConnection) 60 | 61 | -------------------------------------------------------------------------------- /dbcooper/collect.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from sqlalchemy import sql 4 | from sqlalchemy.engine import Engine 5 | from sqlalchemy.sql.expression import TextClause 6 | from typing import TYPE_CHECKING 7 | 8 | 9 | if TYPE_CHECKING: 10 | from siuba.sql import LazyTbl 11 | from duckdb import DuckDBPyConnection 12 | from polars import DataFrame as PlDataFrame 13 | 14 | 15 | def query_to_tbl(engine: Engine, query: str) -> TextClause: 16 | 17 | full_query = f""" 18 | SELECT * FROM (\n{query}\n) WHERE 1 = 0 19 | """ 20 | 21 | with engine.connect() as con: 22 | q = con.execute(sql.text(full_query)) 23 | 24 | columns = [sql.column(k) for k in q.keys()] 25 | text_as_from = sql.text(query).columns(*columns).alias() 26 | 27 | return text_as_from 28 | 29 | 30 | def name_to_tbl(engine: Engine, table_name: str, schema: str | None=None) -> sql.TableClause: 31 | # sql dialects like snowflake do not have great reflection capabilities, 32 | # so we execute a trivial query to discover the column names 33 | explore_table = sql.table(table_name, schema=schema) 34 | trivial = explore_table.select(sql.text("0 = 1")).add_columns(sql.text("*")) 35 | 36 | with engine.connect() as con: 37 | q = con.execute(trivial) 38 | 39 | columns = [sql.column(k) for k in q.keys()] 40 | return sql.table(table_name, *columns, schema=schema) 41 | 42 | 43 | def to_siuba(engine: Engine, expr: str | TextClause | sql.TableClause) -> LazyTbl: 44 | from siuba.sql import LazyTbl 45 | 46 | expr = query_to_tbl(engine, expr) if isinstance(expr, str) else expr 47 | 48 | return LazyTbl(engine, expr) 49 | 50 | 51 | def to_polars(engine: Engine, expr: str | TextClause | sql.TableClause) -> PlDataFrame: 52 | from polars import read_database 53 | 54 | expr = query_to_tbl(engine, expr) if isinstance(expr, str) else expr 55 | 56 | if isinstance(expr, sql.TableClause): 57 | expr = expr.select().add_columns() 58 | 59 | with engine.connect() as con: 60 | return read_database(expr, con) 61 | 62 | 63 | def to_duckdb(engine: Engine, expr: str | TextClause | sql.TableClause) -> DuckDBPyConnection: 64 | import duckdb 65 | 66 | if engine.name != "duckdb": 67 | raise ValueError("This function only works with duckdb engines") 68 | 69 | expr = query_to_tbl(engine, expr) if isinstance(expr, 
str) else expr 70 | 71 | if isinstance(expr, sql.TableClause): 72 | expr = expr.select().add_columns() 73 | 74 | with engine.connect() as con: 75 | # assumes we are using duckdb_engine 76 | # TODO: expr should be compiled? 77 | return con.connection.execute(str(expr)) 78 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Mac OSX ===================================================================== 2 | .DS_Store 3 | 4 | # Vim ========================================================================= 5 | .*.sw[po] 6 | 7 | # Python ====================================================================== 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | pip-wheel-metadata/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | 61 | # Translations 62 | *.mo 63 | *.pot 64 | 65 | # Django stuff: 66 | *.log 67 | local_settings.py 68 | db.sqlite3 69 | db.sqlite3-journal 70 | 71 | # Flask stuff: 72 | instance/ 73 | .webassets-cache 74 | 75 | # Scrapy stuff: 76 | .scrapy 77 | 78 | # Sphinx documentation 79 | docs/_build/ 80 | 81 | # PyBuilder 82 | target/ 83 | 84 | # Jupyter Notebook 85 | .ipynb_checkpoints 86 | 87 | # IPython 88 | profile_default/ 89 | ipython_config.py 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # pipenv 95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 98 | # install all needed dependencies. 99 | #Pipfile.lock 100 | 101 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 102 | __pypackages__/ 103 | 104 | # Celery stuff 105 | celerybeat-schedule 106 | celerybeat.pid 107 | 108 | # SageMath parsed files 109 | *.sage.py 110 | 111 | # Environments 112 | .env 113 | .venv 114 | env/ 115 | venv/ 116 | ENV/ 117 | env.bak/ 118 | venv.bak/ 119 | 120 | # Spyder project settings 121 | .spyderproject 122 | .spyproject 123 | 124 | # Rope project settings 125 | .ropeproject 126 | 127 | # mkdocs documentation 128 | /site 129 | 130 | # mypy 131 | .mypy_cache/ 132 | .dmypy.json 133 | dmypy.json 134 | 135 | # Pyre type checker 136 | .pyre/ 137 | -------------------------------------------------------------------------------- /dbcooper/tables.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from tabulate import tabulate 4 | from typing import TYPE_CHECKING 5 | from sqlalchemy import Table, MetaData 6 | 7 | from .collect import name_to_tbl, to_siuba 8 | 9 | if TYPE_CHECKING: 10 | import sqlalchemy as sqla 11 | from sqlalchemy.engine import Engine 12 | 13 | class DbcSimpleTable: 14 | """Represent a database table.""" 15 | def __init__(self, engine: Engine, table_name: str, schema: str | None = None, to_frame=to_siuba): 16 | self.engine = engine 17 | self.table_name = table_name 18 | self.schema = schema 19 | self.to_frame = to_frame 20 | 21 | def __repr__(self): 22 | repr_args = map(repr, [self.table_name, self.schema]) 23 | joined_repr = ", ".join(repr_args) 24 | return f"{self.__class__.__name__}(..., {joined_repr})" 25 | 26 | def _repr_html_(self): 27 | raise NotImplementedError() 28 | 29 | def __call__(self): 30 | sqla_tbl = self._create_table() 31 | return self.to_frame(self.engine, sqla_tbl) 32 | 33 | def _create_table(self) -> sqla.sql.TableClause: 34 | return name_to_tbl(self.engine, self.table_name, self.schema) 35 | 36 | 37 | class DbcDocumentedTable(DbcSimpleTable): 38 | """Represent a database table with a nice column summary (including comments). 39 | 40 | Note that this class's objects return a siuba LazyTbl when called, and print 41 | out the table and column descriptions otherwise. 42 | """ 43 | 44 | table_comment_fields = {"name": "name", "type": "type", "description": "comment"} 45 | 46 | def _create_table(self) -> sqla.Table: 47 | table = Table(self.table_name, MetaData(), schema=self.schema, autoload_with = self.engine) 48 | return table 49 | 50 | # methods for representation ---------------------------------------------- 51 | 52 | def _col_to_row(self, col): 53 | return {k: getattr(col, v) for k,v in self.table_comment_fields.items()} 54 | 55 | def _repr_body(self, table, tablefmt): 56 | rows = [self._col_to_row(col) for col in table.columns] 57 | return tabulate(rows, headers="keys", tablefmt=tablefmt) 58 | 59 | @staticmethod 60 | def _get_table_comment(table): 61 | if table.comment is None: 62 | return "(No table description.)" 63 | else: 64 | return table.comment 65 | 66 | def _repr_html_(self): 67 | table = self._create_table() 68 | 69 | table_comment = self._get_table_comment(table) 70 | 71 | return f"""\ 72 |
<h3> {table.name} </h3>
73 | <p> {table_comment} </p>
74 | {self._repr_body(table, "html")}\ 75 | """ 76 | 77 | def __repr__(self): 78 | table = self._create_table() 79 | 80 | table_comment = self._get_table_comment(table) 81 | 82 | return f"""\ 83 | {table.name} 84 | {table_comment} 85 | 86 | {self._repr_body(table, "simple")}\ 87 | """ 88 | 89 | 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /dbcooper/dbcooper.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from sqlalchemy import create_engine 4 | 5 | from .finder import TableFinder, AccessorBuilder 6 | from .tables import DbcDocumentedTable 7 | from .collect import query_to_tbl, name_to_tbl, to_siuba 8 | 9 | import typing 10 | 11 | if typing.TYPE_CHECKING: 12 | from sqlalchemy.engine import Engine 13 | 14 | 15 | class DbCooper: 16 | def __init__( 17 | self, 18 | engine: "str | Engine", 19 | table_finder=TableFinder(), 20 | table_factory=DbcDocumentedTable, 21 | accessor_builder=AccessorBuilder(), 22 | to_frame=to_siuba, 23 | initialize=True, 24 | ): 25 | 26 | if isinstance(engine, str): 27 | engine = create_engine(engine) 28 | 29 | self._engine: Engine = engine 30 | self._accessors = {} 31 | self._table_finder = table_finder 32 | self._table_factory = table_factory 33 | self._accessor_builder = accessor_builder 34 | self._to_frame = to_frame 35 | 36 | if initialize: 37 | self._init() 38 | 39 | def __getattr__(self, k): 40 | if k in self._accessors: 41 | return self._accessors[k] 42 | 43 | raise AttributeError("No such attribute %s" % k) 44 | 45 | def __getitem__(self, k): 46 | if k in self._accessors: 47 | return self._accessors[k] 48 | 49 | raise AttributeError("No such attribute %s" % k) 50 | 51 | 52 | def __dir__(self): 53 | dbc_methods = ["reset", "query", "list", "tbl"] 54 | return dbc_methods + list(self._accessors.keys()) 55 | 56 | def _ipython_key_completions_(self): 57 | return list(self._accessors) 58 | 59 | def _init(self): 60 | with self._engine.connect() as conn: 61 | table_map = self._table_finder.map_tables(self._engine.dialect, conn) 62 | 63 | accessors = self._accessor_builder.create_accessors( 64 | self._engine, 65 | self._table_factory, 66 | table_map, 67 | self._to_frame, 68 | ) 69 | self._accessors = accessors 70 | 71 | def reset(self): 72 | self._init() 73 | 74 | def list(self, raw=False): 75 | dialect = self._engine.dialect 76 | with self._engine.connect() as conn: 77 | tables = self._table_finder.list_tables(dialect, conn) 78 | 79 | if raw: 80 | return tables 81 | else: 82 | results = [] 83 | for table in tables: 84 | ident = self._table_finder.identify_table(dialect, table) 85 | results.append(self._table_finder.join_identifiers(ident)) 86 | 87 | return results 88 | 89 | def query(self, query): 90 | expr = query_to_tbl(self._engine, query) 91 | return self._to_frame(self._engine, expr) 92 | 93 | def tbl(self, name, schema=None): 94 | expr = name_to_tbl(self._engine, name, schema) 95 | return self._to_frame(self._engine, expr) 96 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: ["main", "dev-*"] 7 | pull_request: 8 | release: 9 | types: [published] 10 | 11 | jobs: 12 | run-if: 13 | name: "Run If" 14 | runs-on: ubuntu-latest 15 | if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.fork == false 
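    # this job gates the rest of the workflow: it runs for pushes and for PRs
    # from the same repo, but skips PRs from forks (fork PRs can't read the
    # repository secrets that the test jobs below rely on)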
16 | steps: 17 | - run: | 18 | echo "Running CI" 19 | test-python: 20 | name: "Test Python Version" 21 | needs: ["run-if"] 22 | runs-on: ubuntu-latest 23 | strategy: 24 | fail-fast: false 25 | matrix: 26 | # Checks based on python versions --- 27 | python-version: ["3.10", "3.11", "3.12"] 28 | requirements: [""] 29 | 30 | steps: 31 | - uses: actions/checkout@v2 32 | 33 | # setup docker and gcp authentication ---- 34 | # 35 | - name: Run docker-compose 36 | run: | 37 | docker compose up --build -d 38 | 39 | - name: Set up Cloud SDK 40 | uses: google-github-actions/setup-gcloud@v0 41 | with: 42 | project_id: siuba-tests 43 | service_account_key: ${{ secrets.GCP_SA_KEY }} 44 | export_default_credentials: true 45 | 46 | # install python w/ dependencies, and run tests ---- 47 | # 48 | - uses: actions/setup-python@v2 49 | with: 50 | python-version: "${{ matrix.python-version }}" 51 | - name: Install uv 52 | uses: astral-sh/setup-uv@v6 53 | - name: Install dependencies 54 | run: | 55 | uv sync --dev 56 | - name: Run tests 57 | run: | 58 | uv run pytest -m "postgresql" 59 | env: 60 | SB_TEST_MYSQLPORT: 3307 61 | SB_TEST_PGPORT: 5433 62 | 63 | SB_TEST_BQDATABASE: "" 64 | SB_TEST_BQPROJECT: dbcooper-tests 65 | 66 | SB_TEST_SNOWFLAKEDATABASE: "DBCOOPER_DB1" 67 | SB_TEST_SNOWFLAKEUSER: "DBCOOPER_CI" 68 | SB_TEST_SNOWFLAKEPASSWORD: ${{ secrets.SB_TEST_SNOWFLAKEPASSWORD }} 69 | SB_TEST_SNOWFLAKEHOST: "qf04441.us-east-2.aws" 70 | SB_TEST_SNOWFLAKEOPTIONS: "warehouse=COMPUTE_WH&role=USER_DBCOOPER_CI" 71 | 72 | build-docs: 73 | name: "Build Docs" 74 | needs: ["run-if"] 75 | runs-on: ubuntu-latest 76 | steps: 77 | - uses: actions/checkout@v2 78 | - uses: actions/setup-python@v2 79 | with: 80 | python-version: "3.10" 81 | - name: Install dependencies from requirements file 82 | run: | 83 | python -m pip install --upgrade pip 84 | python -m pip install -r requirements/dev.txt 85 | - name: Build docs 86 | run: | 87 | make docs-build 88 | 89 | release-pypi: 90 | name: "Release to pypi" 91 | runs-on: ubuntu-latest 92 | if: github.event_name == 'release' 93 | needs: [build-docs] 94 | steps: 95 | - uses: actions/checkout@v2 96 | - uses: actions/setup-python@v2 97 | with: 98 | python-version: "3.10" 99 | - name: "Build Package" 100 | run: | 101 | python -m pip install build wheel 102 | python -m build --sdist --wheel 103 | - name: "Deploy to Test PyPI" 104 | uses: pypa/gh-action-pypi-publish@release/v1 105 | with: 106 | user: __token__ 107 | password: ${{ secrets.PYPI_API_TOKEN }} 108 | -------------------------------------------------------------------------------- /dbcooper/tests/helpers.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | from dbcooper.utils import SingleGeneric 4 | from sqlalchemy.sql.elements import quoted_name 5 | 6 | from siuba.tests.helpers import SqlBackend, BigqueryBackend, assert_frame_sort_equal 7 | 8 | EXAMPLE_SCHEMAS = { 9 | ("mai", "lower"): "mai_lower", 10 | ("mai", "UPPER"): "mai_UPPER", 11 | ("mai", "MiXeD"): "mai_MiXeD", 12 | 13 | ("MAIN_UPPER", "some_table"): "MAIN_UPPER_some_table", 14 | } 15 | 16 | 17 | EXAMPLE_DATA = pd.DataFrame({"x": [1,2,3], "y": ['a', 'b', 'b']}) 18 | 19 | # utilities ------------------------------------------------------------------- 20 | 21 | write_table = SingleGeneric("write_table") 22 | 23 | @write_table.register_default 24 | def _wt_default(engine, df, table_name, schema): 25 | return df.to_sql(quoted_name(table_name, True), engine, schema=quoted_name(schema, True), 
if_exists="replace",index=False) 26 | 27 | @write_table.register("sqlite") 28 | def _wt_sqlite(engine, df, table_name, schema): 29 | return df.to_sql(quoted_name(table_name, True), engine, schema=quoted_name(schema, True), if_exists="replace",index=False) 30 | 31 | 32 | @write_table.register("snowflake") 33 | def _wt_snowflake(engine, df, table_name, schema): 34 | # Note that I have literally spent more time trying to support writing 35 | # case sensitive schema + table names to snowflake, than in the development 36 | # of the rest of this library. The sqlalchemy dialect is not made for it, 37 | # the python connector methods fail silently, and pandas to_sql fails on 38 | # reflection (due to dialect issues). 39 | from sqlalchemy.sql.elements import quoted_name 40 | from snowflake.connector.pandas_tools import write_pandas, pd_writer 41 | 42 | ip = engine.dialect.identifier_preparer 43 | quoted_schema = ip.quote_identifier(schema) 44 | quoted_table_name = ip.quote_identifier(table_name) 45 | with engine.connect() as conn: 46 | conn.execute(f""" 47 | CREATE OR REPLACE TABLE {quoted_schema}.{quoted_table_name} ( 48 | x integer, 49 | y varchar(100) 50 | ) 51 | """) 52 | 53 | conn.execute(f""" 54 | INSERT INTO {quoted_schema}.{quoted_table_name} 55 | VALUES (1, 'a'), 56 | (2, 'b'), 57 | (3, 'b') 58 | """) 59 | #sf_conn = conn.connection.connection 60 | #conn.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_name}") 61 | #write_pandas( 62 | # sf_conn, 63 | # df, table_name, schema= schema, auto_create_table=True, 64 | #) 65 | 66 | 67 | create_examples = SingleGeneric("create_examples") 68 | 69 | @create_examples.register_default 70 | def _create_examples_default(engine): 71 | ip = engine.dialect.identifier_preparer 72 | 73 | for schema, table in EXAMPLE_SCHEMAS.keys(): 74 | with engine.connect() as conn: 75 | conn.execute(f"CREATE SCHEMA IF NOT EXISTS {ip.quote_identifier(schema)}") 76 | write_table(engine, EXAMPLE_DATA, table, schema) 77 | 78 | @create_examples.register("sqlite") 79 | def _create_examples_sqlite(engine): 80 | ip = engine.dialect.identifier_preparer 81 | prev_schemas = set() 82 | 83 | for schema, table in EXAMPLE_SCHEMAS.keys(): 84 | if schema not in prev_schemas: 85 | with engine.connect() as conn: 86 | conn.execute(f"ATTACH DATABASE ':memory:' AS {ip.quote_identifier(schema)}") 87 | 88 | prev_schemas.add(schema) 89 | write_table(engine, EXAMPLE_DATA, table, schema) 90 | 91 | -------------------------------------------------------------------------------- /dbcooper/finder.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections.abc import Mapping 4 | 5 | from .inspect import TableName, TableIdentity, list_tables, format_table, identify_table 6 | 7 | from typing import TYPE_CHECKING, Callable 8 | 9 | 10 | if TYPE_CHECKING: 11 | from .tables import DbcSimpleTable 12 | 13 | 14 | def _set_default(d, k, default): 15 | """Same behavior as dict.setdefault""" 16 | if k in d: 17 | return d[k] 18 | else: 19 | d[k] = default 20 | return default 21 | 22 | class AttributeDict(Mapping): 23 | """Similar to a dictionary, except items may also be accessed as attributes.""" 24 | 25 | def __init__(self, d=None): 26 | if d is None: 27 | self._d = {} 28 | else: 29 | # make a copy, just to be safe 30 | self._d = {**d} 31 | 32 | def __getitem__(self, k): 33 | return self._d[k] 34 | 35 | def __iter__(self): 36 | return iter(self._d) 37 | 38 | def __len__(self): 39 | return len(self._d) 40 | 41 | def 
__getattr__(self, k): 42 | if k in self._d: 43 | return self._d[k] 44 | 45 | raise AttributeError("No attribute %s" % k) 46 | 47 | def __setitem__(self, k, v): 48 | self._d[k] = v 49 | 50 | def __dir__(self): 51 | return list(self._d.keys()) 52 | 53 | def __repr__(self): 54 | repr_d = repr(self._d) 55 | return f"{self.__class__.__name__}({repr_d})" 56 | 57 | 58 | 59 | class TableFinder: 60 | # TODO: rename exclude_schemas 61 | # TODO: format="lowercase", exclude_schemas, exclude_tables 62 | def __init__(self, 63 | exclude_schemas=None, 64 | identify_from_part=None, 65 | ): 66 | # TODO: filter method 67 | self.exclude_schemas = exclude_schemas 68 | self.identify_from_part = identify_from_part 69 | 70 | def list_tables(self, dialect, conn): 71 | # first use generic method that dispatches on dialect name 72 | return list_tables(dialect, conn, self.exclude_schemas) 73 | 74 | 75 | def identify_table(self, dialect, table: TableName): 76 | return identify_table(dialect, table, self.identify_from_part) 77 | 78 | def join_identifiers(self, ident: TableIdentity): 79 | return f"{ident.schema}.{ident.table}" 80 | 81 | def map_tables(self, dialect, conn) -> Mapping[TableName, TableIdentity]: 82 | table_map = {} 83 | table_names = self.list_tables(dialect, conn) 84 | 85 | for name in table_names: 86 | ident_table = self.identify_table(dialect, name) 87 | table_map[name] = ident_table 88 | 89 | return table_map 90 | 91 | 92 | class AccessorBuilder: 93 | def __init__( 94 | self, 95 | format_from_part=None, 96 | name_format: "str | Callable[TableName, str]" = "identity", 97 | ): 98 | self.format_from_part=format_from_part 99 | self.name_format=name_format 100 | 101 | def format_table(self, dialect, table: TableName): 102 | if callable(self.name_format): 103 | return self.name_format(table) 104 | 105 | # first use generic method that dispatches on dialect name 106 | table = format_table(dialect, table, self.format_from_part) 107 | 108 | if self.name_format == "lower": 109 | return table.lower() 110 | elif self.name_format == "identity": 111 | return table 112 | else: 113 | raise ValueError( 114 | "Unknown name_format argument type: {type(self.name_format)}" 115 | ) 116 | 117 | def create_accessors(self, engine, table_factory: DbcSimpleTable, table_map: Mapping[TableName, TableIdentity], to_frame): 118 | accessors = AttributeDict() 119 | 120 | for table, ident in table_map.items(): 121 | fmt_name = self.format_table(engine.dialect, table) 122 | if fmt_name in accessors: 123 | raise Exception("multiple tables w/ formatted name: %s" % fmt_name) 124 | 125 | accessors[fmt_name] = table_factory(engine, ident.table, ident.schema, to_frame) 126 | 127 | return accessors 128 | 129 | 130 | class AccessorHierarchyBuilder(AccessorBuilder): 131 | def __init__( 132 | self, *args, omit_database=True, **kwargs 133 | ): 134 | super().__init__(*args, **kwargs) 135 | self.format_from_part="table" 136 | self.omit_database = omit_database 137 | 138 | def _group_by_level(self, table_map): 139 | from itertools import groupby 140 | 141 | sorted_items = sorted( 142 | sorted(table_map.items(), key=lambda x: x[0].database or ""), 143 | key=lambda x: x[0].schema or "" 144 | ) 145 | 146 | grouped = groupby(sorted_items, lambda x: (x[0].database, x[0].schema)) 147 | return {group_key: dict(iter_) for group_key, iter_ in grouped} 148 | 149 | def create_accessors(self, engine, table_factory, table_map, to_frame): 150 | 151 | grouped = self._group_by_level(table_map) 152 | 153 | res = AttributeDict() 154 | for (db, schema), sub_map in 
grouped.items():
155 |             sub_accessors = super().create_accessors(engine, table_factory, sub_map, to_frame)
156 |             acc_db = _set_default(res, db, AttributeDict())
157 |             if schema in acc_db:
158 |                 raise ValueError(
159 |                     "Already set accessors for this schema.\n"
160 |                     f"Database name: {db}\n"
161 |                     f"Schema name: {schema}\n"
162 |                 )
163 |             acc_db[schema] = sub_accessors
164 | 
165 |         if self.omit_database:
166 |             if len(res) != 1:
167 |                 raise ValueError(
168 |                     "Omitting database requires exactly 1 database entry, but found "
169 |                     f"the following: {list(res)}"
170 |                 )
171 | 
172 |             # return the only entry in the accessors dictionary
173 |             return list(res.values())[0]
174 | 
175 |         return res
176 | 
177 | 
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ```{python tags=c("hide-cell")}
2 | # TODO: is there a way to get it so dbc.list() does not show 1 item per line?
3 | 
4 | # this keeps the pandas dataframe repr from spitting out scoped style tags
5 | # which don't render on github
6 | import pandas as pd
7 | pd.set_option("display.notebook_repr_html", False)
8 | ```
9 | 
10 | # dbcooper-py
11 | 
12 | [![CI](https://github.com/machow/dbcooper-py/actions/workflows/ci.yml/badge.svg)](https://github.com/machow/dbcooper-py/actions/workflows/ci.yml)
13 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/machow/dbcooper-py/HEAD)
14 | 
15 | The dbcooper package turns a database connection into a collection of functions,
16 | handling logic for keeping track of connections and letting you take advantage of
17 | autocompletion when exploring a database.
18 | 
19 | It's especially helpful to use when authoring database-specific Python packages,
20 | for instance in an internal company package or one wrapping a public data source.
21 | 
22 | For the R version see [dgrtwo/dbcooper](https://github.com/dgrtwo/dbcooper).
23 | 
24 | ## Installation
25 | 
26 | ```
27 | pip install dbcooper
28 | ```
29 | 
30 | ## Example
31 | 
32 | ### Initializing the functions
33 | 
34 | The dbcooper package asks you to create the connection first.
35 | As an example, we'll use the Lahman baseball database package (`lahman`).
36 | 
37 | ```{python}
38 | from sqlalchemy import create_engine
39 | from dbcooper.data import lahman_sqlite
40 | 
41 | # connect to sqlite
42 | engine = create_engine("sqlite://")
43 | 
44 | # load the lahman data into the "lahman" schema
45 | lahman_sqlite(engine)
46 | ```
47 | 
48 | Next we'll set up dbcooper.
49 | 
50 | ```{python}
51 | from dbcooper import DbCooper
52 | 
53 | dbc = DbCooper(engine)
54 | ```
55 | 
56 | The `DbCooper` object contains two important things:
57 | 
58 | * Accessors to fetch specific tables.
59 | * Functions for interacting with the underlying database.
60 | 
61 | ### Using table accessors
62 | 
63 | In the example below, we'll use the `"Lahman"."Salaries"` table as an example.
64 | By default, dbcooper makes this accessible as `.lahman_salaries`.
65 | 
66 | **Plain** `.lahman_salaries` prints out table and column info, including types and descriptions.
67 | 
68 | ```{python}
69 | # show table and column descriptions
70 | dbc.lahman_salaries
71 | ```
72 | 
73 | Note that sqlite doesn't support table and column descriptions, so these sections
74 | are empty.
75 | 
76 | **Calling** `.lahman_salaries()` fetches a lazy version of the data.
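The accessor call only builds a query, so no rows are pulled until you ask for
them; if you want the result as a local pandas DataFrame, one option is siuba's
`collect` (a minimal sketch):

```{python}
from siuba import collect

# execute the query and pull the rows into a local pandas DataFrame
collect(dbc.lahman_salaries())
```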
77 | 78 | 79 | ```{python} 80 | dbc.lahman_salaries() 81 | ``` 82 | 83 | Note that this data is a siuba `LazyTbl` object, which you can use to analyze the data. 84 | 85 | ```{python} 86 | from siuba import _, count 87 | 88 | dbc.lahman_salaries() >> count(over_100k = _.salary > 100_000) 89 | ``` 90 | 91 | ### Using database functions 92 | 93 | * `.list()`: Get a list of tables 94 | * `.tbl()`: Access a table that can be worked with using `siuba`. 95 | * `.query()`: Perform a SQL query and work with the result. 96 | * `._engine`: Get the underlying sqlalchemy engine. 97 | 98 | For instance, we could start by finding the names of the tables in the Lahman database. 99 | 100 | ```{python} 101 | dbc.list() 102 | ``` 103 | 104 | We can access one of these tables with `dbc.tbl()`, then put it through any kind 105 | of siuba operation. 106 | 107 | ```{python} 108 | dbc.tbl("Salaries") 109 | ``` 110 | 111 | ```{python} 112 | from siuba import _, count 113 | dbc.tbl("Salaries") >> count(_.yearID, sort=True) 114 | ``` 115 | 116 | If you'd rather start from a SQL query, use the `.query()` method. 117 | 118 | ```{python} 119 | dbc.query(""" 120 | SELECT 121 | playerID, 122 | sum(AB) as AB 123 | FROM Batting 124 | GROUP BY playerID 125 | """) 126 | ``` 127 | 128 | For anything else you might want to do, the sqlalchemy Engine object is available. 129 | For example, the code below shows how you can set its `.echo` attribute, which 130 | tells sqlalchemy to provide useful logs. 131 | 132 | ```{python} 133 | dbc._engine.echo = True 134 | table_names = dbc.list() 135 | ``` 136 | 137 | Note that the log messages above show that the `.list()` method executed two queries: 138 | One to list tables in the "main" schema (which is empty), and one to list tables 139 | in the "lahman" schema. 140 | 141 | 142 | ## Advanced Configuration 143 | 144 | > ⚠️: These behaviors are well tested, but dbcooper's internals and API may change. 145 | 146 | dbcooper can be configured in three ways, each corresponding to a class interface: 147 | 148 | * **TableFinder**: Which tables will be used by `dbcooper`. 149 | * **AccessorBuilder**: How table names are turned into accessors. 150 | * **DbcDocumentedTable**: The class that defines what an accessor will return. 151 | 152 | ```{python} 153 | from sqlalchemy import create_engine 154 | from dbcooper.data import lahman_sqlite 155 | from dbcooper import DbCooper, AccessorBuilder 156 | 157 | engine = create_engine("sqlite://") 158 | lahman_sqlite(engine) 159 | ``` 160 | 161 | ### Excluding a schema 162 | 163 | ```{python} 164 | from dbcooper import TableFinder 165 | 166 | finder = TableFinder(exclude_schemas=["lahman"]) 167 | dbc_no_lahman = DbCooper(engine, table_finder=finder) 168 | dbc_no_lahman.list() 169 | ``` 170 | 171 | 172 | ### Formatting table names 173 | 174 | ```{python} 175 | from dbcooper import AccessorBuilder 176 | 177 | # omits schema, and keeps only table name 178 | # e.g. `salaries`, rather than `lahman_salaries` 179 | builder = AccessorBuilder(format_from_part="table") 180 | 181 | tbl_flat = DbCooper(engine, accessor_builder=builder) 182 | tbl_flat.salaries() 183 | ``` 184 | 185 | ### Grouping tables by schema 186 | 187 | ```{python} 188 | from dbcooper import AccessorHierarchyBuilder 189 | 190 | tbl_nested = DbCooper(engine, accessor_builder=AccessorHierarchyBuilder()) 191 | 192 | # note the form: .
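<schema>.<table>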
193 | tbl_nested.lahman.salaries() 194 | ``` 195 | 196 | ### Don't show table documentation 197 | 198 | ```{python} 199 | from dbcooper import DbcSimpleTable 200 | 201 | dbc_no_doc = DbCooper(engine, table_factory=DbcSimpleTable) 202 | dbc_no_doc.lahman_salaries 203 | ``` 204 | 205 | Note that sqlalchemy dialects like `snowflake-sqlalchemy` cannot look up things 206 | like table and column descriptions as well as other dialects, so `DbcSimpleTable` 207 | may be needed to connect to snowflake (see [this issue](https://github.com/snowflakedb/snowflake-sqlalchemy/issues/276)). 208 | 209 | 210 | ## Developing 211 | 212 | ```shell 213 | # install with development dependencies 214 | pip install -e .[dev] 215 | 216 | # or install from requirements file 217 | pip install -r requirements/dev.txt 218 | ``` 219 | 220 | ### Test 221 | 222 | ```shell 223 | # run all tests, see pytest section of pyproject.toml 224 | pytest 225 | 226 | # run specific backends 227 | pytest -m 'not snowflake and not bigquery' 228 | 229 | # stop on first failure, drop into debugger 230 | pytest -x --pdb 231 | ``` 232 | 233 | 234 | ### Release 235 | 236 | ```shell 237 | # set version number 238 | git tag v0.0.1 239 | 240 | # (optional) push to github 241 | git push origin --tags 242 | 243 | # check version 244 | python -m setuptools_scm 245 | ``` 246 | -------------------------------------------------------------------------------- /requirements/dev.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.8 3 | # To update, run: 4 | # 5 | # pip-compile --extra=dev --output-file=- setup.cfg 6 | # 7 | alabaster==0.7.12 8 | # via sphinx 9 | appnope==0.1.3 10 | # via 11 | # ipykernel 12 | # ipython 13 | asn1crypto==1.5.1 14 | # via 15 | # oscrypto 16 | # snowflake-connector-python 17 | asttokens==2.0.5 18 | # via stack-data 19 | attrs==21.4.0 20 | # via 21 | # jsonschema 22 | # pytest 23 | babel==2.10.2 24 | # via sphinx 25 | backcall==0.2.0 26 | # via ipython 27 | beautifulsoup4==4.11.1 28 | # via pydata-sphinx-theme 29 | cachetools==5.2.0 30 | # via google-auth 31 | certifi==2022.5.18.1 32 | # via 33 | # requests 34 | # snowflake-connector-python 35 | cffi==1.15.0 36 | # via 37 | # cryptography 38 | # snowflake-connector-python 39 | charset-normalizer==2.0.12 40 | # via 41 | # requests 42 | # snowflake-connector-python 43 | click==8.1.3 44 | # via pip-tools 45 | cryptography==36.0.2 46 | # via 47 | # pyopenssl 48 | # snowflake-connector-python 49 | debugpy==1.6.0 50 | # via ipykernel 51 | decorator==5.1.1 52 | # via ipython 53 | docutils==0.17.1 54 | # via 55 | # pydata-sphinx-theme 56 | # sphinx 57 | entrypoints==0.4 58 | # via jupyter-client 59 | executing==0.8.3 60 | # via stack-data 61 | fastjsonschema==2.15.3 62 | # via nbformat 63 | future==0.18.2 64 | # via sqlalchemy-bigquery 65 | google-api-core[grpc]==2.8.1 66 | # via 67 | # google-cloud-bigquery 68 | # google-cloud-bigquery-storage 69 | # google-cloud-core 70 | # sqlalchemy-bigquery 71 | google-auth==2.7.0 72 | # via 73 | # google-api-core 74 | # google-cloud-core 75 | # sqlalchemy-bigquery 76 | google-cloud-bigquery==3.2.0 77 | # via sqlalchemy-bigquery 78 | google-cloud-bigquery-storage==2.13.2 79 | # via 80 | # google-cloud-bigquery 81 | # sqlalchemy-bigquery 82 | google-cloud-core==2.3.1 83 | # via google-cloud-bigquery 84 | google-crc32c==1.3.0 85 | # via google-resumable-media 86 | google-resumable-media==2.3.3 87 | # via google-cloud-bigquery 88 | 
googleapis-common-protos==1.56.2 89 | # via 90 | # google-api-core 91 | # grpcio-status 92 | grpcio==1.46.3 93 | # via 94 | # google-api-core 95 | # google-cloud-bigquery 96 | # grpcio-status 97 | grpcio-status==1.46.3 98 | # via google-api-core 99 | idna==3.3 100 | # via 101 | # requests 102 | # snowflake-connector-python 103 | imagesize==1.3.0 104 | # via sphinx 105 | importlib-metadata==4.11.4 106 | # via sphinx 107 | importlib-resources==5.7.1 108 | # via 109 | # dbcooper (setup.cfg) 110 | # jsonschema 111 | iniconfig==1.1.1 112 | # via pytest 113 | ipykernel==6.14.0 114 | # via dbcooper (setup.cfg) 115 | ipython==8.4.0 116 | # via ipykernel 117 | jedi==0.18.1 118 | # via ipython 119 | jinja2==3.1.2 120 | # via sphinx 121 | jsonschema==4.6.0 122 | # via nbformat 123 | jupyter-client==7.3.4 124 | # via ipykernel 125 | jupyter-core==4.10.0 126 | # via 127 | # jupyter-client 128 | # nbformat 129 | jupytext==1.13.8 130 | # via dbcooper (setup.cfg) 131 | markdown-it-py==2.1.0 132 | # via 133 | # jupytext 134 | # mdit-py-plugins 135 | markupsafe==2.1.1 136 | # via jinja2 137 | matplotlib-inline==0.1.3 138 | # via 139 | # ipykernel 140 | # ipython 141 | mdit-py-plugins==0.3.0 142 | # via jupytext 143 | mdurl==0.1.1 144 | # via markdown-it-py 145 | nbformat==5.4.0 146 | # via jupytext 147 | nest-asyncio==1.5.5 148 | # via 149 | # ipykernel 150 | # jupyter-client 151 | numpy==1.22.4 152 | # via 153 | # pandas 154 | # pyarrow 155 | # siuba 156 | oscrypto==1.3.0 157 | # via snowflake-connector-python 158 | packaging==21.3 159 | # via 160 | # google-cloud-bigquery 161 | # ipykernel 162 | # pydata-sphinx-theme 163 | # pytest 164 | # sphinx 165 | pandas==1.4.2 166 | # via siuba 167 | parso==0.8.3 168 | # via jedi 169 | pep517==0.12.0 170 | # via pip-tools 171 | pexpect==4.8.0 172 | # via ipython 173 | pickleshare==0.7.5 174 | # via ipython 175 | pip-tools==6.6.2 176 | # via dbcooper (setup.cfg) 177 | pluggy==1.0.0 178 | # via pytest 179 | prompt-toolkit==3.0.29 180 | # via ipython 181 | proto-plus==1.20.6 182 | # via 183 | # google-cloud-bigquery 184 | # google-cloud-bigquery-storage 185 | protobuf==3.20.1 186 | # via 187 | # google-api-core 188 | # google-cloud-bigquery 189 | # google-cloud-bigquery-storage 190 | # googleapis-common-protos 191 | # grpcio-status 192 | # proto-plus 193 | psutil==5.9.1 194 | # via ipykernel 195 | psycopg2-binary==2.9.3 196 | # via dbcooper (setup.cfg) 197 | ptyprocess==0.7.0 198 | # via pexpect 199 | pure-eval==0.2.2 200 | # via stack-data 201 | py==1.11.0 202 | # via pytest 203 | pyarrow==6.0.1 204 | # via 205 | # google-cloud-bigquery 206 | # sqlalchemy-bigquery 207 | pyasn1==0.4.8 208 | # via 209 | # pyasn1-modules 210 | # rsa 211 | pyasn1-modules==0.2.8 212 | # via google-auth 213 | pycparser==2.21 214 | # via cffi 215 | pycryptodomex==3.14.1 216 | # via snowflake-connector-python 217 | pydata-sphinx-theme==0.9.0 218 | # via dbcooper (setup.cfg) 219 | pygments==2.12.0 220 | # via 221 | # ipython 222 | # sphinx 223 | pyjwt==2.4.0 224 | # via snowflake-connector-python 225 | pymysql==1.0.2 226 | # via dbcooper (setup.cfg) 227 | pyopenssl==22.0.0 228 | # via snowflake-connector-python 229 | pyparsing==3.0.9 230 | # via packaging 231 | pyrsistent==0.18.1 232 | # via jsonschema 233 | pytest==7.1.2 234 | # via 235 | # dbcooper (setup.cfg) 236 | # pytest-dotenv 237 | pytest-dotenv==0.5.2 238 | # via dbcooper (setup.cfg) 239 | python-dateutil==2.8.2 240 | # via 241 | # google-cloud-bigquery 242 | # jupyter-client 243 | # pandas 244 | python-dotenv==0.20.0 245 | # via 
pytest-dotenv 246 | pytz==2022.1 247 | # via 248 | # babel 249 | # pandas 250 | # snowflake-connector-python 251 | pyyaml==6.0 252 | # via 253 | # jupytext 254 | # siuba 255 | pyzmq==23.1.0 256 | # via jupyter-client 257 | requests==2.28.0 258 | # via 259 | # google-api-core 260 | # google-cloud-bigquery 261 | # snowflake-connector-python 262 | # sphinx 263 | rsa==4.8 264 | # via google-auth 265 | siuba==0.3.0 266 | # via dbcooper (setup.cfg) 267 | six==1.16.0 268 | # via 269 | # asttokens 270 | # google-auth 271 | # grpcio 272 | # python-dateutil 273 | snowballstemmer==2.2.0 274 | # via sphinx 275 | snowflake-connector-python==2.7.8 276 | # via snowflake-sqlalchemy 277 | snowflake-sqlalchemy==1.3.4 278 | # via dbcooper (setup.cfg) 279 | soupsieve==2.3.2.post1 280 | # via beautifulsoup4 281 | sphinx==4.4.0 282 | # via 283 | # dbcooper (setup.cfg) 284 | # pydata-sphinx-theme 285 | sphinxcontrib-applehelp==1.0.2 286 | # via sphinx 287 | sphinxcontrib-devhelp==1.0.2 288 | # via sphinx 289 | sphinxcontrib-htmlhelp==2.0.0 290 | # via sphinx 291 | sphinxcontrib-jsmath==1.0.1 292 | # via sphinx 293 | sphinxcontrib-qthelp==1.0.3 294 | # via sphinx 295 | sphinxcontrib-serializinghtml==1.1.5 296 | # via sphinx 297 | sqlalchemy==1.4.27 298 | # via 299 | # dbcooper (setup.cfg) 300 | # siuba 301 | # snowflake-sqlalchemy 302 | # sqlalchemy-bigquery 303 | sqlalchemy-bigquery==1.4.4 304 | # via dbcooper (setup.cfg) 305 | stack-data==0.2.0 306 | # via ipython 307 | tabulate==0.8.9 308 | # via dbcooper (setup.cfg) 309 | toml==0.10.2 310 | # via jupytext 311 | tomli==2.0.1 312 | # via 313 | # pep517 314 | # pytest 315 | tornado==6.1 316 | # via 317 | # ipykernel 318 | # jupyter-client 319 | traitlets==5.2.2.post1 320 | # via 321 | # ipykernel 322 | # ipython 323 | # jupyter-client 324 | # jupyter-core 325 | # matplotlib-inline 326 | # nbformat 327 | urllib3==1.26.9 328 | # via requests 329 | wcwidth==0.2.5 330 | # via prompt-toolkit 331 | wheel==0.37.1 332 | # via pip-tools 333 | zipp==3.8.0 334 | # via 335 | # importlib-metadata 336 | # importlib-resources 337 | 338 | # The following packages are considered to be unsafe in a requirements file: 339 | # pip 340 | # setuptools 341 | -------------------------------------------------------------------------------- /dbcooper/inspect.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | from sqlalchemy import sql 4 | from sqlalchemy.sql.elements import quoted_name 5 | from sqlalchemy.engine import Dialect 6 | 7 | from .utils import SingleGeneric 8 | from .base import TableName, TableIdentity 9 | 10 | from typing import Sequence 11 | 12 | 13 | 14 | # list_tables generic ========================================================= 15 | 16 | list_tables = SingleGeneric("list_tables") 17 | 18 | @list_tables.register("sqlite") 19 | def _list_tables_sqlite(self: Dialect, conn, exclude=None) -> Sequence[TableName]: 20 | if exclude is None: 21 | exclude = ("INFORMATION_SCHEMA",) 22 | 23 | schemas = self.get_schema_names(conn) 24 | query_str = """SELECT name FROM {0} WHERE type='table' ORDER BY name""" 25 | 26 | results = [] 27 | for schema in schemas: 28 | if schema in exclude: 29 | continue 30 | 31 | qschema = self.identifier_preparer.quote_identifier(schema) 32 | qmaster = f"{qschema}.sqlite_master" 33 | q = conn.exec_driver_sql(query_str.format(qmaster)) 34 | 35 | for row in q: 36 | results.append(TableName(None, schema, row[0])) 37 | 38 | return results 39 | 40 | 41 | @list_tables.register("mysql") 42 | def 
_list_tables_mysql(self: Dialect, conn, exclude=None) -> Sequence[TableName]:
43 |     if exclude is None:
44 |         exclude = tuple()
45 | 
46 |     q = conn.execute(sql.text("""
47 |         SELECT table_schema AS "schema", table_name AS "name"
48 |         FROM INFORMATION_SCHEMA.TABLES
49 |         WHERE
50 |             TABLE_TYPE='BASE TABLE'
51 |             AND TABLE_SCHEMA NOT IN ('mysql', 'performance_schema', 'sys')
52 |     """))
53 | 
54 |     results = [TableName(None, row[0], row[1]) for row in q]
55 |     return _filter_result(results, exclude)
56 | 
57 | 
58 | 
59 | # note: duckdb has its own implementation below
60 | @list_tables.register("postgresql")
61 | def _list_tables_pg(self: Dialect, conn, exclude=None) -> Sequence[TableName]:
62 |     if exclude is None:
63 |         exclude = ("information_schema", "pg_catalog")
64 | 
65 |     q = conn.execute(sql.text("""
66 |         SELECT db.db_name, nspname, relname FROM pg_class c
67 |         JOIN pg_namespace n ON n.oid = c.relnamespace
68 |         CROSS JOIN (SELECT current_database() AS db_name) db
69 |         WHERE
70 |             c.relkind in ('r', 'p', 'v')
71 |     """))
72 | 
73 |     result = [TableName(*row) for row in q]
74 | 
75 |     return _filter_result(result, exclude)
76 | 
77 | 
78 | @list_tables.register("duckdb")
79 | def _list_tables_duckdb(self: Dialect, conn, exclude=None) -> Sequence[TableName]:
80 |     if exclude is None:
81 |         exclude = ("information_schema", "pg_catalog")
82 | 
83 |     q = conn.execute(sql.text("""
84 |         SELECT db.db_name, table_schema, table_name FROM information_schema.tables c
85 |         CROSS JOIN (SELECT current_database() AS db_name) db
86 |     """))
87 | 
88 |     result = [TableName(*row) for row in q]
89 | 
90 |     return _filter_result(result, exclude)
91 | 
92 | 
93 | @list_tables.register("snowflake")
94 | def _list_tables_sf(self: Dialect, conn, exclude=None) -> Sequence[TableName]:
95 | 
96 |     if exclude is None:
97 |         exclude = ("INFORMATION_SCHEMA",)
98 | 
99 |     # snowflake sqlalchemy supports urls with .../<database>/<schema>,
100 |     # so we need to parse them out.
101 |     # note that alternatively, you could get conn.connection.database, etc..
102 |     engine = conn.engine
103 |     _, opts = engine.dialect.create_connect_args(engine.url)
104 |     db_name, schema_name = opts.get("database"), opts.get("schema")
105 | 
106 |     if schema_name:
107 |         full_name = ".".join([db_name, schema_name])
108 |         in_clause = f"IN SCHEMA {full_name}"
109 |     elif db_name:
110 |         in_clause = f"IN DATABASE {db_name}"
111 |     else:
112 |         in_clause = "IN ACCOUNT"
113 | 
114 |     tables = conn.execute(sql.text(
115 |         "SHOW TERSE TABLES " + in_clause
116 |     ))
117 | 
118 |     views = conn.execute(sql.text(
119 |         "SHOW TERSE VIEWS " + in_clause
120 |     ))
121 | 
122 |     result = []
123 |     for row in itertools.chain(tables, views):
124 |         if db_name:
125 |             # a default database is set. snowflake's dialect automatically prepends
126 |             # the default database name everywhere, so we need to set database
127 |             # to None in our results
128 |             result.append(TableName(None, row[4], row[1]))
129 |         else:
130 |             # no default database, so return the database in results. this allows
131 |             # us to specify sqlalchemy.Table(..., schema="<database>.<schema>")
132 |             result.append(TableName(row[3], row[4], row[1]))
133 | 
134 |     return _filter_result(result, exclude)
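135 | 
136 | # An illustrative sketch (not part of this module): support for a new backend
137 | # plugs into the same generic by registering a function with this signature
138 | # under the dialect's sqlalchemy name -- the "mssql" name here is hypothetical:
139 | #
140 | #     @list_tables.register("mssql")
141 | #     def _list_tables_mssql(self: Dialect, conn, exclude=None) -> Sequence[TableName]:
142 | #         ...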
143 | 
144 | 
145 | @list_tables.register("bigquery")
146 | def _list_tables_bq(self: Dialect, conn, exclude=None) -> Sequence[TableName]:
147 |     if exclude is None:
148 |         exclude = ("information_schema",)
149 | 
150 |     from google.api_core import exceptions
151 | 
152 |     client = conn.connection._client
153 |     datasets = client.list_datasets()
154 | 
155 |     result = []
156 |     for dataset in datasets:
157 |         try:
158 |             # pass as page_size by keyword (positionally it would be max_results)
159 |             tables = client.list_tables(dataset.reference, page_size=self.list_tables_page_size)
160 | 
161 |             for table in tables:
162 |                 result.append(TableName(table.project, table.reference.dataset_id, table.table_id))
163 |         except exceptions.NotFound:
164 |             pass
165 | 
166 |     return _filter_result(result, exclude)
167 | 
168 | def _filter_result(result: Sequence[TableName], exclude: "Sequence | set") -> Sequence[TableName]:
169 |     exclude_set = set(exclude)
170 |     return [entry for entry in result if entry.schema not in exclude_set]
171 | 
172 | 
173 | # Table formatter =============================================================
174 | 
175 | format_table = SingleGeneric("format_table")
176 | 
177 | def _join_parts(dialect, parts):
178 |     return "_".join(parts)
179 | 
180 | def _table_from_part(dialect, table, from_part):
181 |     tup = table.to_tuple(exists=True)
182 |     ii = table.field_index_from_end(from_part)
183 |     return _join_parts(dialect, tup[ii:])
184 | 
185 | 
186 | @format_table.register_default
187 | def _format_table_default(self: Dialect, table: TableName, from_part=None) -> str:
188 |     if from_part is not None:
189 |         return _table_from_part(self, table, from_part)
190 | 
191 |     # just use the fully qualified name parts to generate a user-friendly name,
192 |     # e.g. databasename_schemaname_tablename
193 |     tup = table.to_tuple(exists=True)
194 |     return _join_parts(self, tup)
195 | 
196 | #@format_table.register("snowflake")
197 | #def _format_table_sf(self: Dialect, table: TableName, from_part=None) -> str:
198 | #    # names in snowflake are by default case insensitive (like many databases),
199 | #    # however, they are also UPPERCASE. Make lowercase for ease of use.
200 | #    lower = TableName(*[x.lower() if x is not None else x for x in table.to_tuple()])
201 | #    return format_table.default(self, lower, from_part)
202 | 
203 | 
204 | @format_table.register("sqlite")
205 | @format_table.register("postgresql")
206 | @format_table.register("mysql")
207 | @format_table.register("bigquery")
208 | def _format_table_no_db(self: Dialect, table: TableName, from_part=None) -> str:
209 |     """Format a table name using only its schema and table parts.
210 | 
211 |     Note that this function is meant for database implementations that can't
212 |     use the same sqlalchemy engine to query across databases (or that call
213 |     a schema a "database").
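214 |     For example (illustrative values), TableName("my_db", "lahman", "salaries")
215 |     formats to "lahman_salaries"; with from_part="table", to just "salaries".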
216 |     """
217 | 
218 |     if from_part is not None:
219 |         return _table_from_part(self, table, from_part)
220 | 
221 |     # return {schema_name}.{table_name}
222 |     tup = table.to_tuple(exists=True)
223 |     return _join_parts(self, tup[-2:])
224 | 
225 | 
226 | # Table Identifier ============================================================
227 | 
228 | identify_table = SingleGeneric("identify_table")
229 | 
230 | def _identify_default_parts(dialect, parts):
231 |     if len(parts) == 3:
232 |         schema = ".".join(parts[:2])
233 |     elif len(parts) == 2:
234 |         schema = parts[0]
235 |     else:
236 |         schema = None
237 | 
238 |     return TableIdentity(schema, parts[-1])
239 | 
240 | def quote_if_not_upper(x):
241 |     if x != x.upper():
242 |         return quoted_name(x, True)
243 | 
244 |     return x
245 | 
246 | def _identify_snowflake_parts(dialect, parts):
247 |     # Handle snowflake, whose dialect is a bit funky ---
248 |     # basically, snowflake assumes you are being case insensitive,
249 |     # e.g. that some_table means SOME_TABLE. You can escape this with the quoting
250 |     # functions below. However, the snowflake dialect also tries to be clever, and
251 |     # knows that sOmE_tAbLe needs to be escaped.
252 |     #
253 |     # Unfortunately, its code is wrong in a way that makes quoting an all-uppercase
254 |     # string fail, so we have to detect uppercase names ourselves.
255 |     quoted = [dialect.identifier_preparer.quote_identifier(x) for x in parts]
256 |     if len(parts) == 3:
257 |         schema = quoted_name(".".join(quoted[0:2]), False)
258 |     elif len(parts) == 2:
259 |         schema = quote_if_not_upper(parts[0])
260 |     else:
261 |         schema = None
262 | 
263 |     table_name = quote_if_not_upper(parts[-1])
264 |     return TableIdentity(schema, table_name)
265 | 
266 | 
267 | @identify_table.register_default
268 | def _identify_table_default(self: Dialect, table: TableName, from_part=None):
269 |     """Identify a table using only its schema and table name.
270 | 
271 |     Note that this function is meant for database implementations that can't
272 |     use the same sqlalchemy engine to query across databases (or that call
273 |     a schema a "database").
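274 |     For example (illustrative values), TableName("my_db", "lahman", "salaries")
275 |     becomes TableIdentity("lahman", "salaries"), dropping the database part.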
276 |     """
277 | 
278 |     if from_part is not None:
279 |         tup = table.to_tuple(exists=True)
280 |         ii = table.field_index_from_end(from_part)
281 |         return _identify_default_parts(self, tup[ii:])
282 | 
283 |     # Note that the database is omitted
284 |     return _identify_default_parts(self, table.to_tuple(exists=True)[-2:])
285 | 
286 | 
287 | @identify_table.register("snowflake")
288 | def _identify_table_snowflake(self: Dialect, table: TableName, from_part=None):
289 |     if from_part is not None:
290 |         tup = table.to_tuple(exists=True)
291 |         ii = table.field_index_from_end(from_part)
292 |         return _identify_snowflake_parts(self, tup[ii:])
293 | 
294 |     return _identify_snowflake_parts(self, table.to_tuple(exists=True))
295 | 
296 | @identify_table.register("bigquery")
297 | def _identify_table_bigquery(self: Dialect, table: TableName, from_part=None):
298 |     # uses the default implementation (no need for explicit quoting), but
299 |     # includes the database name
300 |     if from_part is None:
301 |         from_part = "database"
302 | 
303 |     return identify_table.default(self, table, from_part=from_part)
304 | --------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # dbcooper-py
2 | 
3 | [![CI](https://github.com/machow/dbcooper-py/actions/workflows/ci.yml/badge.svg)](https://github.com/machow/dbcooper-py/actions/workflows/ci.yml)
4 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/machow/dbcooper-py/HEAD)
5 | 
6 | The dbcooper package turns a database connection into a collection of functions,
7 | handling logic for keeping track of connections and letting you take advantage of
8 | autocompletion when exploring a database.
9 | 
10 | It's especially helpful to use when authoring database-specific Python packages,
11 | for instance in an internal company package or one wrapping a public data source.
12 | 
13 | For the R version see [dgrtwo/dbcooper](https://github.com/dgrtwo/dbcooper).
14 | 
15 | ## Installation
16 | 
17 | ```
18 | pip install dbcooper
19 | ```
20 | 
21 | ## Example
22 | 
23 | ### Initializing the functions
24 | 
25 | The dbcooper package asks you to create the connection first.
26 | As an example, we'll use the Lahman baseball database package (`lahman`).
27 | 
28 | 
29 | ```python
30 | from sqlalchemy import create_engine
31 | from dbcooper.data import lahman_sqlite
32 | 
33 | # connect to sqlite
34 | engine = create_engine("sqlite://")
35 | 
36 | # load the lahman data into the "lahman" schema
37 | lahman_sqlite(engine)
38 | ```
39 | 
40 | Next, we'll set up dbcooper.
41 | 
42 | 
43 | ```python
44 | from dbcooper import DbCooper
45 | 
46 | dbc = DbCooper(engine)
47 | ```
48 | 
49 | The `DbCooper` object contains two important things:
50 | 
51 | * Accessors to fetch specific tables.
52 | * Functions for interacting with the underlying database.
53 | 
54 | ### Using table accessors
55 | 
56 | Below, we'll use the `"lahman"."Salaries"` table as an example.
57 | By default, dbcooper makes this accessible as `.lahman_salaries`.
58 | 
59 | **Plain** `.lahman_salaries` prints out table and column info, including types and descriptions.
60 | 
61 | 
62 | ```python
63 | # show table and column descriptions
64 | dbc.lahman_salaries
65 | ```
66 | 
67 | 
68 | 
69 | 
70 | 

71 | **salaries**
72 | 
73 | (No table description.)
74 | 
75 | | name     | type   | description |
76 | |----------|--------|-------------|
77 | | index    | BIGINT |             |
78 | | yearID   | BIGINT |             |
79 | | teamID   | TEXT   |             |
80 | | lgID     | TEXT   |             |
81 | | playerID | TEXT   |             |
82 | | salary   | BIGINT |             |
83 | 
84 | 
85 | 
86 | 
87 | 
88 | Note that sqlite doesn't support table and column descriptions, so these sections
89 | are empty.
90 | 
91 | **Calling** `.lahman_salaries()` fetches a lazy version of the data.
92 | 
93 | 
94 | 
95 | ```python
96 | dbc.lahman_salaries()
97 | ```
98 | 
99 | 
100 | 
101 | 
102 |     # Source: lazy query
103 |     # DB Conn: Engine(sqlite://)
104 |     # Preview:
105 |        index  yearID teamID lgID   playerID  salary
106 |     0      0    1985    ATL   NL  barkele01  870000
107 |     1      1    1985    ATL   NL  bedrost01  550000
108 |     2      2    1985    ATL   NL  benedbr01  545000
109 |     3      3    1985    ATL   NL   campri01  633333
110 |     4      4    1985    ATL   NL  ceronri01  625000
111 |     # .. may have more rows
112 | 
113 | 
114 | 
115 | Note that this data is a siuba `LazyTbl` object, which you can use to analyze the data.
116 | 
117 | 
118 | ```python
119 | from siuba import _, count
120 | 
121 | dbc.lahman_salaries() >> count(over_100k = _.salary > 100_000)
122 | ```
123 | 
124 | 
125 | 
126 | 
127 |     # Source: lazy query
128 |     # DB Conn: Engine(sqlite://)
129 |     # Preview:
130 |        over_100k      n
131 |     0       True  25374
132 |     1      False   1054
133 |     # .. may have more rows
134 | 
135 | 
136 | 
137 | ### Using database functions
138 | 
139 | * `.list()`: Get a list of tables.
140 | * `.tbl()`: Access a table that can be worked with using `siuba`.
141 | * `.query()`: Perform a SQL query and work with the result.
142 | * `._engine`: Get the underlying sqlalchemy engine.
143 | 
144 | For instance, we could start by finding the names of the tables in the Lahman database.
145 | 
146 | 
147 | ```python
148 | dbc.list()
149 | ```
150 | 
151 | 
152 | 
153 | 
154 |     ['lahman.allstar_full',
155 |      'lahman.appearances',
156 |      'lahman.awards_managers',
157 |      'lahman.awards_players',
158 |      'lahman.awards_share_managers',
159 |      'lahman.awards_share_players',
160 |      'lahman.batting',
161 |      'lahman.batting_post',
162 |      'lahman.college_playing',
163 |      'lahman.fielding',
164 |      'lahman.fielding_of',
165 |      'lahman.fielding_ofsplit',
166 |      'lahman.fielding_post',
167 |      'lahman.hall_of_fame',
168 |      'lahman.home_games',
169 |      'lahman.managers',
170 |      'lahman.managers_half',
171 |      'lahman.parks',
172 |      'lahman.people',
173 |      'lahman.pitching',
174 |      'lahman.pitching_post',
175 |      'lahman.salaries',
176 |      'lahman.schools',
177 |      'lahman.series_post',
178 |      'lahman.teams',
179 |      'lahman.teams_franchises',
180 |      'lahman.teams_half']
181 | 
182 | 
183 | 
184 | We can access one of these tables with `dbc.tbl()`, then put it through any kind
185 | of siuba operation.
186 | 
187 | 
188 | ```python
189 | dbc.tbl("Salaries")
190 | ```
191 | 
192 | 
193 | 
194 | 
195 |     # Source: lazy query
196 |     # DB Conn: Engine(sqlite://)
197 |     # Preview:
198 |        index  yearID teamID lgID   playerID  salary
199 |     0      0    1985    ATL   NL  barkele01  870000
200 |     1      1    1985    ATL   NL  bedrost01  550000
201 |     2      2    1985    ATL   NL  benedbr01  545000
202 |     3      3    1985    ATL   NL   campri01  633333
203 |     4      4    1985    ATL   NL  ceronri01  625000
204 |     # .. may have more rows
205 | 
206 | 
207 | 
208 | 
209 | ```python
210 | from siuba import _, count
211 | dbc.tbl("Salaries") >> count(_.yearID, sort=True)
212 | ```
213 | 
214 | 
215 | 
216 | 
217 |     # Source: lazy query
218 |     # DB Conn: Engine(sqlite://)
219 |     # Preview:
220 |        yearID     n
221 |     0    1999  1006
222 |     1    1998   998
223 |     2    1995   986
224 |     3    1996   931
225 |     4    1997   925
226 |     # .. may have more rows
227 | 
228 | 
229 | 
230 | If you'd rather start from a SQL query, use the `.query()` method.
231 | 
232 | 
233 | ```python
234 | dbc.query("""
235 |     SELECT
236 |         playerID,
237 |         sum(AB) as AB
238 |     FROM Batting
239 |     GROUP BY playerID
240 | """)
241 | ```
242 | 
243 | 
244 | 
245 | 
246 |     # Source: lazy query
247 |     # DB Conn: Engine(sqlite://)
248 |     # Preview:
249 |         playerID     AB
250 |     0  aardsda01      4
251 |     1  aaronha01  12364
252 |     2  aaronto01    944
253 |     3   aasedo01      5
254 |     4   abadan01     21
255 |     # .. may have more rows
256 | 
257 | 
258 | 
259 | For anything else you might want to do, the sqlalchemy Engine object is available.
260 | For example, the code below shows how you can set its `.echo` attribute, which
261 | tells sqlalchemy to provide useful logs.
262 | 
263 | 
264 | ```python
265 | dbc._engine.echo = True
266 | table_names = dbc.list()
267 | ```
268 | 
269 |     2022-03-20 22:49:37,553 INFO sqlalchemy.engine.Engine PRAGMA database_list
270 |     2022-03-20 22:49:37,554 INFO sqlalchemy.engine.Engine [raw sql] ()
271 |     2022-03-20 22:49:37,555 INFO sqlalchemy.engine.Engine SELECT name FROM "main".sqlite_master WHERE type='table' ORDER BY name
272 |     2022-03-20 22:49:37,555 INFO sqlalchemy.engine.Engine [raw sql] ()
273 |     2022-03-20 22:49:37,556 INFO sqlalchemy.engine.Engine SELECT name FROM "lahman".sqlite_master WHERE type='table' ORDER BY name
274 |     2022-03-20 22:49:37,557 INFO sqlalchemy.engine.Engine [raw sql] ()
275 | 
276 | 
277 | Note that the log messages above show that the `.list()` method executed two queries:
278 | one to list tables in the "main" schema (which is empty), and one to list tables
279 | in the "lahman" schema.
280 | 
281 | ## Advanced Configuration
282 | 
283 | > ⚠️: These behaviors are well tested, but dbcooper's internals and API may change.
284 | 
285 | dbcooper can be configured in three ways, each corresponding to a class interface:
286 | 
287 | * **TableFinder**: Which tables will be used by `dbcooper`.
288 | * **AccessorBuilder**: How table names are turned into accessors.
289 | * **DbcDocumentedTable**: The class that defines what an accessor will return.
290 | 
291 | 
292 | ```python
293 | from sqlalchemy import create_engine
294 | from dbcooper.data import lahman_sqlite
295 | from dbcooper import DbCooper, AccessorBuilder
296 | 
297 | engine = create_engine("sqlite://")
298 | lahman_sqlite(engine)
299 | ```
300 | 
301 | ### Excluding a schema
302 | 
303 | 
304 | ```python
305 | from dbcooper import TableFinder
306 | 
307 | finder = TableFinder(exclude_schemas=["lahman"])
308 | dbc_no_lahman = DbCooper(engine, table_finder=finder)
309 | dbc_no_lahman.list()
310 | ```
311 | 
312 | 
313 | 
314 | 
315 |     []
316 | 
317 | 
318 | 
319 | ### Formatting table names
320 | 
321 | 
322 | ```python
323 | from dbcooper import AccessorBuilder
324 | 
325 | # omits the schema, keeping only the table name
326 | # e.g. `salaries`, rather than `lahman_salaries`
327 | builder = AccessorBuilder(format_from_part="table")
328 | 
329 | tbl_flat = DbCooper(engine, accessor_builder=builder)
330 | tbl_flat.salaries()
331 | ```
332 | 
333 | 
334 | 
335 | 
336 |     # Source: lazy query
337 |     # DB Conn: Engine(sqlite://)
338 |     # Preview:
339 |        index  yearID teamID lgID   playerID  salary
340 |     0      0    1985    ATL   NL  barkele01  870000
341 |     1      1    1985    ATL   NL  bedrost01  550000
342 |     2      2    1985    ATL   NL  benedbr01  545000
343 |     3      3    1985    ATL   NL   campri01  633333
344 |     4      4    1985    ATL   NL  ceronri01  625000
345 |     # .. may have more rows
346 | 
347 | 
348 | 
349 | ### Grouping tables by schema
350 | 
351 | 
352 | ```python
353 | from dbcooper import AccessorHierarchyBuilder
354 | 
355 | tbl_nested = DbCooper(engine, accessor_builder=AccessorHierarchyBuilder())
356 | 
357 | # note the form: <schema>.<table>
358 | tbl_nested.lahman.salaries()
359 | ```
360 | 
361 | 
362 | 
363 | 
364 |     # Source: lazy query
365 |     # DB Conn: Engine(sqlite://)
366 |     # Preview:
367 |        index  yearID teamID lgID   playerID  salary
368 |     0      0    1985    ATL   NL  barkele01  870000
369 |     1      1    1985    ATL   NL  bedrost01  550000
370 |     2      2    1985    ATL   NL  benedbr01  545000
371 |     3      3    1985    ATL   NL   campri01  633333
372 |     4      4    1985    ATL   NL  ceronri01  625000
373 |     # .. may have more rows
374 | 
375 | 
376 | 
377 | ### Don't show table documentation
378 | 
379 | 
380 | ```python
381 | from dbcooper import DbcSimpleTable
382 | 
383 | dbc_no_doc = DbCooper(engine, table_factory=DbcSimpleTable)
384 | dbc_no_doc.lahman_salaries
385 | ```
386 | 
387 | 
388 | 
389 | 
390 |     DbcSimpleTable(..., 'salaries', 'lahman')
391 | 
392 | 
393 | 
394 | Note that some sqlalchemy dialects, like `snowflake-sqlalchemy`, cannot look up
395 | metadata such as table and column descriptions as well as other dialects can, so `DbcSimpleTable`
396 | may be needed to connect to snowflake (see [this issue](https://github.com/snowflakedb/snowflake-sqlalchemy/issues/276)).
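397 | 
398 | For example, here is a minimal sketch of connecting this way (the URL values are
399 | placeholders to fill in, and the `snowflake-sqlalchemy` package must be installed):
400 | 
401 | ```python
402 | from sqlalchemy import create_engine
403 | from dbcooper import DbCooper, DbcSimpleTable
404 | 
405 | # hypothetical connection details -- substitute your own
406 | sf_engine = create_engine("snowflake://<user>:<password>@<account>/<database>")
407 | dbc_sf = DbCooper(sf_engine, table_factory=DbcSimpleTable)
408 | ```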
409 | 
410 | ## Developing
411 | 
412 | ```shell
413 | # install with development dependencies
414 | pip install -e .[dev]
415 | 
416 | # or install from requirements file
417 | pip install -r requirements/dev.txt
418 | ```
419 | 
420 | ### Test
421 | 
422 | ```shell
423 | # run all tests, see pytest section of pyproject.toml
424 | pytest
425 | 
426 | # run specific backends
427 | pytest -m 'not snowflake and not bigquery'
428 | 
429 | # stop on first failure, drop into debugger
430 | pytest -x --pdb
431 | ```
432 | 
433 | ### Release
434 | 
435 | ```shell
436 | # set the version number
437 | git tag v0.0.1
438 | 
439 | # (optional) push to github
440 | git push origin --tags
441 | 
442 | # check the version
443 | python -m setuptools_scm
444 | ```
445 | --------------------------------------------------------------------------------