├── databackend ├── py.typed ├── tests │ ├── an_unimported_module.py │ ├── a_data_class.py │ └── test_databackend.py └── __init__.py ├── examples └── broom │ ├── requirements.txt │ ├── readme.qmd │ ├── tidy.py │ └── readme.md ├── .github └── workflows │ ├── code-checks.yml │ └── ci.yml ├── .pre-commit-config.yaml ├── LICENSE ├── pyproject.toml ├── .gitignore ├── README.md └── README.qmd /databackend/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /databackend/tests/an_unimported_module.py: -------------------------------------------------------------------------------- 1 | class UnimportedClass: 2 | pass 3 | -------------------------------------------------------------------------------- /examples/broom/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | pymc3 3 | scikit-learn 4 | statsmodels 5 | -------------------------------------------------------------------------------- /databackend/tests/a_data_class.py: -------------------------------------------------------------------------------- 1 | class ADataClass: 2 | pass 3 | 4 | 5 | class ADataClass2: 6 | pass 7 | -------------------------------------------------------------------------------- /.github/workflows/code-checks.yml: -------------------------------------------------------------------------------- 1 | name: Code Checks 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: ['main'] 7 | pull_request: 8 | release: 9 | types: [published] 10 | 11 | jobs: 12 | pre-commit: 13 | name: "Run pre-commit" 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - uses: actions/setup-python@v2 18 | - uses: pre-commit/action@v2.0.3 19 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: 
-------------------------------------------------------------------------------- 1 | exclude: "(.*\\.csv)|(.*\\.q?md)" 2 | repos: 3 | - repo: https://github.com/pycqa/flake8 4 | rev: 6.0.0 5 | hooks: 6 | - id: flake8 7 | types: 8 | - python 9 | additional_dependencies: 10 | - flake8-pyproject 11 | - repo: https://github.com/pre-commit/pre-commit-hooks 12 | rev: v4.5.0 13 | hooks: 14 | - id: trailing-whitespace 15 | - id: end-of-file-fixer 16 | - id: check-yaml 17 | args: ["--unsafe"] 18 | - id: check-added-large-files 19 | - repo: https://github.com/psf/black 20 | rev: 24.2.0 21 | hooks: 22 | - id: black 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 databackend contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=45", "wheel", "setuptools_scm>=6.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [tool.setuptools_scm] 6 | 7 | [project] 8 | name = "databackend" 9 | description = "Abstract data dispatch" 10 | readme = "README.md" 11 | license.file = "LICENSE" 12 | authors = [ 13 | { name = "Michael Chow", email = "mc_al_github@fastmail.com" } 14 | ] 15 | dynamic = ["version"] 16 | keywords = ["data"] 17 | classifiers = [ 18 | "Programming Language :: Python :: 3.7", 19 | "Programming Language :: Python :: 3.8", 20 | "Programming Language :: Python :: 3.9", 21 | "Programming Language :: Python :: 3.10", 22 | "Programming Language :: Python :: 3.11", 23 | "Programming Language :: Python :: 3.12" 24 | ] 25 | requires-python = ">=3.7" 26 | 27 | [project.urls] 28 | homepage = "https://github.com/machow/databackend" 29 | 30 | [project.optional-dependencies] 31 | dev = [ 32 | "pip-tools", 33 | "pytest" 34 | ] 35 | 36 | [tool.pytest.ini_options] 37 | testpaths = ["databackend"] 38 | addopts = "--doctest-modules" 39 | doctest_optionflags = "NORMALIZE_WHITESPACE" 40 | 41 | [tool.flake8] 42 | max-line-length = 88 43 | 44 | ignore = [ 45 | "E702", # multiple statements on one line (semicolon) 46 | "E701", # multiple statements on one line (colon) 47 | "E704", # multiple statements on one line (def) 48 | ] 49 | -------------------------------------------------------------------------------- /examples/broom/readme.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | jupytext: 4 | text_representation: 5 | extension: .qmd 6 | format_name: quarto 7 | format_version: '1.0' 8 | jupytext_version: 1.14.1 9 | kernelspec: 10 | display_name: Python 3 (ipykernel) 11 | language: python 12 | name: python3 13 | --- 
14 | 15 | ```{python} 16 | import numpy as np 17 | import pandas as pd 18 | 19 | from siuba.data import mtcars 20 | 21 | # imported from tidy.py in this folder 22 | from tidy import tidy 23 | ``` 24 | 25 | ## fit statsmodels ---- 26 | 27 | ```{python} 28 | import statsmodels.api as sm 29 | import statsmodels.formula.api as smf 30 | 31 | results = smf.ols('mpg ~ hp', data=mtcars).fit() 32 | 33 | tidy_sm = tidy(results) 34 | 35 | tidy_sm 36 | ``` 37 | 38 | ## fit scikit ---- 39 | 40 | ```{python} 41 | from sklearn.linear_model import LinearRegression 42 | 43 | X = mtcars[['hp']] 44 | y = mtcars['mpg'] 45 | 46 | # y = 1 * x_0 + 2 * x_1 + 3 47 | reg = LinearRegression().fit(X, y) 48 | 49 | tidy_sk = tidy(reg) 50 | 51 | tidy_sk 52 | ``` 53 | 54 | ## fit pymc3 ---- 55 | 56 | ```{python} 57 | from pymc3 import Model, HalfCauchy, Normal, sample 58 | 59 | x = mtcars['hp'].values 60 | y = mtcars['mpg'].values 61 | 62 | data = dict(x=x, y=y) 63 | 64 | np.random.seed(999999) 65 | with Model() as model: # model specifications in PyMC3 are wrapped in a with-statement 66 | # Define priors 67 | sigma = HalfCauchy('sigma', beta=10, testval=1.) 68 | intercept = Normal('intercept', 0, sigma=20) 69 | x_coeff = Normal('hp', 0, sigma=20) 70 | 71 | # Define likelihood 72 | likelihood = Normal('mpg', mu=intercept + x_coeff * x, 73 | sigma=sigma, observed= y) 74 | 75 | # Inference! 
76 | trace = sample(500, cores=2, progressbar = False) # draw 3000 posterior samples using NUTS sampling 77 | 78 | tidy_pymc3 = tidy(trace) 79 | 80 | tidy_pymc3 81 | ``` 82 | -------------------------------------------------------------------------------- /databackend/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 databackend contributors (MIT License) 2 | # 3 | # See https://github.com/machow/databackend 4 | 5 | from __future__ import annotations 6 | 7 | import sys 8 | import importlib 9 | 10 | from typing import Type, List, Tuple 11 | 12 | from abc import ABCMeta 13 | 14 | 15 | def _load_class(mod_name: str, cls_name: str) -> Type[object]: 16 | mod = importlib.import_module(mod_name) 17 | return getattr(mod, cls_name) 18 | 19 | 20 | class _AbstractBackendMeta(ABCMeta): 21 | def register_backend(cls, mod_name: str, cls_name: str): 22 | """Register a backend class to use in issubclass checks. 23 | 24 | This method is similar to the ABCMeta.register method, except that 25 | it accepts strings, so that an import of the class is not required. 26 | 27 | Note that the arguments to this class match the two pieces in import statements. 28 | E.g. `from a.b.c import d` would become `mod_name="a.b.c"` and `cls_name="d"`. 29 | 30 | Parameters 31 | ---------- 32 | mod_name: str 33 | A module path the class is imported from. 34 | cls_name: str 35 | The name of the class in the imported module. 
36 | """ 37 | cls._backends.append((mod_name, cls_name)) 38 | cls._abc_caches_clear() 39 | 40 | 41 | class AbstractBackend(metaclass=_AbstractBackendMeta): 42 | """Represent a class, without needing to import it.""" 43 | 44 | _backends: List[Tuple[str, str]] 45 | 46 | @classmethod 47 | def __init_subclass__(cls): 48 | if not hasattr(cls, "_backends"): 49 | cls._backends = [] 50 | 51 | @classmethod 52 | def __subclasshook__(cls, subclass: Type[object]): 53 | for mod_name, cls_name in cls._backends: 54 | if mod_name not in sys.modules: 55 | # module isn't loaded, so it can't be the subclass 56 | # we don't want to import the module to explicitly run the check 57 | # so skip here. 58 | continue 59 | else: 60 | parent_candidate = _load_class(mod_name, cls_name) 61 | if issubclass(subclass, parent_candidate): 62 | return True 63 | 64 | return NotImplemented 65 | -------------------------------------------------------------------------------- /examples/broom/tidy.py: -------------------------------------------------------------------------------- 1 | from databackend import AbstractBackend 2 | from functools import singledispatch 3 | 4 | # Abstract backend classes ==================================================== 5 | 6 | 7 | class BaseSklearnModel(AbstractBackend): 8 | _backends = [("sklearn.linear_model", "LinearRegression")] 9 | 10 | 11 | class BaseSmRegressionResult(AbstractBackend): 12 | _backends = [("statsmodels.regression.linear_model", "RegressionResultsWrapper")] 13 | 14 | 15 | class BasePymcMultiTrace(AbstractBackend): 16 | _backends = [("pymc3.backends.base", "MultiTrace")] 17 | 18 | 19 | # Implement generic function: tidy ============================================ 20 | 21 | 22 | @singledispatch 23 | def tidy(fit, *args, **kwargs): 24 | raise NotImplementedError(f"No tidy method for class {fit.__class__}") 25 | 26 | 27 | # sklearn ---- 28 | 29 | 30 | @tidy.register 31 | def _tidy_sklearn(fit: BaseSklearnModel, col_names=None): 32 | from pandas import 
DataFrame, NA 33 | 34 | estimates = [fit.intercept_, *fit.coef_] 35 | 36 | if col_names is None: 37 | terms = list(range(len(estimates))) 38 | else: 39 | terms = ["intercept", *col_names] 40 | 41 | # pd.DataFrame() 42 | return DataFrame({"term": terms, "estimate": estimates, "std_error": NA}) 43 | 44 | 45 | # statsmodels ---- 46 | 47 | 48 | @tidy.register 49 | def _tidy_statsmodels(fit: BaseSmRegressionResult): 50 | from statsmodels.iolib.summary import summary_params_frame 51 | 52 | tidied = summary_params_frame(fit).reset_index() 53 | rename_cols = { 54 | "index": "term", 55 | "coef": "estimate", 56 | "std err": "std_err", 57 | "t": "statistic", 58 | "P>|t|": "p_value", 59 | "Conf. Int. Low": "conf_int_low", 60 | "Conf. Int. Upp.": "conf_int_high", 61 | } 62 | 63 | return tidied.rename(columns=rename_cols) 64 | 65 | 66 | # pymc3 ---- 67 | 68 | 69 | @tidy.register 70 | def _tidy_trace(fit: BasePymcMultiTrace, robust=False): 71 | from pymc3 import trace_to_dataframe 72 | 73 | trace_df = trace_to_dataframe(fit) 74 | 75 | agg_funcs = ["median", "mad"] if robust else ["mean", "std"] 76 | 77 | # data frame with columns like: median, mad. 
78 | tidied = trace_df.agg(agg_funcs).T.reset_index() 79 | tidied.columns = ["term", "estimate", "std_err"] 80 | 81 | return tidied 82 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | pull_request: 7 | release: 8 | types: [published] 9 | 10 | jobs: 11 | run-if: 12 | name: "Run If" 13 | runs-on: ubuntu-latest 14 | if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.fork == false 15 | steps: 16 | - run: | 17 | echo "Running CI" 18 | test-python: 19 | name: "Test Python Version" 20 | needs: ["run-if"] 21 | runs-on: ubuntu-latest 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | # Checks based on python versions --- 26 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 27 | 28 | steps: 29 | - uses: actions/checkout@v2 30 | - uses: actions/setup-python@v2 31 | with: 32 | python-version: "${{ matrix.python-version }}" 33 | - name: Install dependencies from requirements file 34 | run: | 35 | python -m pip install --upgrade pip 36 | python -m pip install .[dev] 37 | env: 38 | REQUIREMENTS: ${{ matrix.requirements }} 39 | - name: Run tests 40 | run: | 41 | pytest 42 | 43 | release-pypi: 44 | name: "Release to pypi" 45 | runs-on: ubuntu-latest 46 | if: github.event_name == 'release' 47 | needs: [test-python] 48 | steps: 49 | - uses: actions/checkout@v2 50 | - uses: actions/setup-python@v2 51 | with: 52 | python-version: "3.10" 53 | - name: "Build Package" 54 | run: | 55 | python -m pip install build wheel 56 | python -m build --sdist --wheel 57 | - name: "Deploy to PyPI" 58 | uses: pypa/gh-action-pypi-publish@release/v1 59 | with: 60 | user: __token__ 61 | password: ${{ secrets.PYPI_API_TOKEN }} 62 | # build-docs: 63 | # name: "Build Docs" 64 | # needs: ["run-if"] 65 | # runs-on: ubuntu-latest 66 | # steps: 67 | # - uses: 
actions/checkout@v2 68 | # - uses: actions/setup-python@v2 69 | # with: 70 | # python-version: "3.10" 71 | # - name: Install dependencies from requirements file 72 | # run: | 73 | # python -m pip install --upgrade pip 74 | # python -m pip install -r requirements/dev.txt 75 | # - name: Build docs 76 | # run: | 77 | # make docs-build 78 | -------------------------------------------------------------------------------- /databackend/tests/test_databackend.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sys 3 | import importlib 4 | 5 | from databackend import AbstractBackend 6 | from databackend.tests.a_data_class import ADataClass, ADataClass2 7 | 8 | CLASS_MOD = "databackend.tests.a_data_class" 9 | CLASS_NAME = "ADataClass" 10 | 11 | 12 | @pytest.fixture 13 | def Base(): 14 | class Base(AbstractBackend): 15 | pass 16 | 17 | Base.register_backend(CLASS_MOD, CLASS_NAME) 18 | 19 | return Base 20 | 21 | 22 | def test_check_unimported_mod(Base): 23 | class ABase(AbstractBackend): 24 | pass 25 | 26 | mod_name = "databackend.tests.an_unimported_module" 27 | ABase.register_backend(mod_name, "UnimportedClass") 28 | 29 | # check pre-import and verify it's still not imported ---- 30 | assert not issubclass(int, ABase) 31 | assert mod_name not in sys.modules 32 | 33 | # do import and verify ABC is seen as parent class ---- 34 | mod = importlib.import_module(mod_name) 35 | 36 | assert issubclass(mod.UnimportedClass, ABase) 37 | 38 | 39 | def test_issubclass(Base): 40 | assert issubclass(ADataClass, Base) 41 | 42 | 43 | def test_isinstance(Base): 44 | assert isinstance(ADataClass(), Base) 45 | 46 | 47 | def test_check_is_cached(): 48 | checks = [0] 49 | 50 | class ABase(AbstractBackend): 51 | @classmethod 52 | def __subclasshook__(cls, subclass): 53 | # increment the number in checks, as a dumb way 54 | # of seeing how often this runs 55 | # could also use abc.ABCMeta._dump_registry 56 | checks[0] = checks[0] + 1 57 
| return super().__subclasshook__(subclass) 58 | 59 | # this check runs subclasshook ---- 60 | issubclass(ADataClass, ABase) 61 | assert checks[0] == 1 62 | 63 | # now that ADataClass is in the abc.ABCMeta cache, it 64 | # does *not* run subclasshook 65 | issubclass(ADataClass, ABase) 66 | assert checks[0] == 1 67 | 68 | 69 | def test_backends_spec_at_class_declaration(): 70 | class ABase(AbstractBackend): 71 | _backends = [(CLASS_MOD, CLASS_NAME)] 72 | 73 | assert issubclass(ADataClass, ABase) 74 | 75 | 76 | def test_backends_do_not_overlap(): 77 | class ABase1(AbstractBackend): ... 78 | 79 | class ABase2(AbstractBackend): ... 80 | 81 | ABase1.register_backend(CLASS_MOD, "ADataClass") 82 | ABase2.register_backend(CLASS_MOD, "ADataClass2") 83 | 84 | obj1 = ADataClass() 85 | obj2 = ADataClass2() 86 | 87 | assert isinstance(obj1, ABase1) 88 | assert not isinstance(obj1, ABase2) 89 | 90 | assert not isinstance(obj2, ABase1) 91 | assert isinstance(obj2, ABase2) 92 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Vim 2 | .*.sw[po] 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # poetry 101 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 102 | # This is especially recommended for binary packages to ensure reproducibility, and is more 103 | # commonly ignored for libraries. 104 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 105 | #poetry.lock 106 | 107 | # pdm 108 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
109 | #pdm.lock 110 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 111 | # in version control. 112 | # https://pdm.fming.dev/#use-with-ide 113 | .pdm.toml 114 | 115 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 116 | __pypackages__/ 117 | 118 | # Celery stuff 119 | celerybeat-schedule 120 | celerybeat.pid 121 | 122 | # SageMath parsed files 123 | *.sage.py 124 | 125 | # Environments 126 | .env 127 | .venv 128 | env/ 129 | venv/ 130 | ENV/ 131 | env.bak/ 132 | venv.bak/ 133 | 134 | # Spyder project settings 135 | .spyderproject 136 | .spyproject 137 | 138 | # Rope project settings 139 | .ropeproject 140 | 141 | # mkdocs documentation 142 | /site 143 | 144 | # mypy 145 | .mypy_cache/ 146 | .dmypy.json 147 | dmypy.json 148 | 149 | # Pyre type checker 150 | .pyre/ 151 | 152 | # pytype static type analyzer 153 | .pytype/ 154 | 155 | # Cython debug symbols 156 | cython_debug/ 157 | 158 | # PyCharm 159 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 160 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 161 | # and can be added to the global gitignore or merged into this file. For a more nuclear 162 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 163 | #.idea/ 164 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # databackend 3 | 4 | The `databackend` package allows you to register a subclass, without 5 | needing to import the subclass itself. This is useful for implementing 6 | actions over optional dependencies. 7 | 8 | ## Example 9 | 10 | For this example, we’ll implement a function, `fill_na()`, that fills in 11 | missing values in a DataFrame. 
It works with DataFrame objects from two 12 | popular libraries: `pandas` and `polars`. Importantly, neither library 13 | needs to be installed. 14 | 15 | ### Setup 16 | 17 | The code below defines “abstract” parent classes for each of the 18 | DataFrame classes in the two libraries. 19 | 20 | ``` python 21 | from databackend import AbstractBackend 22 | 23 | class AbstractPandasFrame(AbstractBackend): 24 | _backends = [("pandas", "DataFrame")] 25 | 26 | 27 | class AbstractPolarsFrame(AbstractBackend): 28 | _backends = [("polars", "DataFrame")] 29 | ``` 30 | 31 | Note that the abstract classes can be used as stand-ins for the real 32 | thing in `issubclass()` and `isinstance`. 33 | 34 | ``` python 35 | from pandas import DataFrame 36 | 37 | issubclass(DataFrame, AbstractPandasFrame) 38 | isinstance(DataFrame(), AbstractPandasFrame) 39 | ``` 40 | 41 | True 42 | 43 | > 📝 Note that you can use 44 | > `AbstractPandasFrame.register_backend("pandas", "DataFrame")`, as an 45 | > alternative way to register backends. 46 | 47 | ### Simple fill_na: isinstance to switch behavior 48 | 49 | The `fill_na()` function below uses custom handling for pandas and 50 | polars. 51 | 52 | ``` python 53 | def fill_na(data, x): 54 | if isinstance(data, AbstractPolarsFrame): 55 | return data.fill_nan(x) 56 | elif isinstance(data, AbstractPandasFrame): 57 | return data.fillna(x) 58 | else: 59 | raise NotImplementedError() 60 | ``` 61 | 62 | Notice that neither `pandas` nor `polars` need to be imported when 63 | defining `fill_na()`. 64 | 65 | Here is an example of calling `fill_na()` on both kinds of DataFrames. 
66 | 67 | ``` python 68 | # test polars ---- 69 | 70 | import polars as pl 71 | 72 | df = pl.DataFrame({"x": [1, 2, None]}) 73 | fill_na(df, 3) 74 | 75 | 76 | # test pandas ---- 77 | 78 | import pandas as pd 79 | 80 | df = pd.DataFrame({"x": [1, 2, None]}) 81 | fill_na(df, 3) 82 | ``` 83 | 84 | x 85 | 0 1.0 86 | 1 2.0 87 | 2 3.0 88 | 89 | The key here is that a user could have only pandas, or only polars, 90 | installed. Importantly, doing the isinstance checks do not import any 91 | libraries! 92 | 93 | ### Advanced fill_na: generic function dispatch 94 | 95 | `databackend` shines when combined with [generic function 96 | dispatch](https://mchow.com/posts/2020-02-24-single-dispatch-data-science/). 97 | This is a programming approach where you declare a function 98 | (e.g. `fill_na()`), and then register each backend specific 99 | implementation on the function. 100 | 101 | Python has a built-in function implementing this called 102 | [`functools.singledispatch`](https://docs.python.org/3/library/functools.html#functools.singledispatch). 103 | 104 | Here is an example of the previous `fill_na()` function written using 105 | it. 106 | 107 | ``` python 108 | from functools import singledispatch 109 | 110 | @singledispatch 111 | def fill_na2(data, x): 112 | raise NotImplementedError(f"No support for class: {type(data)}") 113 | 114 | 115 | # handle polars ---- 116 | 117 | @fill_na2.register 118 | def _(data: AbstractPolarsFrame, x): 119 | return data.fill_nan(x) 120 | 121 | 122 | # handle pandas ---- 123 | 124 | @fill_na2.register 125 | def _(data: AbstractPandasFrame, x): 126 | return data.fillna(x) 127 | ``` 128 | 129 | Note two important decorators: 130 | 131 | - `@singledispatch` defines a default function. This gets called if no 132 | specific implementations are found. 133 | - `@fill_na2.register` defines specific versions of the function. 134 | 135 | Here’s an example of it in action. 
136 | 137 | ``` python 138 | # example ---- 139 | 140 | import pandas as pd 141 | import polars as pl 142 | 143 | df = pl.DataFrame({"x": [1, 2, None]}) 144 | fill_na2(df, 3) 145 | 146 | df = pd.DataFrame({"x": [1, 2, None]}) 147 | fill_na2(df, 3) 148 | ``` 149 | 150 | x 151 | 0 1.0 152 | 1 2.0 153 | 2 3.0 154 | 155 | ### How it works 156 | 157 | Under the hood, `AbstractBackend` behaves similarly to python’s builtin 158 | [`abc.ABC` class](https://docs.python.org/3/library/abc.html#abc.ABC). 159 | 160 | ``` python 161 | from abc import ABC 162 | 163 | class MyABC(ABC): 164 | pass 165 | 166 | from io import StringIO 167 | 168 | MyABC.register(StringIO) 169 | 170 | 171 | # StringIO is a "virtual subclass" of MyABC 172 | isinstance(StringIO("abc"), MyABC) 173 | ``` 174 | 175 | True 176 | 177 | The key difference is that you can specify the virtual subclass using 178 | the tuple `("", "")`. 179 | 180 | When `issubclass(SomeClass, AbstractBackend)` runs, then… 181 | 182 | - The standard ABC caching mechanism is checked, and potentially 183 | returns the answer immediately. 184 | - Otherwise, a subclass hook cycles through registered backends. 185 | - The hook runs the subclass check for any backends that are imported 186 | (e.g. are in `sys.modules`). 187 | 188 | Technically, `AbstractBackend` inherits all the useful metaclass things 189 | from `abc.ABCMeta`, so these can be used also. 
190 | -------------------------------------------------------------------------------- /README.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: python3 3 | format: gfm 4 | --- 5 | 6 | # databackend 7 | 8 | ```{python} 9 | #| include: false 10 | # this keeps the pandas dataframe repr from spitting out scoped style tags 11 | # which don't render on github 12 | import pandas as pd 13 | pd.set_option("display.notebook_repr_html", False) 14 | 15 | ``` 16 | 17 | The `databackend` package allows you to register a subclass, without needing to import the subclass itself. 18 | This is useful for implementing actions over optional dependencies. 19 | 20 | 21 | ## Example 22 | 23 | For this example, we'll implement a function, `fill_na()`, that fills in missing values 24 | in a DataFrame. 25 | It works with DataFrame objects from two popular libraries: `pandas` and `polars`. 26 | Importantly, neither library needs to be installed. 27 | 28 | ### Setup 29 | 30 | The code below defines "abstract" parent classes for each of the DataFrame classes in the two libraries. 31 | 32 | ```{python} 33 | from databackend import AbstractBackend 34 | 35 | class AbstractPandasFrame(AbstractBackend): 36 | _backends = [("pandas", "DataFrame")] 37 | 38 | 39 | class AbstractPolarsFrame(AbstractBackend): 40 | _backends = [("polars", "DataFrame")] 41 | 42 | 43 | ``` 44 | 45 | Note that the abstract classes can be used as stand-ins for the real thing 46 | in `issubclass()` and `isinstance`. 47 | 48 | ```{python} 49 | from pandas import DataFrame 50 | 51 | issubclass(DataFrame, AbstractPandasFrame) 52 | isinstance(DataFrame(), AbstractPandasFrame) 53 | ``` 54 | 55 | > 📝 Note that you can use `AbstractPandasFrame.register_backend("pandas", "DataFrame")`, as an alternative way to register backends. 56 | 57 | ### Simple fill_na: isinstance to switch behavior 58 | 59 | The `fill_na()` function below uses custom handling for pandas and polars. 
60 | 61 | ```{python} 62 | def fill_na(data, x): 63 | if isinstance(data, AbstractPolarsFrame): 64 | return data.fill_nan(x) 65 | elif isinstance(data, AbstractPandasFrame): 66 | return data.fillna(x) 67 | else: 68 | raise NotImplementedError() 69 | 70 | ``` 71 | 72 | Notice that neither `pandas` nor `polars` need to be imported when defining `fill_na()`. 73 | 74 | Here is an example of calling `fill_na()` on both kinds of DataFrames. 75 | 76 | ```{python} 77 | # test polars ---- 78 | 79 | import polars as pl 80 | 81 | df = pl.DataFrame({"x": [1, 2, None]}) 82 | fill_na(df, 3) 83 | 84 | 85 | # test pandas ---- 86 | 87 | import pandas as pd 88 | 89 | df = pd.DataFrame({"x": [1, 2, None]}) 90 | fill_na(df, 3) 91 | ``` 92 | 93 | The key here is that a user could have only pandas, or only polars, installed. 94 | Importantly, doing the isinstance checks do not import any libraries! 95 | 96 | 97 | ### Advanced fill_na: generic function dispatch 98 | 99 | `databackend` shines when combined with [generic function dispatch](https://mchow.com/posts/2020-02-24-single-dispatch-data-science/). 100 | This is a programming approach where you declare a function (e.g. `fill_na()`), 101 | and then register each backend specific implementation on the function. 102 | 103 | Python has a built-in function implementing this called [`functools.singledispatch`](https://docs.python.org/3/library/functools.html#functools.singledispatch). 104 | 105 | Here is an example of the previous `fill_na()` function written using it. 
106 | 107 | ```{python} 108 | from functools import singledispatch 109 | 110 | @singledispatch 111 | def fill_na2(data, x): 112 | raise NotImplementedError(f"No support for class: {type(data)}") 113 | 114 | 115 | # handle polars ---- 116 | 117 | @fill_na2.register 118 | def _(data: AbstractPolarsFrame, x): 119 | return data.fill_nan(x) 120 | 121 | 122 | # handle pandas ---- 123 | 124 | @fill_na2.register 125 | def _(data: AbstractPandasFrame, x): 126 | return data.fillna(x) 127 | 128 | ``` 129 | 130 | Note two important decorators: 131 | 132 | * `@singledispatch` defines a default function. This gets called if no specific implementations are found. 133 | * `@fill_na2.register` defines specific versions of the function. 134 | 135 | Here's an example of it in action. 136 | 137 | ```{python} 138 | # example ---- 139 | 140 | import pandas as pd 141 | import polars as pl 142 | 143 | df = pl.DataFrame({"x": [1, 2, None]}) 144 | fill_na2(df, 3) 145 | 146 | df = pd.DataFrame({"x": [1, 2, None]}) 147 | fill_na2(df, 3) 148 | ``` 149 | 150 | ### How it works 151 | 152 | Under the hood, `AbstractBackend` behaves similarly to python's builtin [`abc.ABC` class](https://docs.python.org/3/library/abc.html#abc.ABC). 153 | 154 | ```{python} 155 | from abc import ABC 156 | 157 | class MyABC(ABC): 158 | pass 159 | 160 | from io import StringIO 161 | 162 | MyABC.register(StringIO) 163 | 164 | 165 | # StringIO is a "virtual subclass" of MyABC 166 | isinstance(StringIO("abc"), MyABC) 167 | ``` 168 | 169 | The key difference is that you can specify the virtual subclass using the tuple `("", "")`. 170 | 171 | When `issubclass(SomeClass, AbstractBackend)` runs, then... 172 | 173 | * The standard ABC caching mechanism is checked, and potentially returns the answer immediately. 174 | * Otherwise, a subclass hook cycles through registered backends. 175 | * The hook runs the subclass check for any backends that are imported (e.g. are in `sys.modules`). 
176 | 177 | Technically, `AbstractBackend` inherits all the useful metaclass things from `abc.ABCMeta`, so these can be used also. 178 | -------------------------------------------------------------------------------- /examples/broom/readme.md: -------------------------------------------------------------------------------- 1 | ```python 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from siuba.data import mtcars 6 | 7 | # imported from tidy.py in this folder 8 | from tidy import tidy 9 | ``` 10 | 11 | ## fit statsmodels ---- 12 | 13 | 14 | ```python 15 | import statsmodels.api as sm 16 | import statsmodels.formula.api as smf 17 | 18 | results = smf.ols('mpg ~ hp', data=mtcars).fit() 19 | 20 | tidy_sm = tidy(results) 21 | 22 | tidy_sm 23 | ``` 24 | 25 | 26 | 27 | 28 |
29 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 |
termestimatestd_errstatisticp_valueconf_int_lowconf_int_high
0Intercept30.0988611.63392118.4212466.642736e-1826.76194933.435772
1hp-0.0682280.010119-6.7423891.787835e-07-0.088895-0.047562
78 |
79 | 80 | 81 | 82 | ## fit scikit ---- 83 | 84 | 85 | ```python 86 | from sklearn.linear_model import LinearRegression 87 | 88 | X = mtcars[['hp']] 89 | y = mtcars['mpg'] 90 | 91 | # fit a linear regression of mpg on hp 92 | reg = LinearRegression().fit(X, y) 93 | 94 | tidy_sk = tidy(reg) 95 | 96 | tidy_sk 97 | ``` 98 | 99 | 100 | 101 | 102 | <div>
103 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 |
termestimatestd_error
0030.098861<NA>
11-0.068228<NA>
140 |
141 | 142 | 143 | 144 | ## fit pymc3 ---- 145 | 146 | 147 | ```python 148 | from pymc3 import Model, HalfCauchy, Normal, sample 149 | 150 | x = mtcars['hp'].values 151 | y = mtcars['mpg'].values 152 | 153 | data = dict(x=x, y=y) 154 | 155 | np.random.seed(999999) 156 | with Model() as model:  # model specifications in PyMC3 are wrapped in a with-statement 157 | # Define priors 158 | sigma = HalfCauchy('sigma', beta=10, testval=1.) 159 | intercept = Normal('intercept', 0, sigma=20) 160 | x_coeff = Normal('hp', 0, sigma=20) 161 | 162 | # Define likelihood 163 | likelihood = Normal('mpg', mu=intercept + x_coeff * x, 164 | sigma=sigma, observed= y) 165 | 166 | # Inference! 167 | trace = sample(500, cores=2, progressbar = False) # draw 500 posterior samples using NUTS sampling 168 | 169 | tidy_pymc3 = tidy(trace) 170 | 171 | tidy_pymc3 172 | ``` 173 | 174 | /Users/machow/.virtualenvs/databackend/lib/python3.8/site-packages/deprecat/classic.py:215: FutureWarning: In v4.0, pm.sample will return an `arviz.InferenceData` object instead of a `MultiTrace` by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning. 175 | return wrapped_(*args_, **kwargs_) 176 | Auto-assigning NUTS sampler... 177 | Initializing NUTS using jitter+adapt_diag... 178 | Multiprocess sampling (2 chains in 2 jobs) 179 | NUTS: [hp, intercept, sigma] 180 | /Users/machow/.virtualenvs/databackend/lib/python3.8/site-packages/scipy/stats/_continuous_distns.py:624: RuntimeWarning: overflow encountered in _beta_ppf 181 | return _boost._beta_ppf(q, a, b) 182 | /Users/machow/.virtualenvs/databackend/lib/python3.8/site-packages/scipy/stats/_continuous_distns.py:624: RuntimeWarning: overflow encountered in _beta_ppf 183 | return _boost._beta_ppf(q, a, b) 184 | Sampling 2 chains for 1_000 tune and 500 draw iterations (2_000 + 1_000 draws total) took 3 seconds. 185 | The acceptance probability does not match the target.
It is 0.8794624791437439, but should be close to 0.8. Try to increase the number of tuning steps. 186 | 187 | 188 | 189 | 190 | 191 |
192 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 |
termestimatestd_err
0intercept30.0517211.705625
1hp-0.0680230.010389
2sigma4.0305650.557375
235 |
236 | 237 | 238 | --------------------------------------------------------------------------------