├── tests ├── __init__.py ├── mssql │ ├── __init__.py │ ├── test_mssql_ddl.sql │ ├── README.md │ ├── test_mssql_config.py │ ├── test_mssql_sqsh.py_ │ └── test_mssql.py ├── postgres │ ├── __init__.py │ ├── test_postgres_ddl.sql │ └── test_postgres.py ├── seed │ ├── names_lf.csv │ ├── names_crlf.csv │ ├── names_lf_lastrow.csv │ ├── names_lf_header.csv │ ├── names_crlf_lastrow.csv │ ├── names_lf_quoted.csv │ ├── names_crlf_header.csv │ ├── names_lf_lastrow_header.csv │ ├── names_crlf_quoted.csv │ ├── names_lf_quoted_lastrow.csv │ ├── names_crlf_lastrow_header.csv │ ├── names_lf_quoted_header.csv │ ├── names_crlf_quoted_lastrow.csv │ ├── names_crlf_quoted_header.csv │ ├── names_lf_quoted_lastrow_header.csv │ ├── names_crlf_quoted_lastrow_header.csv │ ├── README.md │ ├── accounts_lf.jsonl │ ├── accounts_crlf.jsonl │ ├── accounts_lf_lastrow.jsonl │ └── accounts_crlf_lastrow.jsonl ├── test1.py ├── docker-compose.yml ├── command_helper.py ├── local_config.py.example ├── test_snowflake.py ├── test_databricks.py └── db_test_helper.py ├── docs ├── changes.md ├── _static │ ├── favicon.ico │ ├── mara-animal.jpg │ └── schema-visualization.png ├── requirements.txt ├── license.rst ├── cli.rst ├── Makefile ├── config.rst ├── api.rst ├── dbs │ ├── SQLite.rst │ ├── Mysql.rst │ ├── Oracle.rst │ ├── PostgreSQL.rst │ ├── Redshift.rst │ ├── Snowflake.rst │ ├── Databricks.rst │ ├── BigQuery.rst │ └── SQLServer.rst ├── installation.md ├── conf.py ├── index.rst └── databases-overview.md ├── setup.py ├── .vscode └── settings.json ├── pyproject.toml ├── .readthedocs.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── mara_db ├── mysql.py ├── databricks.py ├── postgresql.py ├── sqlserver.py ├── __init__.py ├── cli.py ├── config.py ├── sqlalchemy_engine.py ├── static │ └── schema-page.js ├── formats.py ├── bigquery.py ├── auto_migration.py ├── dbs.py └── views.py ├── .github └── workflows │ └── build.yml ├── LICENSE ├── Makefile ├── setup.cfg ├── CHANGELOG.md └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/mssql/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/postgres/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/changes.md: -------------------------------------------------------------------------------- 1 | ```{include} ../CHANGELOG.md 2 | ``` 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup() 4 | -------------------------------------------------------------------------------- /docs/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mara/mara-db/HEAD/docs/_static/favicon.ico -------------------------------------------------------------------------------- /docs/_static/mara-animal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mara/mara-db/HEAD/docs/_static/mara-animal.jpg -------------------------------------------------------------------------------- 
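The `tests/seed/` file names listed in the tree above encode a four-dimensional CSV test matrix (LF/CRLF line endings × quoted/unquoted values × trailing newline × header row; documented in `tests/seed/README.md` further down). As an illustrative sketch only — this is not a file from the repository — the full set of names can be derived mechanically from those dimensions:

```python
# Illustrative sketch (not part of this repository): derives the seed CSV
# file names in tests/seed/ from the four dimensions of the test matrix.
import itertools


def seed_csv_names():
    names = []
    for line_ending, quoted, lastrow, header in itertools.product(
            ('lf', 'crlf'),   # LF vs. CRLF line endings
            (False, True),    # unquoted vs. quoted column values
            (False, True),    # without/with a closing newline after the last row
            (False, True)):   # without/with a header row
        parts = ['names', line_ending]
        if quoted:
            parts.append('quoted')
        if lastrow:
            parts.append('lastrow')
        if header:
            parts.append('header')
        names.append('_'.join(parts) + '.csv')
    return names


assert len(seed_csv_names()) == 16  # matches the 16 names_*.csv files in the tree
assert 'names_crlf_quoted_lastrow_header.csv' in seed_csv_names()
```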
/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==4.5.0 2 | sphinxcontrib-napoleon==0.7 3 | sphinx-tabs==3.3.1 4 | myst-parser==0.18.0 5 | -------------------------------------------------------------------------------- /docs/_static/schema-visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mara/mara-db/HEAD/docs/_static/schema-visualization.png -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "makefile.extensionOutputFolder": "./.vscode", 3 | "esbonio.sphinx.confDir": "" 4 | } 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 40.6.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /tests/mssql/test_mssql_ddl.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE names 2 | ( 3 | id INT, 4 | name TEXT 5 | ); 6 | 7 | CREATE TABLE names_with_header 8 | ( 9 | id INT, 10 | name TEXT 11 | ); 12 | -------------------------------------------------------------------------------- /tests/seed/names_lf.csv: -------------------------------------------------------------------------------- 1 | 1,Elinor Meklit 2 | 2,Triana Mahalah 3 | 3,Eugraphios Esmae 4 | 4,Agustín Alvilda 5 | 5,Behruz Hathor 6 | 6,Mathilde Tola 7 | 7,Kapel Tupaq 8 | 8,Shet Badulf 9 | 9,Ruslan Vančo 10 | 10,Madhavi Traian -------------------------------------------------------------------------------- /tests/seed/names_crlf.csv: -------------------------------------------------------------------------------- 1 | 1,Elinor Meklit 2 | 2,Triana Mahalah 3 | 3,Eugraphios Esmae 4 | 4,Agustín Alvilda 5 | 5,Behruz Hathor 6 | 6,Mathilde Tola 7 | 7,Kapel Tupaq 8 | 8,Shet Badulf 9 | 9,Ruslan Vančo 10 | 10,Madhavi Traian -------------------------------------------------------------------------------- /tests/seed/names_lf_lastrow.csv: -------------------------------------------------------------------------------- 1 | 1,Elinor Meklit 2 | 2,Triana Mahalah 3 | 3,Eugraphios Esmae 4 | 4,Agustín Alvilda 5 | 5,Behruz Hathor 6 | 6,Mathilde Tola 7 | 7,Kapel Tupaq 8 | 8,Shet Badulf 9 | 9,Ruslan Vančo 10 | 10,Madhavi Traian 11 | -------------------------------------------------------------------------------- /tests/seed/names_lf_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,Elinor Meklit 3 | 2,Triana Mahalah 4 | 3,Eugraphios Esmae 5 | 4,Agustín Alvilda 6 | 5,Behruz Hathor 7 | 6,Mathilde Tola 8 | 7,Kapel Tupaq 9 | 8,Shet Badulf 10 | 9,Ruslan Vančo 11 | 10,Madhavi Traian -------------------------------------------------------------------------------- /tests/seed/names_crlf_lastrow.csv: -------------------------------------------------------------------------------- 1 | 1,Elinor Meklit 2 | 2,Triana Mahalah 3 | 3,Eugraphios Esmae 4 | 4,Agustín Alvilda 5 | 5,Behruz Hathor 6 | 6,Mathilde Tola 7 | 7,Kapel Tupaq 8 | 8,Shet Badulf 9 | 9,Ruslan Vančo 10 | 10,Madhavi Traian 11 | -------------------------------------------------------------------------------- /tests/seed/names_lf_quoted.csv: 
-------------------------------------------------------------------------------- 1 | 1,"Elinor Meklit" 2 | 2,"Triana Mahalah" 3 | 3,"Eugraphios Esmae" 4 | 4,"Agustín Alvilda" 5 | 5,"Behruz Hathor" 6 | 6,"Mathilde Tola" 7 | 7,"Kapel Tupaq" 8 | 8,"Shet Badulf" 9 | 9,"Ruslan Vančo" 10 | 10,"Madhavi Traian" -------------------------------------------------------------------------------- /tests/seed/names_crlf_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,Elinor Meklit 3 | 2,Triana Mahalah 4 | 3,Eugraphios Esmae 5 | 4,Agustín Alvilda 6 | 5,Behruz Hathor 7 | 6,Mathilde Tola 8 | 7,Kapel Tupaq 9 | 8,Shet Badulf 10 | 9,Ruslan Vančo 11 | 10,Madhavi Traian -------------------------------------------------------------------------------- /tests/seed/names_lf_lastrow_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,Elinor Meklit 3 | 2,Triana Mahalah 4 | 3,Eugraphios Esmae 5 | 4,Agustín Alvilda 6 | 5,Behruz Hathor 7 | 6,Mathilde Tola 8 | 7,Kapel Tupaq 9 | 8,Shet Badulf 10 | 9,Ruslan Vančo 11 | 10,Madhavi Traian 12 | -------------------------------------------------------------------------------- /tests/seed/names_crlf_quoted.csv: -------------------------------------------------------------------------------- 1 | 1,"Elinor Meklit" 2 | 2,"Triana Mahalah" 3 | 3,"Eugraphios Esmae" 4 | 4,"Agustín Alvilda" 5 | 5,"Behruz Hathor" 6 | 6,"Mathilde Tola" 7 | 7,"Kapel Tupaq" 8 | 8,"Shet Badulf" 9 | 9,"Ruslan Vančo" 10 | 10,"Madhavi Traian" -------------------------------------------------------------------------------- /tests/seed/names_lf_quoted_lastrow.csv: -------------------------------------------------------------------------------- 1 | 1,"Elinor Meklit" 2 | 2,"Triana Mahalah" 3 | 3,"Eugraphios Esmae" 4 | 4,"Agustín Alvilda" 5 | 5,"Behruz Hathor" 6 | 6,"Mathilde Tola" 7 | 7,"Kapel Tupaq" 8 | 8,"Shet Badulf" 9 | 9,"Ruslan Vančo" 10 | 10,"Madhavi Traian" 11 | -------------------------------------------------------------------------------- /tests/seed/names_crlf_lastrow_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,Elinor Meklit 3 | 2,Triana Mahalah 4 | 3,Eugraphios Esmae 5 | 4,Agustín Alvilda 6 | 5,Behruz Hathor 7 | 6,Mathilde Tola 8 | 7,Kapel Tupaq 9 | 8,Shet Badulf 10 | 9,Ruslan Vančo 11 | 10,Madhavi Traian 12 | -------------------------------------------------------------------------------- /tests/seed/names_lf_quoted_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,"Elinor Meklit" 3 | 2,"Triana Mahalah" 4 | 3,"Eugraphios Esmae" 5 | 4,"Agustín Alvilda" 6 | 5,"Behruz Hathor" 7 | 6,"Mathilde Tola" 8 | 7,"Kapel Tupaq" 9 | 8,"Shet Badulf" 10 | 9,"Ruslan Vančo" 11 | 10,"Madhavi Traian" -------------------------------------------------------------------------------- /tests/seed/names_crlf_quoted_lastrow.csv: -------------------------------------------------------------------------------- 1 | 1,"Elinor Meklit" 2 | 2,"Triana Mahalah" 3 | 3,"Eugraphios Esmae" 4 | 4,"Agustín Alvilda" 5 | 5,"Behruz Hathor" 6 | 6,"Mathilde Tola" 7 | 7,"Kapel Tupaq" 8 | 8,"Shet Badulf" 9 | 9,"Ruslan Vančo" 10 | 10,"Madhavi Traian" 11 | -------------------------------------------------------------------------------- /tests/seed/names_crlf_quoted_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,"Elinor Meklit" 3 | 2,"Triana 
Mahalah" 4 | 3,"Eugraphios Esmae" 5 | 4,"Agustín Alvilda" 6 | 5,"Behruz Hathor" 7 | 6,"Mathilde Tola" 8 | 7,"Kapel Tupaq" 9 | 8,"Shet Badulf" 10 | 9,"Ruslan Vančo" 11 | 10,"Madhavi Traian" -------------------------------------------------------------------------------- /tests/seed/names_lf_quoted_lastrow_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,"Elinor Meklit" 3 | 2,"Triana Mahalah" 4 | 3,"Eugraphios Esmae" 5 | 4,"Agustín Alvilda" 6 | 5,"Behruz Hathor" 7 | 6,"Mathilde Tola" 8 | 7,"Kapel Tupaq" 9 | 8,"Shet Badulf" 10 | 9,"Ruslan Vančo" 11 | 10,"Madhavi Traian" 12 | -------------------------------------------------------------------------------- /tests/test1.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | def test_my(): 4 | import mara_db.dbs 5 | 6 | db = mara_db.dbs.SqlcmdSQLServerDB(host='ABC123', user='A', password="a", trust_server_certificate=True) 7 | 8 | odbc = db.sqlalchemy_url 9 | 10 | print(odbc) 11 | 12 | assert False 13 | -------------------------------------------------------------------------------- /tests/seed/names_crlf_quoted_lastrow_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,"Elinor Meklit" 3 | 2,"Triana Mahalah" 4 | 3,"Eugraphios Esmae" 5 | 4,"Agustín Alvilda" 6 | 5,"Behruz Hathor" 7 | 6,"Mathilde Tola" 8 | 7,"Kapel Tupaq" 9 | 8,"Shet Badulf" 10 | 9,"Ruslan Vančo" 11 | 10,"Madhavi Traian" 12 | -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | MIT Source License 5 | ------------------ 6 | 7 | The MIT license applies to all files in the Mara repository 8 | and source distribution. This includes Mara's source code, the 9 | examples, and tests, as well as the documentation. 10 | 11 | .. include:: ../LICENSE 12 | -------------------------------------------------------------------------------- /tests/postgres/test_postgres_ddl.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE names 2 | ( 3 | id INT, 4 | name TEXT 5 | ); 6 | 7 | CREATE TABLE names_with_header 8 | ( 9 | id INT, 10 | name TEXT 11 | ); 12 | 13 | CREATE TABLE accounts_json 14 | ( 15 | data jsonb, 16 | row BIGINT GENERATED ALWAYS AS IDENTITY 17 | ); 18 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-20.04 8 | tools: 9 | python: "3.9" 10 | 11 | sphinx: 12 | configuration: docs/conf.py 13 | 14 | python: 15 | install: 16 | - requirements: docs/requirements.txt 17 | - method: pip 18 | path: . 
19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | build/ 8 | dist/ 9 | *.egg-info/ 10 | .eggs/ 11 | 12 | # Unit test / coverage reports 13 | .cache 14 | 15 | # Sphinx documentation 16 | docs/_build/ 17 | 18 | # Environments 19 | /.venv 20 | 21 | # Dev tools 22 | .idea 23 | 24 | # == Specific for this repository == 25 | /tests/local_config.py 26 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v3.2.0 6 | hooks: 7 | - id: trailing-whitespace 8 | exclude: ^tests/seed/ 9 | - id: end-of-file-fixer 10 | exclude: ^tests/seed/ 11 | - id: check-toml 12 | - id: check-yaml 13 | - id: check-added-large-files 14 | -------------------------------------------------------------------------------- /tests/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.1' 2 | 3 | services: 4 | postgres: 5 | image: postgres:14 6 | environment: 7 | POSTGRES_DB: mara 8 | POSTGRES_USER: mara 9 | POSTGRES_PASSWORD: mara 10 | POSTGRES_HOST_AUTH_METHOD: md5 11 | ports: 12 | - "5432" 13 | 14 | mssql: 15 | image: mcr.microsoft.com/mssql/server:2019-latest 16 | environment: 17 | - ACCEPT_EULA=Y 18 | - SA_PASSWORD=YourStrong@Passw0rd 19 | ports: 20 | - "1433" 21 | -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | CLI 2 | === 3 | 4 | .. module:: mara_db.cli 5 | 6 | This part of the documentation covers all the available CLI commands of Mara DB. 7 | 8 | 9 | ``migrate`` 10 | ----------- 11 | 12 | .. tabs:: 13 | 14 | .. group-tab:: Mara CLI 15 | 16 | .. code-block:: shell 17 | 18 | mara db migrate 19 | 20 | .. group-tab:: Mara Flask App 21 | 22 | .. code-block:: python 23 | 24 | flask mara-db migrate 25 | 26 | 27 | Compares the current database (db alias `mara`) with all defined models and applies 28 | the diff using alembic. 29 | -------------------------------------------------------------------------------- /tests/mssql/README.md: -------------------------------------------------------------------------------- 1 | SQL Server Test Matrix 2 | ====================== 3 | 4 | Some notes about which tests fail or are intentionally not implemented: 5 | 6 | sqsh 7 | ---- 8 | * does not support `trust_server_certificate` 9 | 10 | sqlcmd 11 | ------ 12 | All looks fine 13 | 14 | bcp 15 | --- 16 | 17 | Known issues: 18 | * return value is always zero (`0`) even when an import error occurs 19 | * an import file, e.g. a 
CSV, must end with an empty last row (`names_lf_lastrow.csv` is supported, but `names_lf.csv` is not) 20 | * db.`trust_server_certificate` is only supported when using mssql tools 18 and higher 21 | -------------------------------------------------------------------------------- /mara_db/mysql.py: -------------------------------------------------------------------------------- 1 | """Easy access to MySQL databases via mysql-client""" 2 | 3 | import typing 4 | from warnings import warn 5 | 6 | import mara_db.dbs 7 | 8 | 9 | def mysql_cursor_context(db: typing.Union[str, mara_db.dbs.MysqlDB]) -> 'MySQLdb.cursors.Cursor': 10 | """Creates a context with a mysql-client cursor for a database alias or database""" 11 | warn('Function mysql_cursor_context(db) is deprecated. Please use mara_db.dbs.cursor_context(db) instead.', category=DeprecationWarning) 12 | 13 | if isinstance(db, str): 14 | db = mara_db.dbs.db(db) 15 | 16 | assert (isinstance(db, mara_db.dbs.MysqlDB)) 17 | 18 | return mara_db.dbs.cursor_context(db) 19 | -------------------------------------------------------------------------------- /mara_db/databricks.py: -------------------------------------------------------------------------------- 1 | """Easy access to Databricks databases via databricks-sql-connector""" 2 | 3 | import typing 4 | from warnings import warn 5 | 6 | import mara_db.dbs 7 | 8 | 9 | def databricks_cursor_context(db: typing.Union[str, mara_db.dbs.DatabricksDB]) \ 10 | -> 'databricks.sql.client.Cursor': 11 | warn('Function databricks_cursor_context(db) is deprecated. Please use mara_db.dbs.cursor_context(db) instead.', 12 | category=DeprecationWarning) 13 | 14 | if isinstance(db, str): 15 | db = mara_db.dbs.db(db) 16 | 17 | assert (isinstance(db, mara_db.dbs.DatabricksDB)) 18 | 19 | return mara_db.dbs.cursor_context(db) 20 | -------------------------------------------------------------------------------- /mara_db/postgresql.py: -------------------------------------------------------------------------------- 1 | """Easy access to postgres databases via psycopg2""" 2 | 3 | import typing 4 | from warnings import warn 5 | 6 | import mara_db.dbs 7 | 8 | 9 | def postgres_cursor_context(db: typing.Union[str, mara_db.dbs.PostgreSQLDB]) -> 'psycopg2.extensions.cursor': 10 | """Creates a context with a psycopg2 cursor for a database alias or database""" 11 | warn('Function postgres_cursor_context(db) is deprecated. Please use mara_db.dbs.cursor_context(db) instead.', 12 | category=DeprecationWarning) 13 | 14 | if isinstance(db, str): 15 | db = mara_db.dbs.db(db) 16 | 17 | assert (isinstance(db, mara_db.dbs.PostgreSQLDB)) 18 | 19 | return mara_db.dbs.cursor_context(db) 20 | -------------------------------------------------------------------------------- /mara_db/sqlserver.py: -------------------------------------------------------------------------------- 1 | """Easy access to SQLServer databases via pyodbc-client""" 2 | 3 | import typing 4 | from warnings import warn 5 | 6 | import mara_db.dbs 7 | 8 | 9 | def sqlserver_cursor_context(db: typing.Union[str, mara_db.dbs.SQLServerDB]) -> 'pyodbc.Cursor': 10 | """Creates a context with a pyodbc-client cursor for a database alias or database""" 11 | warn('Function sqlserver_cursor_context(db) is deprecated. 
Please use mara_db.dbs.cursor_context(db) instead.', 12 | category=DeprecationWarning) 13 | 14 | if isinstance(db, str): 15 | db = mara_db.dbs.db(db) 16 | 17 | assert (isinstance(db, mara_db.dbs.SQLServerDB)) 18 | 19 | return mara_db.dbs.cursor_context(db) 20 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /mara_db/__init__.py: -------------------------------------------------------------------------------- 1 | """Make the functionalities of this package auto-discoverable by mara-app""" 2 | __version__ = '4.11.0' 3 | 4 | 5 | def MARA_CONFIG_MODULES(): 6 | from . import config 7 | return [config] 8 | 9 | 10 | def MARA_FLASK_BLUEPRINTS(): 11 | from . import views 12 | return [views.blueprint] 13 | 14 | 15 | def MARA_AUTOMIGRATE_SQLALCHEMY_MODELS(): 16 | return [] 17 | 18 | 19 | def MARA_ACL_RESOURCES(): 20 | from . import views 21 | return {'DB Schema': views.acl_resource} 22 | 23 | 24 | def MARA_CLICK_COMMANDS(): 25 | from . import cli 26 | return [cli.mara_db, 27 | cli._migrate] 28 | 29 | 30 | def MARA_NAVIGATION_ENTRIES(): 31 | from . import views 32 | return {'DB Schema': views.navigation_entry()} 33 | -------------------------------------------------------------------------------- /docs/config.rst: -------------------------------------------------------------------------------- 1 | Configuration 2 | ============= 3 | 4 | 5 | Mara Configuration Values 6 | ------------------------- 7 | 8 | The following configuration values are used by this module. They are defined as python functions in ``mara_db.config`` 9 | and can be changed with the `monkey patch`_ from `Mara App`_. An example can be found `here `_. 10 | 11 | .. _monkey patch: https://github.com/mara/mara-app/blob/master/mara_app/monkey_patch.py 12 | .. _Mara App: https://github.com/mara/mara-app 13 | 14 | 15 | .. module:: mara_db.config 16 | 17 | .. autofunction:: databases 18 | 19 | | 20 | 21 | .. autofunction:: default_timezone 22 | 23 | | 24 | 25 | .. autofunction:: default_echo_queries 26 | 27 | | 28 | 29 | .. 
autofunction:: schema_ui_foreign_key_column_regex 30 | -------------------------------------------------------------------------------- /mara_db/cli.py: -------------------------------------------------------------------------------- 1 | """Auto-migrate command line interface""" 2 | 3 | import click 4 | import sys 5 | from warnings import warn 6 | 7 | 8 | @click.group() 9 | def mara_db(): 10 | """Commands to interact with the database.""" 11 | pass 12 | 13 | 14 | @mara_db.command() 15 | def migrate(): 16 | """Compares the current database with all defined models and applies the diff""" 17 | import mara_db.auto_migration 18 | 19 | if not mara_db.auto_migration.auto_discover_models_and_migrate(): 20 | sys.exit(-1) 21 | 22 | 23 | # Old cli commands to be dropped in 5.0: 24 | 25 | @click.command("migrate") 26 | def _migrate(): 27 | """Compares the current database with all defined models and applies the diff""" 28 | warn("CLI command `mara_db.migrate` will be dropped in 5.0. Please use: `mara-db migrate`") 29 | migrate.callback() 30 | -------------------------------------------------------------------------------- /tests/command_helper.py: -------------------------------------------------------------------------------- 1 | """Helper functions to generate commands for testing""" 2 | import shlex 3 | 4 | from mara_db import shell 5 | 6 | 7 | def execute_sql_statement_command(db, sql_statement): 8 | command = f'echo {shlex.quote(sql_statement)} \\\n' 9 | command += ' | ' + shell.query_command(db) 10 | assert command 11 | print(command) 12 | return command 13 | 14 | def execute_sql_file_command(db, file_path): 15 | command = f'cat {file_path} \\\n' 16 | command += ' | ' + shell.query_command(db) 17 | assert command 18 | print(command) 19 | return command 20 | 21 | def execute_sql_statement_to_stdout_csv_command(db, sql_statement): 22 | command = f'echo {shlex.quote(sql_statement)} \\\n' 23 | command += ' | ' + shell.copy_to_stdout_command(db, delimiter_char=',') 24 | assert command 25 | print(command) 26 | return command 27 | -------------------------------------------------------------------------------- /mara_db/config.py: -------------------------------------------------------------------------------- 1 | """Configuration of database connections""" 2 | import typing 3 | 4 | from mara_db import dbs 5 | 6 | 7 | def databases() -> typing.Dict[str, dbs.DB]: 8 | """The database connections to use, by alias""" 9 | return {} 10 | 11 | 12 | def default_timezone() -> str: 13 | """ 14 | The default timezone to be used for database connections 15 | See: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones 16 | """ 17 | return 'Europe/Berlin' 18 | 19 | 20 | def default_echo_queries() -> bool: 21 | """ 22 | Whether queries should be printed on execution by default, if applicable 23 | """ 24 | return True 25 | 26 | 27 | def schema_ui_foreign_key_column_regex() -> typing.Pattern: 28 | """A regex that classifies a table column as being used in a foreign key constraint (for coloring missing constraints)""" 29 | return r'.*_fk$' 30 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: mara-db 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] 17 | steps: 18 | - name: 
Checkout code 19 | uses: actions/checkout@v3.3.0 20 | - name: Setup python 21 | uses: actions/setup-python@v4.5.0 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install application 25 | env: 26 | pythonversion: ${{ matrix.python-version }} 27 | run: | 28 | python -c "import sys; print(sys.version)" 29 | pip install .[test] 30 | echo Finished successful build with Python $pythonversion 31 | - name: Test with pytest 32 | run: | 33 | make tests/local_config.py 34 | pytest -v tests 35 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. module:: mara_db 5 | 6 | This part of the documentation covers all the interfaces of Mara DB. For 7 | parts where the package depends on external libraries, we document the most 8 | important ones right here and provide links to the canonical documentation. 9 | 10 | 11 | DBs 12 | --- 13 | 14 | .. module:: mara_db.dbs 15 | 16 | .. autofunction:: db 17 | 18 | .. autofunction:: connect 19 | 20 | .. autofunction:: cursor_context 21 | 22 | 23 | Auto migration 24 | -------------- 25 | 26 | .. module:: mara_db.auto_migration 27 | 28 | .. autofunction:: auto_migrate 29 | 30 | .. autofunction:: auto_discover_models_and_migrate 31 | 32 | 33 | Shell 34 | ----- 35 | 36 | .. module:: mara_db.shell 37 | 38 | .. autofunction:: query_command 39 | 40 | .. autofunction:: copy_to_stdout_command 41 | 42 | .. autofunction:: copy_from_stdin_command 43 | 44 | .. autofunction:: copy_command 45 | 46 | 47 | SQLAlchemy 48 | ---------- 49 | 50 | .. module:: mara_db.sqlalchemy_engine 51 | 52 | .. autofunction:: engine 53 | -------------------------------------------------------------------------------- /docs/dbs/SQLite.rst: -------------------------------------------------------------------------------- 1 | SQLite 2 | ====== 3 | 4 | 5 | 6 | Installation 7 | ------------ 8 | 9 | There are no special requirements for SQLite since it is already included in Python. 10 | 11 | The shell command `sqlite3` is required. This is available in standard distributions. 12 | Version >3.20.x is required (not the case on Ubuntu 14.04). 13 | 14 | 15 | Configuration examples 16 | ---------------------- 17 | 18 | .. tabs:: 19 | 20 | .. group-tab:: Local file 21 | 22 | .. code-block:: python 23 | 24 | import mara_db.dbs 25 | mara_db.config.databases = lambda: { 26 | 'dwh': mara_db.dbs.SQLiteDB( 27 | file_name='database.db'), 28 | } 29 | 30 | | 31 | 32 | | 33 | 34 | API reference 35 | ------------- 36 | 37 | This section contains database specific API in the module. 38 | 39 | 40 | Configuration 41 | ~~~~~~~~~~~~~ 42 | 43 | .. module:: mara_db.dbs 44 | :noindex: 45 | 46 | .. autoclass:: SQLiteDB 47 | :special-members: __init__ 48 | :inherited-members: 49 | :members: 50 | -------------------------------------------------------------------------------- /docs/dbs/Mysql.rst: -------------------------------------------------------------------------------- 1 | MySQL 2 | ===== 3 | 4 | 5 | Installation 6 | ------------ 7 | 8 | Use extras `mysql` to install all required packages. 9 | 10 | .. code-block:: shell 11 | 12 | $ pip install mara-db[mysql] 13 | 14 | 15 | Configuration examples 16 | ---------------------- 17 | 18 | .. tabs:: 19 | 20 | .. group-tab:: Default 21 | 22 | .. 
code-block:: python 23 | 24 | import mara_db.dbs 25 | mara_db.config.databases = lambda: { 26 | 'dwh': mara_db.dbs.MysqlDB( 27 | host='localhost', 28 | user='root', 29 | password='', 30 | database='dwh'), 31 | } 32 | 33 | | 34 | 35 | | 36 | 37 | API reference 38 | ------------- 39 | 40 | This section contains database specific API in the module. 41 | 42 | .. module:: mara_db.mysql 43 | 44 | Configuration 45 | ~~~~~~~~~~~~~ 46 | 47 | .. module:: mara_db.dbs 48 | :noindex: 49 | 50 | .. autoclass:: MysqlDB 51 | :special-members: __init__ 52 | :inherited-members: 53 | :members: 54 | -------------------------------------------------------------------------------- /tests/local_config.py.example: -------------------------------------------------------------------------------- 1 | # This file contains secrets used by the tests 2 | 3 | from mara_db import dbs 4 | 5 | # supported placeholders 6 | # host='DOCKER_IP' will be replaced with the ip address given from pytest-docker 7 | # port=-1 will be replaced with the port given from pytest-docker 8 | 9 | POSTGRES_DB = dbs.PostgreSQLDB(host='DOCKER_IP', port=-1, user="mara", password="mara", database="mara") 10 | MSSQL_DB = None # dbs.SQLServerDB(host='DOCKER_IP', port=-1, user='sa', password='YourStrong@Passw0rd', database='master') 11 | MSSQL_SQSH_DB = None # dbs.SqshSQLServerDB(host='DOCKER_IP', port=-1, user='sa', password='YourStrong@Passw0rd', database='master') 12 | MSSQL_SQLCMD_DB = None # dbs.SqlcmdSQLServerDB(host='DOCKER_IP', port=-1, user='sa', password='YourStrong@Passw0rd', database='master', trust_server_certificate=True) 13 | SNOWFLAKE_DB = None #dbs.SnowflakeDB( account='ACCOUNT_IDENTIFIER', user='USER', password='PASSWORD', database='SNOWFLAKE_SAMPLE_DATA') 14 | DATABRICKS_DB = None #dbs.DatabricksDB(host='DBSQLCLI_HOST_NAME', http_path='DBSQLCLI_HTTP_PATH', access_token='DBSQLCLI_ACCESS_TOKEN') 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Mara contributors 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | MODULE_NAME=mara_db 2 | 3 | 4 | all: 5 | # builds the virtual env 
and installs the module into it 6 | make .venv/bin/python 7 | make install 8 | 9 | 10 | install: 11 | # installs the module 12 | .venv/bin/pip install . 13 | 14 | 15 | test: 16 | make .venv/bin/python 17 | # runs the tests of the module 18 | .venv/bin/pip install .[test] 19 | make tests/local_config.py 20 | .venv/bin/pytest 21 | 22 | 23 | publish: 24 | # manually publish the package 25 | .venv/bin/pip install build twine 26 | .venv/bin/python -m build 27 | .venv/bin/twine upload dist/* 28 | 29 | 30 | clean: 31 | # clean up 32 | rm -rf .venv/ build/ dist/ ${MODULE_NAME}.egg-info/ .pytest_cache/ .eggs/ 33 | 34 | 35 | .PYTHON3:=$(shell PATH='$(subst $(CURDIR)/.venv/bin:,,$(PATH))' which python3) 36 | 37 | .venv/bin/python: 38 | mkdir -p .venv 39 | cd .venv && $(.PYTHON3) -m venv --copies --prompt='[$(shell basename `pwd`)/.venv]' . 40 | 41 | .venv/bin/python -m pip install --upgrade pip 42 | 43 | tests/local_config.py: 44 | cp -v tests/local_config.py.example tests/local_config.py 45 | @ >&2 echo '!!! copied tests/local_config.py.example to tests/local_config.py. Please check' 46 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = mara-db 3 | version = attr: mara_db.__version__ 4 | url = https://github.com/mara/mara-db 5 | description = Configuration and monitoring of database connections 6 | long_description = file: README.md 7 | long_description_content_type = text/markdown 8 | author = Mara contributors 9 | license = MIT 10 | 11 | [options] 12 | packages = mara_db 13 | python_requires = >= 3.6 14 | install_requires = 15 | SQLAlchemy>=1.1.5 16 | sqlalchemy-utils>=0.32.14 17 | alembic>=0.8.10 18 | multimethod>=1.0.0 19 | graphviz>=0.8 20 | mara-page>=1.3.0 21 | psycopg2-binary>=2.7.3 22 | 23 | [options.package_data] 24 | mara_db = static/* 25 | 26 | [options.extras_require] 27 | test = 28 | pytest 29 | pytest_click 30 | pytest-docker 31 | pytest-dependency 32 | SQLAlchemy>=1.2.0 33 | bigquery = 34 | google-cloud-bigquery 35 | google-cloud-bigquery-storage 36 | pyarrow 37 | sqlalchemy-bigquery 38 | mssql = pyodbc 39 | mysql = mysqlclient 40 | postgres = psycopg2-binary>=2.7.3 41 | redshift = 42 | psycopg2-binary>=2.7.3 43 | sqlalchemy-redshift 44 | snowflake = snowflake-sqlalchemy 45 | databricks = 46 | databricks-sql-cli 47 | databricks-sql-connector 48 | sqlalchemy-databricks 49 | 50 | [options.entry_points] 51 | mara.commands = 52 | db = mara_db.cli:mara_db 53 | -------------------------------------------------------------------------------- /docs/dbs/Oracle.rst: -------------------------------------------------------------------------------- 1 | Oracle 2 | ====== 3 | 4 | 5 | Installation 6 | ------------ 7 | 8 | You have to make sure that the `Oracle Instant Client `_ (`sqlplus64`) is installed. 9 | 10 | On Mac, follow `these instructions `_. Then `sudo ln -s /usr/local/bin/sqlplus /usr/local/bin/sqlplus64` to make the binary accessible as `sqlplus64`. 11 | 12 | 13 | Configuration examples 14 | ---------------------- 15 | 16 | .. tabs:: 17 | 18 | .. group-tab:: Default 19 | 20 | .. code-block:: python 21 | 22 | import mara_db.dbs 23 | mara_db.config.databases = lambda: { 24 | 'dwh': mara_db.dbs.OracleDB( 25 | host='localhost', 26 | user='root', 27 | password='', 28 | endpoint='oracle-endpoint'), 29 | } 30 | 31 | | 32 | 33 | | 34 | 35 | API reference 36 | ------------- 37 | 38 | This section contains database specific API in the module. 
39 | 40 | 41 | Configuration 42 | ~~~~~~~~~~~~~ 43 | 44 | .. module:: mara_db.dbs 45 | :noindex: 46 | 47 | .. autoclass:: OracleDB 48 | :special-members: __init__ 49 | :inherited-members: 50 | :members: 51 | -------------------------------------------------------------------------------- /tests/test_snowflake.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import subprocess 3 | 4 | from mara_db import shell, sqlalchemy_engine 5 | from tests.local_config import SNOWFLAKE_DB 6 | 7 | 8 | if not SNOWFLAKE_DB: 9 | pytest.skip("skipping SnowflakeDB tests: variable SNOWFLAKE_DB not set", allow_module_level=True) 10 | 11 | 12 | def test_snowflake_query_command(): 13 | command = 'echo "SELECT 1" \\\n' 14 | command += ' | ' + shell.query_command(SNOWFLAKE_DB) 15 | assert command 16 | 17 | print(command) 18 | (exitcode, _) = subprocess.getstatusoutput(command) 19 | assert exitcode == 0 20 | 21 | 22 | def test_snowflake_copy_to_stdout(): 23 | command = 'echo "SELECT 1 AS Col1, \'FOO\' AS Col2 UNION ALL SELECT 2, \'BAR\'" \\\n' 24 | command += ' | ' + shell.copy_to_stdout_command(SNOWFLAKE_DB, 25 | csv_format=True, 26 | header=True, 27 | delimiter_char=',') 28 | assert command 29 | 30 | print(command) 31 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 32 | assert exitcode == 0 33 | print(pstdout) 34 | assert pstdout == '''"COL1","COL2" 35 | "1","FOO" 36 | "2","BAR"''' 37 | 38 | 39 | def test_snowflake_sqlalchemy(): 40 | from sqlalchemy import text 41 | engine = sqlalchemy_engine.engine(SNOWFLAKE_DB) 42 | with engine.connect() as con: 43 | con.execute(statement = text("SELECT 1")) 44 | -------------------------------------------------------------------------------- /tests/seed/README.md: -------------------------------------------------------------------------------- 1 | This folder holds seed test files which are used to test `mara_db.shell.copy_from_stdin_command` 2 | 3 | | File | Encoding | Has header | delimiter_char | Quoted columns | File ending | Has trailing newline 4 | | ---------------------------------- | ------------------- | ---------- | -------------- | -------------- | ----------- | -------------------- 5 | | names_lf.csv | UTF-8 (without BOM) | No | `,` | No | LF | No 6 | | names_lf_header.csv | UTF-8 (without BOM) | Yes | `,` | No | LF | No 7 | | names_lf_lastrow.csv | UTF-8 (without BOM) | No | `,` | No | LF | Yes 8 | | names_lf_lastrow_header.csv | UTF-8 (without BOM) | Yes | `,` | No | LF | Yes 9 | | names_lf_quoted.csv | UTF-8 (without BOM) | No | `,` | Yes | LF | No 10 | | names_lf_quoted_header.csv | UTF-8 (without BOM) | Yes | `,` | Yes | LF | No 11 | | names_lf_quoted_lastrow.csv | UTF-8 (without BOM) | No | `,` | Yes | LF | Yes 12 | | names_lf_quoted_lastrow_header.csv | UTF-8 (without BOM) | Yes | `,` | Yes | LF | Yes 13 | | names_crlf.csv | UTF-8 (without BOM) | No | `,` | No | CRLF | No 14 | | names_crlf_header.csv | UTF-8 (without BOM) | Yes | `,` | No | CRLF | No 15 | | names_crlf_lastrow.csv | UTF-8 (without BOM) | No | `,` | No | CRLF | Yes 16 | | names_crlf_lastrow_header.csv | UTF-8 (without BOM) | Yes | `,` | No | CRLF | Yes 17 | | names_crlf_quoted.csv | UTF-8 (without BOM) | No | `,` | Yes | CRLF | No 18 | | names_crlf_quoted_header.csv | UTF-8 (without BOM) | Yes | `,` | Yes | CRLF | No 19 | | names_crlf_quoted_lastrow.csv | UTF-8 (without BOM) | No | `,` | Yes | CRLF | Yes 20 | | names_crlf_quoted_lastrow_header.csv | UTF-8 (without BOM) | Yes | `,` | Yes | CRLF | Yes 21 | -------------------------------------------------------------------------------- /mara_db/sqlalchemy_engine.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import sqlalchemy.engine 4 | import sqlalchemy.sql.schema 5 | 6 | import mara_db.dbs 7 | 8 | 9 | @functools.singledispatch 10 | def engine(db: object) -> sqlalchemy.engine.Engine: 11 | """ 12 | Returns a SQLAlchemy engine for a configured database connection 13 | 14 | Args: 15 | db: The database to use (either an alias or a `dbs.DB` object) 16 | 17 | Returns: 18 | The generated sqlalchemy engine 19 | 20 | Example: 21 | >>> print(engine('mara')) 22 | 
Engine(postgresql+psycopg2://None@localhost/mara) 23 | """ 24 | pass 25 | 26 | 27 | @engine.register(str) 28 | def __(alias: str, **_): 29 | return engine(mara_db.dbs.db(alias)) 30 | 31 | 32 | @engine.register(mara_db.dbs.DB) 33 | def __(db: mara_db.dbs.DB, **_): 34 | return sqlalchemy.create_engine(db.sqlalchemy_url) 35 | 36 | 37 | @engine.register(mara_db.dbs.BigQueryDB) 38 | def __(db: mara_db.dbs.BigQueryDB): 39 | # creates bigquery dialect 40 | url = db.sqlalchemy_url 41 | 42 | return sqlalchemy.create_engine(url, 43 | credentials_path=db.service_account_json_file_name, 44 | location=db.location) 45 | 46 | 47 | @engine.register(mara_db.dbs.DatabricksDB) 48 | def __(db: mara_db.dbs.DatabricksDB): 49 | url = db.sqlalchemy_url 50 | 51 | return sqlalchemy.create_engine(url, 52 | connect_args={ 53 | "http_path": db.http_path 54 | }) 55 | -------------------------------------------------------------------------------- /tests/test_databricks.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import subprocess 3 | 4 | from mara_db import shell, sqlalchemy_engine 5 | from tests.local_config import DATABRICKS_DB 6 | 7 | 8 | if not DATABRICKS_DB: 9 | pytest.skip("skipping DatabricksDB tests: variable DATABRICKS_DB not set", allow_module_level=True) 10 | 11 | 12 | def test_databricks_query_command(): 13 | command = 'echo "SELECT 1" \\\n' 14 | command += ' | ' + shell.query_command(DATABRICKS_DB) 15 | assert command 16 | 17 | print(command) 18 | (exitcode, _) = subprocess.getstatusoutput(command) 19 | assert exitcode == 0 20 | 21 | 22 | def test_databricks_copy_to_stdout(): 23 | command = 'echo "SELECT 1 AS Col1, \'FOO\' AS Col2 UNION ALL SELECT 2, \'BAR\'" \\\n' 24 | command += ' | ' + shell.copy_to_stdout_command(DATABRICKS_DB, 25 | csv_format=True, 26 | header=True, 27 | delimiter_char=',') 28 | assert command 29 | 30 | print(command) 31 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 32 | assert exitcode == 0 33 | print(pstdout) 34 | assert pstdout == '''Col1,Col2 35 | 1,FOO 36 | 2,BAR''' 37 | 38 | 39 | def test_databricks_sqlalchemy(): 40 | from sqlalchemy import text 41 | engine = sqlalchemy_engine.engine(DATABRICKS_DB) 42 | with engine.connect() as con: 43 | con.execute(statement = text("SELECT 1")) 44 | 45 | 46 | def test_databricks_connect(): 47 | """ 48 | A simple test to check if the connect API works. 49 | """ 50 | from .db_test_helper import _test_connect 51 | _test_connect(DATABRICKS_DB) 52 | 53 | 54 | def test_databricks_cursor_context(): 55 | """ 56 | A simple test to check if the cursor context of the db works. 57 | """ 58 | from .db_test_helper import _test_cursor_context 59 | _test_cursor_context(DATABRICKS_DB) 60 | -------------------------------------------------------------------------------- /docs/dbs/PostgreSQL.rst: -------------------------------------------------------------------------------- 1 | PostgreSQL 2 | ========== 3 | 4 | PostgreSQL is the main database engine and is currently installed by default. 5 | 6 | .. warning:: 7 | From version 5 the requirements for PostgreSQL will not be installed by default anymore. 8 | Please make sure to include extras ``postgres`` in your requirements.txt file, see below. 9 | 10 | 11 | Installation 12 | ------------ 13 | 14 | Use extras `postgres` to install all required packages. 15 | 16 | .. code-block:: shell 17 | 18 | $ pip install mara-db[postgres] 19 | 20 | The ``psql`` client is required, which can be installed on Ubuntu/Debian via 21 | 22 | .. 
code-block:: shell 23 | 24 | $ sudo apt-get install postgresql-client 25 | 26 | Configuration examples 27 | ---------------------- 28 | 29 | .. tabs:: 30 | 31 | .. group-tab:: Trusted authentication 32 | 33 | .. code-block:: python 34 | 35 | import mara_db.dbs 36 | mara_db.config.databases = lambda: { 37 | 'dwh': mara_db.dbs.PostgreSQLDB( 38 | host='localhost', 39 | user='root', 40 | database='dwh'), 41 | } 42 | 43 | .. group-tab:: Password authentication 44 | 45 | .. code-block:: python 46 | 47 | import mara_db.dbs 48 | mara_db.config.databases = lambda: { 49 | 'dwh': mara_db.dbs.PostgreSQLDB( 50 | host='localhost', 51 | user='root', 52 | password='', 53 | database='dwh'), 54 | } 55 | 56 | | 57 | 58 | | 59 | 60 | API reference 61 | ------------- 62 | 63 | This section contains database specific API in the module. 64 | 65 | Configuration 66 | ~~~~~~~~~~~~~ 67 | 68 | .. module:: mara_db.dbs 69 | :noindex: 70 | 71 | .. autoclass:: PostgreSQLDB 72 | :special-members: __init__ 73 | :inherited-members: 74 | :members: 75 | -------------------------------------------------------------------------------- /tests/mssql/test_mssql_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | For SQL Server we use a special configuration: 3 | - we use a generic 'SQLServerDB' config instance which instantiates the default provider 4 | - we support two different connection modes: via sqsh (SqshSQLServerDB) or via sqlcmd (SqlcmdSQLServerDB) 5 | 6 | To make sure that the config is properly implemented, the following unit tests are added. 7 | """ 8 | import functools 9 | 10 | from mara_db import dbs 11 | 12 | 13 | @functools.singledispatch 14 | def check_dbconfig(db) -> str: 15 | """A test of functools single-dispatch overloading""" 16 | raise Exception("Not expected to end up here in the test") 17 | 18 | @check_dbconfig.register(dbs.SQLServerDB) 19 | def __(db: dbs.SQLServerDB) -> str: 20 | return 'undefined' 21 | 22 | @check_dbconfig.register(dbs.SqlcmdSQLServerDB) 23 | def __(db: dbs.SqlcmdSQLServerDB) -> str: 24 | return 'sqlcmd' 25 | 26 | @check_dbconfig.register(dbs.SqshSQLServerDB) 27 | def __(db: dbs.SqshSQLServerDB) -> str: 28 | return 'sqsh' 29 | 30 | 31 | def test_mssql_dbconfig(): 32 | """Test the behavior of instantiating SQLServerDB""" 33 | 34 | sqlcmd_db = dbs.SqlcmdSQLServerDB(host="localhost") 35 | sqsh_db = dbs.SqshSQLServerDB(host="localhost") 36 | default_db = dbs.SQLServerDB(host="localhost") 37 | 38 | # check if singledispatch uses the right class 39 | assert check_dbconfig(sqlcmd_db) == "sqlcmd" 40 | assert check_dbconfig(sqsh_db) == "sqsh" 41 | assert check_dbconfig(default_db) in ["sqsh", "sqlcmd"] 42 | 43 | # check if all db config instances are detected via 'isinstance(..., SQLServerDB)' 44 | assert isinstance(sqlcmd_db, dbs.SQLServerDB) 45 | assert isinstance(sqsh_db, dbs.SQLServerDB) 46 | assert isinstance(default_db, dbs.SQLServerDB) 47 | 48 | # check that we get 'SqlcmdSQLServerDB' or 'SqshSQLServerDB' when instantiating 'dbs.SQLServerDB(...)' 49 | assert isinstance(default_db, dbs.SqlcmdSQLServerDB) or isinstance(default_db, dbs.SqshSQLServerDB) 50 | -------------------------------------------------------------------------------- /tests/db_test_helper.py: -------------------------------------------------------------------------------- 1 | import sqlalchemy 2 | from mara_db import dbs 3 | 4 | 5 | def db_is_responsive(db: dbs.DB) -> bool: 6 | """Returns True when the DB is available on the given port, otherwise False""" 7 | engine = 
sqlalchemy.create_engine(db.sqlalchemy_url, pool_pre_ping=True) 8 | 9 | try: 10 | with engine.connect() as conn: 11 | return True 12 | except Exception: 13 | return False 14 | 15 | 16 | def db_replace_placeholders(db: dbs.DB, docker_ip: str, docker_port: int) -> dbs.DB: 17 | """Replaces the internal placeholders with the docker ip and docker port""" 18 | if db.host == 'DOCKER_IP': 19 | db.host = docker_ip 20 | if db.port == -1: 21 | db.port = docker_port 22 | return db 23 | 24 | 25 | """ 26 | Basic tests which can be used for different DB engines. 27 | """ 28 | 29 | def _test_sqlalchemy(db: dbs.DB): 30 | """ 31 | A simple test to check if the SQLAlchemy connection works 32 | """ 33 | from mara_db.sqlalchemy_engine import engine 34 | from sqlalchemy import select 35 | 36 | eng = engine(db) 37 | 38 | with eng.connect() as conn: 39 | # run a SELECT 1. use a core select() so that 40 | # the SELECT of a scalar value without a table is 41 | # appropriately formatted for the backend 42 | assert conn.scalar(select(1)) == 1 43 | 44 | def _test_connect(db: dbs.DB): 45 | connection = dbs.connect(db) 46 | cursor = connection.cursor() 47 | try: 48 | cursor.execute('SELECT 1') 49 | row = cursor.fetchone() 50 | assert row[0] == 1 51 | connection.commit() 52 | except Exception as e: 53 | connection.rollback() 54 | raise e 55 | finally: 56 | cursor.close() 57 | connection.close() 58 | 59 | def _test_cursor_context(db: dbs.DB): 60 | with dbs.cursor_context(db) as cursor: 61 | cursor.execute('SELECT 1') 62 | row = cursor.fetchone() 63 | assert row[0] == 1 64 | -------------------------------------------------------------------------------- /mara_db/static/schema-page.js: -------------------------------------------------------------------------------- 1 | var SchemaPage = function (baseUrl, dbAlias) { 2 | 3 | function localStorageKey(schema) { 4 | return 'db-schema-' + dbAlias + '-' + schema; 5 | } 6 | 7 | $('.schema-checkbox').each(function (n, checkbox) { 8 | if (localStorage.getItem(localStorageKey(checkbox.value)) == 'true') { 9 | checkbox.checked = true; 10 | } 11 | }); 12 | 13 | if (localStorage.getItem('db-schema-hide-columns') == 'true') { 14 | $('#hide-columns-checkbox')[0].checked = true; 15 | } 16 | 17 | if (localStorage.getItem('db-schema-engine')) { 18 | $('#engine').val(localStorage.getItem('db-schema-engine')); 19 | } 20 | 21 | var url = ''; 22 | 23 | function updateUI() { 24 | var selectedSchemas = []; 25 | $('.schema-checkbox').each(function (n, checkbox) { 26 | if (checkbox.checked) { 27 | selectedSchemas.push(checkbox.value); 28 | } 29 | localStorage.setItem(localStorageKey(checkbox.value), checkbox.checked); 30 | }); 31 | localStorage.setItem('db-schema-hide-columns', $('#hide-columns-checkbox')[0].checked); 32 | localStorage.setItem('db-schema-engine', $('#engine').val()); 33 | 34 | if (selectedSchemas.length > 0) { 35 | $('#schema-container').html(spinner()); 36 | url = baseUrl + '/' + selectedSchemas.join('/') + '?engine=' + $('#engine').val(); 37 | if ($('#hide-columns-checkbox')[0].checked) { 38 | url += '&hide-columns=true' 39 | } 40 | loadContentAsynchronously('schema-container', url); 41 | } else { 42 | $('#schema-container').html('No schemas selected'); 43 | } 44 | 45 | } 46 | 47 | function downloadSvg() { 48 | window.location.href = url; 49 | } 50 | 51 | $('.schema-checkbox').change(updateUI); 52 | $('#hide-columns-checkbox').change(updateUI); 53 | $('#engine').change(updateUI); 54 | 55 | updateUI(); 56 | 57 | return {'downloadSvg': downloadSvg} 58 | }; 59 | 
-------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Python Version 5 | -------------- 6 | 7 | We recommend using the latest version of Python. Mara supports Python 8 | 3.6 and newer. 9 | 10 | Dependencies 11 | ------------ 12 | 13 | These packages will be installed automatically when installing Mara DB. 14 | 15 | * [SQLAlchemy] the Database SQL Toolkit and Object Relational Mapper (ORM) for Python 16 | * [SQLAlchemy-Utils] various utility functions, new data types and helpers for SQLAlchemy 17 | * [Alembic] a lightweight database migration tool 18 | * [Multimethod] provides a decorator for adding multiple argument dispatching to functions 19 | * [Graphviz] facilitates the creation and rendering of graph descriptions in the [DOT](https://www.graphviz.org/doc/info/lang.html) language of the [Graphviz](https://www.graphviz.org/) graph drawing software from Python. 20 | * [Mara Page] the Mara core module for defining pages of Flask-based backends 21 | * [psycopg2-binary] required for PostgreSQL database support 22 | 23 | [SQLAlchemy]: https://www.sqlalchemy.org/ 24 | [SQLAlchemy-Utils]: https://sqlalchemy-utils.readthedocs.io/ 25 | [Alembic]: https://alembic.sqlalchemy.org/ 26 | [Multimethod]: https://pypi.org/project/multimethod/ 27 | [Graphviz]: https://graphviz.readthedocs.io/ 28 | [Mara Page]: https://mara-page.readthedocs.io/ 29 | [psycopg2-binary]: https://pypi.org/project/psycopg2-binary/ 30 | 31 | ```{warning} 32 | The package ``psycopg2-binary`` is planned to be removed as a default requirement. When using PostgreSQL as database 33 | backend, please use extras ``postgres`` like `mara-db[postgres]` to make sure that the module gets installed. 34 | ``` 35 | 36 | Install Mara DB 37 | --------------- 38 | 39 | To use the library directly, use pip: 40 | 41 | ``` bash 42 | $ pip install mara-db 43 | ``` 44 | 45 | or 46 | 47 | ``` bash 48 | $ pip install git+https://github.com/mara/mara-db.git 49 | ``` 50 | 51 | ```{note} 52 | For most of the database engines additional Python packages are required which can be installed via extras. 53 | 54 | For example, for PostgreSQL use 55 | 56 | ``$ pip install mara-db[postgres]`` 57 | 58 | to make sure all additional required packages are installed. 59 | ``` 60 | -------------------------------------------------------------------------------- /docs/dbs/Redshift.rst: -------------------------------------------------------------------------------- 1 | Amazon Redshift 2 | =============== 3 | 4 | .. warning:: 5 | From version 5 the package ``psycopg2-binary`` will not be installed by default anymore. 6 | Please make sure to include extras ``redshift`` in your requirements.txt file, see below. 7 | 8 | 9 | Installation 10 | ------------ 11 | 12 | Use extras `redshift` to install all required packages. 13 | 14 | .. code-block:: shell 15 | 16 | $ pip install mara-db[redshift] 17 | 18 | The ``psql`` client is required, which can be installed on Ubuntu/Debian via 19 | 20 | .. code-block:: shell 21 | 22 | $ sudo apt-get install postgresql-client 23 | 24 | To read from STDIN, an additional S3 bucket is required as temporary storage. You need to install the `awscli `_ package in addition: 25 | 26 | .. code-block:: shell 27 | 28 | $ pip install awscli 29 | 30 | 31 | Configuration examples 32 | ---------------------- 33 | 34 | .. tabs:: 35 | 36 | .. group-tab:: Default 37 | 38 | .. 
code-block:: python 39 | 40 | import mara_db.dbs 41 | mara_db.config.databases = lambda: { 42 | 'dwh': mara_db.dbs.RedshiftDB( 43 | host='localhost', 44 | user='root', 45 | password='', 46 | database='dwh'), 47 | } 48 | 49 | .. group-tab:: With S3 bucket 50 | 51 | .. code-block:: python 52 | 53 | import mara_db.dbs 54 | mara_db.config.databases = lambda: { 55 | 'dwh': mara_db.dbs.RedshiftDB( 56 | host='localhost', 57 | user='root', 58 | password='', 59 | database='dwh', 60 | aws_access_key_id='...', 61 | aws_secret_access_key='...', 62 | aws_s3_bucket_name='my-s3-bucket'), 63 | } 64 | 65 | | 66 | 67 | | 68 | 69 | API reference 70 | ------------- 71 | 72 | This section contains database specific API in the module. 73 | 74 | 75 | Configuration 76 | ~~~~~~~~~~~~~ 77 | 78 | .. module:: mara_db.dbs 79 | :noindex: 80 | 81 | .. autoclass:: RedshiftDB 82 | :special-members: __init__ 83 | :inherited-members: 84 | :members: 85 | -------------------------------------------------------------------------------- /docs/dbs/Snowflake.rst: -------------------------------------------------------------------------------- 1 | Snowflake 2 | ========= 3 | 4 | 5 | Installation 6 | ------------ 7 | 8 | Use extras `snowflake` to install all required packages. 9 | 10 | .. code-block:: shell 11 | 12 | $ pip install mara-db[snowflake] 13 | 14 | The official `snowsql` client is required. See the `Installing SnowSQL `_ page for installation details. 15 | 16 | 17 | Configuration examples 18 | ---------------------- 19 | 20 | .. tabs:: 21 | 22 | .. group-tab:: Use account 23 | 24 | .. code-block:: python 25 | 26 | import mara_db.dbs 27 | mara_db.config.databases = lambda: { 28 | 'dwh': mara_db.dbs.SnowflakeDB( 29 | account='kaXXXXX.regio.cloud', 30 | user='', 31 | password='', 32 | database='dwh'), 33 | } 34 | 35 | .. group-tab:: Private key file 36 | 37 | .. code-block:: python 38 | 39 | import mara_db.dbs 40 | mara_db.config.databases = lambda: { 41 | 'dwh': mara_db.dbs.SnowflakeDB( 42 | account='kaXXXXX.regio.cloud', 43 | user='', 44 | private_key_file='/rsa_key.p8', 45 | private_key_passphrase='', 46 | database='dwh'), 47 | } 48 | 49 | .. group-tab:: Local connection configuration 50 | 51 | You can configure a named connection in the snowsql config file. See `here `_. 52 | 53 | .. code-block:: python 54 | 55 | import mara_db.dbs 56 | mara_db.config.databases = lambda: { 57 | 'dwh': mara_db.dbs.SnowflakeDB( 58 | connection='my_example_connection', 59 | database='dwh'), 60 | } 61 | 62 | | 63 | 64 | | 65 | 66 | API reference 67 | ------------- 68 | 69 | This section contains database specific API in the module. 70 | 71 | 72 | Configuration 73 | ~~~~~~~~~~~~~ 74 | 75 | .. module:: mara_db.dbs 76 | :noindex: 77 | 78 | .. autoclass:: SnowflakeDB 79 | :special-members: __init__ 80 | :inherited-members: 81 | :members: 82 | -------------------------------------------------------------------------------- /docs/dbs/Databricks.rst: -------------------------------------------------------------------------------- 1 | Databricks 2 | ========== 3 | 4 | 5 | Installation 6 | ------------ 7 | 8 | Use extras `databricks` to install all required packages. 9 | 10 | .. code-block:: shell 11 | 12 | $ pip install mara-db[databricks] 13 | 14 | The official `dbsqlcli` client is required. See the `Install the Databricks SQL CLI `_ page for installation details. 15 | 16 | 17 | Configuration examples 18 | ---------------------- 19 | 20 | .. tabs:: 21 | 22 | .. group-tab:: Use access token 23 | 24 | .. 
code-block:: python 25 | 26 | import mara_db.dbs 27 | mara_db.config.databases = lambda: { 28 | 'dwh': mara_db.dbs.DatabricksDB( 29 | host='dbc-a1b2345c-d6e78.cloud.databricks.com', 30 | http_path='/sql/1.0/warehouses/1abc2d3456e7f890a', 31 | access_token='dapi1234567890b2cd34ef5a67bc8de90fa12b'), 32 | } 33 | 34 | .. group-tab:: Environment variables 35 | 36 | .. code-block:: python 37 | 38 | import mara_db.dbs 39 | mara_db.config.databases = lambda: { 40 | 'dwh': mara_db.dbs.DatabricksDB(), 41 | } 42 | 43 | You need to define the environment variables `DBSQLCLI_HOST_NAME`, `DBSQLCLI_HTTP_PATH` and `DBSQLCLI_ACCESS_TOKEN`. See also `Environment variables `_. 44 | 45 | .. group-tab:: Settings file 46 | 47 | .. code-block:: python 48 | 49 | import mara_db.dbs 50 | mara_db.config.databases = lambda: { 51 | 'dwh': mara_db.dbs.DatabricksDB(), 52 | } 53 | 54 | You need to define the database connection in the `dbsqlclirc` settings file. See also `Settings file `_. Note that using a custom settings file is currently not supported in Mara. 55 | 56 | | 57 | 58 | | 59 | 60 | API reference 61 | ------------- 62 | 63 | This section contains database specific API in the module. 64 | 65 | .. module:: mara_db.databricks 66 | 67 | Configuration 68 | ~~~~~~~~~~~~~ 69 | 70 | .. module:: mara_db.dbs 71 | :noindex: 72 | 73 | .. autoclass:: DatabricksDB 74 | :special-members: __init__ 75 | :inherited-members: 76 | :members: 77 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'Mara DB' 21 | copyright = '2017-2022, Mara contributors' 22 | author = 'Mara contributors' 23 | 24 | # The short X.Y version. 25 | from mara_db import __version__ 26 | version = __version__ 27 | # The full version, including alpha/beta/rc tags 28 | release = version 29 | 30 | 31 | # -- General configuration --------------------------------------------------- 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = [ 37 | 'sphinx.ext.autodoc', 38 | 'sphinx_tabs.tabs', 39 | 'sphinxcontrib.napoleon', 40 | 'myst_parser', 41 | ] 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # List of patterns, relative to source directory, that match files and 47 | # directories to ignore when looking for source files. 48 | # This pattern also affects html_static_path and html_extra_path. 
49 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 50 | 51 | 52 | # -- Options for HTML output ------------------------------------------------- 53 | 54 | # The theme to use for HTML and HTML Help pages. See the documentation for 55 | # a list of builtin themes. 56 | # 57 | html_theme = 'alabaster' 58 | 59 | # Add any paths that contain custom static files (such as style sheets) here, 60 | # relative to this directory. They are copied after the builtin static files, 61 | # so a file named "default.css" will overwrite the builtin "default.css". 62 | html_static_path = ['_static'] 63 | html_favicon = "_static/favicon.ico" 64 | html_logo = "_static/mara-animal.jpg" 65 | html_title = f"Mara DB Documentation ({version})" 66 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. rst-class:: hide-header 2 | 3 | Mara DB documentation 4 | ======================= 5 | 6 | Welcome to Mara DB’s documentation. This is one of the core modules of the `Mara Framework `_ 7 | for configuring and accessing multiple databases. It decouples the use of databases from their configuration by referring to databases via "aliases". 8 | 9 | The module ``mara_db.dbs`` contains abstract database configurations for various database backends. The database connections of a project 10 | are configured by overwriting the ``databases`` function in ``mara_db.config``. 11 | 12 | .. code-block:: python 13 | 14 | import mara_db.config 15 | import mara_db.dbs 16 | 17 | ## configure database connections for different aliases 18 | mara_db.config.databases = lambda: { 19 | 'mara': mara_db.dbs.PostgreSQLDB(host='localhost', user='root', database='mara'), 20 | 'dwh': mara_db.dbs.PostgreSQLDB(database='dwh'), 21 | 'source-1': mara_db.dbs.MysqlDB(host='some-localhost', database='my_app', user='dwh'), 22 | 'source-2': mara_db.dbs.SQLServerDB(user='dwh_read', password='123abc', database='db1', host='some-sql-server') 23 | } 24 | 25 | ## access individual database configurations with `dbs.db`: 26 | print(mara_db.dbs.db('mara')) 27 | # -> 28 | 29 | 30 | User's Guide 31 | ------------ 32 | 33 | This part of the documentation focuses on step-by-step instructions on how to use this module. 34 | 35 | .. toctree:: 36 | :maxdepth: 2 37 | 38 | installation 39 | config 40 | 41 | 42 | Databases 43 | --------- 44 | 45 | This section focuses on the supported database engines. 46 | 47 | .. toctree:: 48 | :maxdepth: 2 49 | 50 | databases-overview 51 | dbs/PostgreSQL 52 | dbs/Redshift 53 | dbs/BigQuery 54 | dbs/Databricks 55 | dbs/Oracle 56 | dbs/SQLServer 57 | dbs/Mysql 58 | dbs/Snowflake 59 | dbs/SQLite 60 | 61 | 62 | CLI commands 63 | ------------ 64 | 65 | If you are looking for the available CLI commands, this is the right place. 66 | 67 | .. toctree:: 68 | :maxdepth: 2 69 | 70 | cli 71 | 72 | 73 | API Reference 74 | ------------- 75 | 76 | If you are looking for information on a specific function, class or 77 | method, this part of the documentation is for you. 78 | 79 | .. toctree:: 80 | :maxdepth: 2 81 | 82 | api 83 | 84 | 85 | Additional Notes 86 | ---------------- 87 | 88 | Legal information and changelog are here for the interested. 89 | 90 | .. 
toctree:: 91 | :maxdepth: 2 92 | 93 | license 94 | changes 95 | -------------------------------------------------------------------------------- /mara_db/formats.py: -------------------------------------------------------------------------------- 1 | """Different formats for piping""" 2 | from typing import Optional 3 | 4 | 5 | class Format: 6 | """Base format definition""" 7 | 8 | def __repr__(self) -> str: 9 | return (f'<{self.__class__.__name__}: ' 10 | + ', '.join([f'{var}={getattr(self, var)}' 11 | for var in vars(self) if getattr(self, var)]) 12 | + '>') 13 | 14 | 15 | class NativeFormat(Format): 16 | """Use the native format of e.g. a database.""" 17 | def __init__(self): 18 | pass 19 | 20 | 21 | class CsvFormat(Format): 22 | """ 23 | CSV file format. See https://tools.ietf.org/html/rfc4180 24 | """ 25 | def __init__(self, delimiter_char: str = ',', quote_char: Optional[str] = None, header: bool = False, footer: bool = False, null_value_string: Optional[str] = None): 26 | """ 27 | CSV file format. See https://tools.ietf.org/html/rfc4180 28 | 29 | Args: 30 | delimiter_char: The character that separates columns 31 | quote_char: The character for quoting strings 32 | header: Whether a csv header with the column name(s) is part of the CSV file. 33 | footer: Whether a footer will be included or not. False by default. 34 | null_value_string: The string used to indicate NULL. 35 | """ 36 | self.delimiter_char = delimiter_char or ',' 37 | self.quote_char = quote_char 38 | self.header = header or False 39 | self.footer = footer or False 40 | self.null_value_string = null_value_string 41 | 42 | 43 | class JsonlFormat(Format): 44 | """New line delimited JSON stream. See https://en.wikipedia.org/wiki/JSON_streaming""" 45 | def __init__(self): 46 | pass 47 | 48 | 49 | class AvroFormat(Format): 50 | """Apache Avro""" 51 | def __init__(self): 52 | pass 53 | 54 | 55 | class ParquetFormat(Format): 56 | """Apache Parquet""" 57 | def __init__(self): 58 | pass 59 | 60 | 61 | class OrcFormat(Format): 62 | """Apache ORC""" 63 | def __init__(self): 64 | pass 65 | 66 | 67 | def _check_format_with_args_used(pipe_format: Format, header: Optional[bool] = None, footer: Optional[bool] = None, delimiter_char: Optional[str] = None, 68 | csv_format: Optional[bool] = None, quote_char: Optional[str] = None, null_value_string: Optional[str] = None): 69 | if pipe_format: 70 | assert all(v is None for v in [header, footer, delimiter_char, csv_format, quote_char, null_value_string]), "You cannot pass format and an old parameter (header, footer, delimiter_char, csv_format, quote_char, null_value_string) at the same time" 71 | 72 | 73 | def _get_format_from_args(header: Optional[bool] = None, footer: Optional[bool] = None, delimiter_char: Optional[str] = None, csv_format: Optional[bool] = None, 74 | quote_char: Optional[str] = None, null_value_string: Optional[str] = None) -> Format: 75 | """An internal method handling legacy parameter settings""" 76 | if csv_format or (delimiter_char and csv_format is None): 77 | return CsvFormat(delimiter_char=delimiter_char, 78 | quote_char=quote_char, 79 | header=header, 80 | footer=footer, 81 | null_value_string=null_value_string) 82 | else: 83 | return NativeFormat() 84 | -------------------------------------------------------------------------------- /docs/dbs/BigQuery.rst: -------------------------------------------------------------------------------- 1 | Google Big Query 2 | ================ 3 | 4 | Optionally, for loading data from files into BigQuery, the `gcloud_gcs_bucket_name` can be 
specified in the database initialization. 5 | The specified Google Cloud Storage bucket will then be used as a cache for loading data, overcoming potential limitations. 6 | For more, see `loading-data `_. 7 | By default, files will be loaded directly from the local machine as described in `loading-local-data `_. 8 | 9 | Installation 10 | ------------ 11 | 12 | Use extras `bigquery` to install all required packages. 13 | 14 | .. code-block:: shell 15 | 16 | $ pip install mara-db[bigquery] 17 | 18 | The official `bq` and `gcloud` clients are required. 19 | See the `Google Cloud SDK `_ page for installation details. 20 | 21 | Enabling the BigQuery API and service account JSON credentials are also required, as listed 22 | in the official documentation `here `_. 23 | 24 | One-time authentication of the used service account: 25 | 26 | .. code-block:: bash 27 | 28 | $ gcloud auth activate-service-account --key-file='path-to/service-account.json' 29 | 30 | To read from STDIN, an additional Google Cloud Storage bucket is required as temporary storage. 31 | 32 | Configuration examples 33 | ---------------------- 34 | 35 | .. tabs:: 36 | 37 | .. group-tab:: Service account 38 | 39 | .. code-block:: python 40 | 41 | import mara_db.dbs 42 | mara_db.config.databases = lambda: { 43 | 'dwh': mara_db.dbs.BigQueryDB( 44 | service_account_json_file_name='service-account.json', 45 | location='EU', 46 | project='my-project-name', 47 | dataset='dwh'), 48 | } 49 | 50 | .. group-tab:: ... with GCS bucket 51 | 52 | .. code-block:: python 53 | 54 | import mara_db.dbs 55 | mara_db.config.databases = lambda: { 56 | 'dwh': mara_db.dbs.BigQueryDB( 57 | service_account_json_file_name='service-account.json', 58 | location='EU', 59 | project='my-project-name', 60 | dataset='dwh', 61 | gcloud_gcs_bucket_name='my-temp-bucket'), 62 | } 63 | 64 | | 65 | 66 | | 67 | 68 | API reference 69 | ------------- 70 | 71 | This section contains database specific API in the module. 72 | 73 | .. module:: mara_db.bigquery 74 | 75 | Configuration 76 | ~~~~~~~~~~~~~ 77 | 78 | .. module:: mara_db.dbs 79 | :noindex: 80 | 81 | .. autoclass:: BigQueryDB 82 | :special-members: __init__ 83 | :inherited-members: 84 | :members: 85 | 86 | 87 | General helper functions 88 | ~~~~~~~~~~~~~~~~~~~~~~~~ 89 | 90 | .. module:: mara_db.bigquery 91 | :noindex: 92 | 93 | .. autofunction:: bigquery_credentials 94 | 95 | .. autofunction:: bigquery_client 96 | 97 | Data modelling helper functions 98 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 99 | 100 | .. module:: mara_db.bigquery 101 | :noindex: 102 | 103 | .. autofunction:: create_bigquery_table_from_postgresql_query 104 | 105 | .. 
autofunction:: replace_dataset 106 | -------------------------------------------------------------------------------- /tests/mssql/test_mssql_sqsh.py_: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pytest 3 | import subprocess 4 | import typing as t 5 | 6 | from mara_db import dbs, shell 7 | 8 | from ..command_helper import * 9 | from ..db_test_helper import db_is_responsive 10 | from .test_mssql import MSSQL_USER, MSSQL_PASSWORD, MSSQL_DATABASE 11 | 12 | # make sure that the tests of 'test_mssql' are run before the tests in this file: 13 | from .test_mssql import test_mssql_ddl 14 | 15 | 16 | @pytest.fixture(scope="session") 17 | def mssql_sqsh_db(docker_ip, docker_services) -> t.Tuple[str, int]: 18 | """Ensures that SQL Server server is running on docker.""" 19 | 20 | docker_port = docker_services.port_for("mssql", 1433) 21 | db = dbs.SqshSQLServerDB(host=docker_ip, port=docker_port, user=MSSQL_USER, password=MSSQL_PASSWORD, database=MSSQL_DATABASE) 22 | 23 | # here we need to wait until the SQL Server port is available. 24 | docker_services.wait_until_responsive( 25 | timeout=30.0, pause=0.1, check=lambda: db_is_responsive(db) 26 | ) 27 | 28 | return db 29 | 30 | 31 | @pytest.mark.dependency() 32 | def test_mssql_sqsh_shell_query_command(mssql_sqsh_db): 33 | command = execute_sql_statement_command(mssql_sqsh_db, "SELECT 1") 34 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 35 | print(pstdout) 36 | assert exitcode == 0 37 | 38 | 39 | @pytest.mark.dependency() 40 | def test_mssql_sqsh_shell_copy_to_stout(mssql_sqsh_db): 41 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqsh_db, "SELECT 1 AS Col1, 'FOO' AS Col2 UNION ALL SELECT 2, 'BAR'") 42 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 43 | print(pstdout) 44 | assert exitcode == 0 45 | assert pstdout == '''1,FOO 46 | 2,BAR''' 47 | 48 | 49 | @pytest.mark.dependency(depends=["test_mssql_sqsh_shell_query_command", "test_mssql_sqsh_shell_copy_to_stout"]) 50 | def test_mssql_sqsh_shell_copy_from_stdin_csv_noheader(mssql_sqsh_db): 51 | # reading csv file... 52 | names_csv_file_path = str((pathlib.Path(__file__).parent / '../seed/names.csv').absolute()) 53 | command = f'cat {names_csv_file_path} \\\n' 54 | command += ' | ' + shell.copy_from_stdin_command(mssql_sqsh_db,target_table='names',csv_format=True,skip_header=False)#,delimiter_char=',') 55 | print(command) 56 | 57 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 58 | print(pstdout) 59 | assert exitcode == 0 60 | 61 | # check if writing was successful 62 | 63 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqsh_db, "SELECT COUNT(*) FROM names") 64 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 65 | assert exitcode == 0 66 | assert pstdout == "10" 67 | 68 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqsh_db, "SELECT name FROM names WHERE id = 1") 69 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 70 | assert exitcode == 0 71 | assert pstdout == "Elinor Meklit" 72 | 73 | 74 | @pytest.mark.dependency(depends=["test_mssql_sqsh_shell_query_command", "test_mssql_sqsh_shell_copy_to_stout", "test_mssql_ddl"]) 75 | def test_mssql_sqsh_shell_copy_from_stdin_csv_skipheader(mssql_sqsh_db): 76 | # reading csv file... 
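    # a brief orientation comment (added for clarity): `cat` streams the seed CSV into the shell pipeline that shell.copy_from_stdin_command generates for the given database, exactly as in the no-header test above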
77 | names_csv_file_path = str((pathlib.Path(__file__).parent / '../seed/names_header.csv').absolute()) 78 | command = f'cat {names_csv_file_path} \\\n' 79 | command += ' | ' + shell.copy_from_stdin_command(mssql_sqsh_db,target_table='names_with_header',csv_format=True,skip_header=True,delimiter_char=',') 80 | print(command) 81 | 82 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 83 | print(pstdout) 84 | assert exitcode == 0 85 | 86 | # check if writing was successful 87 | 88 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqsh_db, "SELECT COUNT(*) FROM names_with_header") 89 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 90 | assert exitcode == 0 91 | assert pstdout == "10" 92 | 93 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqsh_db, "SELECT name FROM names_with_header WHERE id = 1") 94 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 95 | assert exitcode == 0 96 | assert pstdout == "Elinor Meklit" 97 | -------------------------------------------------------------------------------- /docs/dbs/SQLServer.rst: -------------------------------------------------------------------------------- 1 | Microsoft SQL Server 2 | ==================== 3 | 4 | There are two ways to use SQL Server with mara: 5 | 6 | 1. using the official MSSQL Tools for SQL Server on linux (`sqlcmd`, `bcp`) 7 | 2. using the linux sql client tool `sqsh` (legacy) 8 | 9 | Currently by default `sqsh` is used. This will be changed in a future version to the official MSSQL Tools from Microsoft. You can explicitly 10 | specify the client tool you want to use, see below. 11 | 12 | 13 | Prerequisites 14 | ------------- 15 | 16 | On Ubuntu/Debian make sure you have the ODBC header files before installing 17 | 18 | .. code-block:: shell 19 | 20 | $ sudo apt install unixodbc-dev 21 | 22 | The python module `pyodbc `_ requires a ODBC driver to be installed. By default Microsoft ODBC Driver 17 for SQL Server is used. You can find the installation guide here: 23 | `Installing the Microsoft ODBC Driver for SQL Server (Linux) `_. 24 | 25 | 26 | Installation 27 | ------------ 28 | 29 | Use extras `mssql` to install all required packages. 30 | 31 | .. code-block:: shell 32 | 33 | $ pip install mara-db[mssql] 34 | 35 | Use MSSQL Tools 36 | ~~~~~~~~~~~~~~~ 37 | 38 | To see how to install the MSSQL Tools, follow this guide: 39 | `Install the SQL Server command-line tools sqlcmd and bcp on Linux `_ 40 | 41 | 42 | Use sqsh 43 | ~~~~~~~~ 44 | To install the `sqsh` shell tool, see here https://sourceforge.net/projects/sqsh/. Usually messy to get working. 45 | On ubuntu, use http://ppa.launchpad.net/jasc/sqsh/ubuntu/ backport. On Mac, try the homebrew version or install from source. 46 | 47 | 48 | Configuration examples 49 | ---------------------- 50 | 51 | .. tabs:: 52 | 53 | .. group-tab:: Default 54 | 55 | .. code-block:: python 56 | 57 | import mara_db.dbs 58 | mara_db.config.databases = lambda: { 59 | 'dwh': mara_db.dbs.SQLServerDB( 60 | host='localhost', 61 | user='sa', 62 | password='', 63 | database='dwh'), 64 | } 65 | 66 | # explicitly define to use the MSSQL Tools (RECOMMENDED) 67 | mara_db.config.databases = lambda: { 68 | 'dwh': mara_db.dbs.SqlcmdSQLServerDB( 69 | host='localhost', 70 | user='sa', 71 | password='', 72 | database='dwh'), 73 | } 74 | 75 | # explicitly define to use sqsh 76 | mara_db.config.databases = lambda: { 77 | 'dwh': mara_db.dbs.SqshSQLServerDB( 78 | host='localhost', 79 | user='sa', 80 | password='', 81 | database='dwh'), 82 | } 83 | 84 | .. 
group-tab:: Use ODBC Driver 18 85 | 86 | .. code-block:: python 87 | 88 | import mara_db.dbs 89 | mara_db.config.databases = lambda: { 90 | 'dwh': mara_db.dbs.SQLServerDB( 91 | host='localhost', 92 | user='sa', 93 | password='', 94 | database='dwh', 95 | odbc_driver='ODBC Driver 18 for SQL Server'), 96 | } 97 | 98 | # explicitly define to use the MSSQL Tools (RECOMMENDED) 99 | mara_db.config.databases = lambda: { 100 | 'dwh': mara_db.dbs.SqlcmdSQLServerDB( 101 | host='localhost', 102 | user='sa', 103 | password='', 104 | database='dwh', 105 | odbc_driver='ODBC Driver 18 for SQL Server'), 106 | } 107 | 108 | # explicitly define to use sqsh 109 | mara_db.config.databases = lambda: { 110 | 'dwh': mara_db.dbs.SqshSQLServerDB( 111 | host='localhost', 112 | user='sa', 113 | password='', 114 | database='dwh', 115 | odbc_driver='ODBC Driver 18 for SQL Server'), 116 | } 117 | 118 | | 119 | 120 | | 121 | 122 | API reference 123 | ------------- 124 | 125 | This section contains database specific API in the module. 126 | 127 | .. module:: mara_db.sqlserver 128 | 129 | Configuration 130 | ~~~~~~~~~~~~~ 131 | 132 | .. module:: mara_db.dbs 133 | :noindex: 134 | 135 | .. autoclass:: SQLServerDB 136 | :special-members: __init__ 137 | :inherited-members: 138 | :members: 139 | 140 | .. autoclass:: SqlcmdSQLServerDB 141 | :special-members: __init__ 142 | :inherited-members: 143 | :members: 144 | 145 | .. autoclass:: SqshSQLServerDB 146 | :special-members: __init__ 147 | :inherited-members: 148 | :members: 149 | -------------------------------------------------------------------------------- /docs/databases-overview.md: -------------------------------------------------------------------------------- 1 | Overview 2 | ======== 3 | 4 | The following database engines are supported: 5 | 6 | | Database | Configuration class | SQLAlchemy Engine / dialect | 7 | | ------------------------- | ------------------- | --------------------------- | 8 | | [PostgreSQL] | PostgreSQLDB | postgresql+psycopg2 9 | | [Amazon Redshift] | RedshiftDB | postgresql+psycopg2 10 | | [Google Big Query] | BigQueryDB | bigquery 11 | | [Databricks] | DatabricksDB | databricks+connector 12 | | [MariaDB] | MysqlDB | - 13 | | [MySQL] | MysqlDB | - 14 | | [Microsoft SQL Server] | SQLServerDB | mssql+pyodbc 15 | | [Azure Synapse Analytics] | SQLServerDB | mssql+pyodbc 16 | | [Oracle Database] | OracleDB | - 17 | | [Snowflake] | SnowflakeDB | snowflake 18 | | [SQLite] | SQLiteDB | sqlite 19 | 20 | 21 | [PostgreSQL]: https://www.postgresql.org/ 22 | [Amazon Redshift]: https://aws.amazon.com/de/redshift/ 23 | [Google Big Query]: https://cloud.google.com/bigquery 24 | [Databricks]: https://www.databricks.com/ 25 | [MariaDB]: https://mariadb.com/ 26 | [MySQL]: https://www.mysql.com/ 27 | [Oracle Database]: https://www.oracle.com/database/ 28 | [Snowflake]: https://www.snowflake.com/ 29 | [SQLite]: https://www.sqlite.org/ 30 | [Microsoft SQL Server]: https://www.microsoft.com/en-us/sql-server 31 | [Azure Synapse Analytics]: https://azure.microsoft.com/en-us/services/synapse-analytics/ 32 | 33 | 34 | Function support matrix 35 | ----------------------- 36 | 37 | Shows which functions are supported with which database engine: 38 | 39 | | Configuration class | Querying | Write STDOUT | Read STDIN | DB-API 2.0 | UI schema support | 40 | | ------------------- | -------- | ------------ | ---------- | ---------- | ----------------- | 41 | | PostgreSQLDB | Yes | Yes | Yes | Yes | Yes 42 | | RedshiftDB | Yes | Yes | Yes | Yes | Yes 43 | | BigQueryDB | Yes | Yes | 
Yes | Yes | *no foreign key support by engine* 44 | | DatabricksDB | Yes | Yes | - | Yes | 45 | | MysqlDB | Yes | Yes | - | Yes | Yes 46 | | SQLServerDB | Yes | Yes | Yes | Yes | Yes 47 | | OracleDB | Yes | Yes | - | - | 48 | | SnowflakeDB | Yes | Yes | - | - | 49 | | SQLiteDB | Yes | Yes | - | Yes | 50 | 51 | *Write STDOUT* gives the possibility to write the result of a query to STDOUT 52 | 53 | *Read STDIN* gives the possibility to read data from STDIN into a predefined SQL table 54 | 55 | 56 | Format support 57 | -------------- 58 | 59 | Shows the formats supported per database engine 60 | 61 | ### Read STDIN 62 | 63 | | Configuration class | CSV | JsonL | Avro | Parquet | ORC | 64 | | ------------------- | ----| ----- | ---- | ------- | --- | 65 | | PostgreSQLDB | Yes | Yes | - | - | - | 66 | | RedshiftDB | Yes | Yes | - | - | - | 67 | | BigQueryDB | Yes | Yes | Yes | Yes | Yes | 68 | | SQLServerDB | Yes | - | - | - | - | 69 | 70 | 71 | ### Write STDOUT 72 | 73 | | Configuration class | CSV | JsonL | Avro | Parquet | ORC | 74 | | ------------------- | ----| ----- | ---- | ------- | --- | 75 | | PostgreSQLDB | Yes | - | - | - | - | 76 | | RedshiftDB | Yes | - | - | - | - | 77 | | BigQueryDB | Yes | - | - | - | - | 78 | | DatabricksDB | Yes | - | - | - | - | 79 | | MysqlDB | Yes | - | - | - | - | 80 | | SQLServerDB | Yes | - | - | - | - | 81 | | OracleDB | Yes | - | - | - | - | 82 | | SnowflakeDB | Yes | - | - | - | - | 83 | | SQLiteDB | Yes | - | - | - | - | 84 | 85 | 86 | Copy matrix 87 | ----------- 88 | 89 | Shows which copy operations are implemented by default. 90 | 91 | | from / to | PostgreSQLDB | RedshiftDB | BigQueryDB | DatabricksDB | MysqlDB | SQLServerDB | OracleDB | SnowflakeDB | SQLiteDB | 92 | | ------------ | ------------ | ---------- | ---------- | ------------ | ------- | ----------- | -------- | ----------- | -------- | 93 | | PostgreSQLDB | Yes | Yes | Yes | - | - | - | - | - | - | 94 | | RedshiftDB | Yes | Yes | Yes | - | - | - | - | - | - | 95 | | BigQueryDB | Yes | Yes | - | - | - | - | - | - | - | 96 | | DatabricksDB | - | - | - | - | - | - | - | - | - | 97 | | MysqlDB | Yes | Yes | Yes | - | - | - | - | - | - | 98 | | SQLServerDB | Yes | Yes | Yes | - | - | - | - | - | - | 99 | | OracleDB | Yes | Yes | Yes | - | - | - | - | - | - | 100 | | SnowflakeDB | - | - | - | - | - | - | - | - | - | 101 | | SQLiteDB | Yes | Yes | Yes | - | - | - | - | - | - | 102 | -------------------------------------------------------------------------------- /tests/seed/accounts_lf.jsonl: -------------------------------------------------------------------------------- 1 | {"_id":"63f4e20bc595c039ae346f3d","index":0,"guid":"cfdb98f5-97d9-49e2-9e58-70e2b2008a86","isActive":true,"balance":"$3,442.22","picture":"http://placehold.it/32x32","age":31,"eyeColor":"brown","name":"Harding Melton","gender":"male","company":"ZIORE","email":"hardingmelton@ziore.com","phone":"+1 (876) 570-3086","address":"240 Louise Terrace, Nescatunga, West Virginia, 3694","about":"Eiusmod dolor enim sit tempor mollit anim laboris proident duis voluptate. Consequat non in commodo esse ut ex ut ut aute. Do tempor irure ad cillum ea ea qui sint deserunt aliqua duis Lorem proident irure. Ut commodo eu elit id ut commodo sunt voluptate.\r\n","registered":"2015-03-10T01:06:59 -01:00","latitude":-42.546453,"longitude":55.051751,"tags":["cupidatat","consectetur","nisi","commodo","irure","sint","exercitation"],"friends":[{"id":0,"name":"Carly Francis"},{"id":1,"name":"Daugherty Mccall"},{"id":2,"name":"Ortiz Howe"}],"greeting":"Hello, Harding Melton! 
You have 6 unread messages.","favoriteFruit":"apple"} 2 | {"_id":"63f4e20b0ee13f29c71af014","index":1,"guid":"d3874525-fb7a-46e8-9d94-0fd6b12c0903","isActive":false,"balance":"$3,913.68","picture":"http://placehold.it/32x32","age":36,"eyeColor":"green","name":"Ruiz Castillo","gender":"male","company":"VURBO","email":"ruizcastillo@vurbo.com","phone":"+1 (827) 461-3371","address":"848 Moore Place, Neahkahnie, Nebraska, 5562","about":"Do nostrud velit non consequat do aute laboris consequat quis nisi sint voluptate quis. Ut nisi velit velit consequat. Duis enim aliqua quis est sit velit amet veniam reprehenderit cupidatat et sit.\r\n","registered":"2021-10-29T05:20:04 -02:00","latitude":59.08799,"longitude":-36.282546,"tags":["cupidatat","pariatur","exercitation","dolor","et","magna","sit"],"friends":[{"id":0,"name":"Adele Douglas"},{"id":1,"name":"Minnie Gillespie"},{"id":2,"name":"Casandra Alford"}],"greeting":"Hello, Ruiz Castillo! You have 2 unread messages.","favoriteFruit":"strawberry"} 3 | {"_id":"63f4e20bf847585032704223","index":2,"guid":"738306f5-7d1c-49c1-abf4-b1e2fbd94b98","isActive":false,"balance":"$2,446.57","picture":"http://placehold.it/32x32","age":37,"eyeColor":"green","name":"Landry Bryant","gender":"male","company":"WARETEL","email":"landrybryant@waretel.com","phone":"+1 (821) 406-2170","address":"935 Dearborn Court, Blandburg, Kansas, 3741","about":"Ullamco laboris ad do tempor ut et in qui consequat. Labore est occaecat anim consectetur. Sunt sit labore sit laborum ad ex. Voluptate cillum veniam Lorem incididunt nulla qui laboris cupidatat ut dolor mollit.\r\n","registered":"2016-10-14T03:54:30 -02:00","latitude":2.67078,"longitude":9.19132,"tags":["nulla","irure","exercitation","consectetur","in","officia","anim"],"friends":[{"id":0,"name":"Hewitt Smith"},{"id":1,"name":"Hilda Fields"},{"id":2,"name":"Zelma Walters"}],"greeting":"Hello, Landry Bryant! You have 8 unread messages.","favoriteFruit":"apple"} 4 | {"_id":"63f4e20b2866d26b25475cce","index":3,"guid":"4d748b35-9ca3-473d-9a32-c07d335163f8","isActive":false,"balance":"$3,220.93","picture":"http://placehold.it/32x32","age":20,"eyeColor":"green","name":"Brandi Mccullough","gender":"female","company":"PYRAMIS","email":"brandimccullough@pyramis.com","phone":"+1 (965) 537-3191","address":"314 Schweikerts Walk, Bannock, Ohio, 6598","about":"Est non excepteur aliqua labore in nostrud consequat irure anim excepteur occaecat ipsum. Irure elit et eiusmod excepteur laborum ipsum anim. Magna aliquip pariatur aliqua sit mollit in fugiat cupidatat sit dolore. Minim eiusmod officia mollit et commodo officia adipisicing cupidatat quis irure irure sit proident. Labore commodo adipisicing incididunt anim exercitation veniam. Duis veniam nulla fugiat officia enim reprehenderit eiusmod voluptate pariatur velit adipisicing.\r\n","registered":"2022-03-12T05:35:31 -01:00","latitude":-70.771196,"longitude":82.03046,"tags":["fugiat","eu","enim","dolore","veniam","dolor","consequat"],"friends":[{"id":0,"name":"Kim Beasley"},{"id":1,"name":"Gould Villarreal"},{"id":2,"name":"Therese Salas"}],"greeting":"Hello, Brandi Mccullough! 
You have 1 unread messages.","favoriteFruit":"strawberry"} 5 | {"_id":"63f4e20bffebf41e18fc1b88","index":4,"guid":"e2da8a8c-6ddb-48b1-a329-17a4d6f5f9ba","isActive":false,"balance":"$3,431.47","picture":"http://placehold.it/32x32","age":24,"eyeColor":"brown","name":"Gail Davenport","gender":"female","company":"PHUEL","email":"gaildavenport@phuel.com","phone":"+1 (999) 550-3089","address":"104 Conduit Boulevard, Buxton, Tennessee, 3220","about":"Do quis qui anim aliquip qui aute commodo fugiat exercitation et fugiat ea consequat non. Ullamco pariatur magna ex et exercitation nostrud magna. Lorem esse do do laboris non aliqua nostrud sint.\r\n","registered":"2019-05-31T09:03:18 -02:00","latitude":2.16119,"longitude":-122.728692,"tags":["do","eu","dolor","qui","cupidatat","sint","aliquip"],"friends":[{"id":0,"name":"Kerri Joyce"},{"id":1,"name":"Farmer Duncan"},{"id":2,"name":"Sears Coffey"}],"greeting":"Hello, Gail Davenport! You have 3 unread messages.","favoriteFruit":"apple"} 6 | {"_id":"63f4e20b9413b0ebd85cf187","index":5,"guid":"e38ee4f1-c78c-47cf-a7d7-02504689b856","isActive":false,"balance":"$3,883.08","picture":"http://placehold.it/32x32","age":30,"eyeColor":"green","name":"Ella Hawkins","gender":"female","company":"ZIGGLES","email":"ellahawkins@ziggles.com","phone":"+1 (805) 491-2254","address":"809 Columbus Place, Avoca, Georgia, 5345","about":"In anim ex nostrud elit. Reprehenderit voluptate id reprehenderit mollit tempor culpa et esse commodo voluptate fugiat. Deserunt dolor enim tempor voluptate irure.\r\n","registered":"2017-09-14T03:36:53 -02:00","latitude":11.685129,"longitude":102.823545,"tags":["nulla","ea","qui","nulla","sint","elit","ea"],"friends":[{"id":0,"name":"Lorna Dean"},{"id":1,"name":"Ada Haynes"},{"id":2,"name":"Mayer Harrington"}],"greeting":"Hello, Ella Hawkins! You have 6 unread messages.","favoriteFruit":"banana"} -------------------------------------------------------------------------------- /tests/seed/accounts_crlf.jsonl: -------------------------------------------------------------------------------- 1 | {"_id":"63f4e20bc595c039ae346f3d","index":0,"guid":"cfdb98f5-97d9-49e2-9e58-70e2b2008a86","isActive":true,"balance":"$3,442.22","picture":"http://placehold.it/32x32","age":31,"eyeColor":"brown","name":"Harding Melton","gender":"male","company":"ZIORE","email":"hardingmelton@ziore.com","phone":"+1 (876) 570-3086","address":"240 Louise Terrace, Nescatunga, West Virginia, 3694","about":"Eiusmod dolor enim sit tempor mollit anim laboris proident duis voluptate. Consequat non in commodo esse ut ex ut ut aute. Do tempor irure ad cillum ea ea qui sint deserunt aliqua duis Lorem proident irure. Ut commodo eu elit id ut commodo sunt voluptate.\r\n","registered":"2015-03-10T01:06:59 -01:00","latitude":-42.546453,"longitude":55.051751,"tags":["cupidatat","consectetur","nisi","commodo","irure","sint","exercitation"],"friends":[{"id":0,"name":"Carly Francis"},{"id":1,"name":"Daugherty Mccall"},{"id":2,"name":"Ortiz Howe"}],"greeting":"Hello, Harding Melton! 
You have 6 unread messages.","favoriteFruit":"apple"} 2 | {"_id":"63f4e20b0ee13f29c71af014","index":1,"guid":"d3874525-fb7a-46e8-9d94-0fd6b12c0903","isActive":false,"balance":"$3,913.68","picture":"http://placehold.it/32x32","age":36,"eyeColor":"green","name":"Ruiz Castillo","gender":"male","company":"VURBO","email":"ruizcastillo@vurbo.com","phone":"+1 (827) 461-3371","address":"848 Moore Place, Neahkahnie, Nebraska, 5562","about":"Do nostrud velit non consequat do aute laboris consequat quis nisi sint voluptate quis. Ut nisi velit velit consequat. Duis enim aliqua quis est sit velit amet veniam reprehenderit cupidatat et sit.\r\n","registered":"2021-10-29T05:20:04 -02:00","latitude":59.08799,"longitude":-36.282546,"tags":["cupidatat","pariatur","exercitation","dolor","et","magna","sit"],"friends":[{"id":0,"name":"Adele Douglas"},{"id":1,"name":"Minnie Gillespie"},{"id":2,"name":"Casandra Alford"}],"greeting":"Hello, Ruiz Castillo! You have 2 unread messages.","favoriteFruit":"strawberry"} 3 | {"_id":"63f4e20bf847585032704223","index":2,"guid":"738306f5-7d1c-49c1-abf4-b1e2fbd94b98","isActive":false,"balance":"$2,446.57","picture":"http://placehold.it/32x32","age":37,"eyeColor":"green","name":"Landry Bryant","gender":"male","company":"WARETEL","email":"landrybryant@waretel.com","phone":"+1 (821) 406-2170","address":"935 Dearborn Court, Blandburg, Kansas, 3741","about":"Ullamco laboris ad do tempor ut et in qui consequat. Labore est occaecat anim consectetur. Sunt sit labore sit laborum ad ex. Voluptate cillum veniam Lorem incididunt nulla qui laboris cupidatat ut dolor mollit.\r\n","registered":"2016-10-14T03:54:30 -02:00","latitude":2.67078,"longitude":9.19132,"tags":["nulla","irure","exercitation","consectetur","in","officia","anim"],"friends":[{"id":0,"name":"Hewitt Smith"},{"id":1,"name":"Hilda Fields"},{"id":2,"name":"Zelma Walters"}],"greeting":"Hello, Landry Bryant! You have 8 unread messages.","favoriteFruit":"apple"} 4 | {"_id":"63f4e20b2866d26b25475cce","index":3,"guid":"4d748b35-9ca3-473d-9a32-c07d335163f8","isActive":false,"balance":"$3,220.93","picture":"http://placehold.it/32x32","age":20,"eyeColor":"green","name":"Brandi Mccullough","gender":"female","company":"PYRAMIS","email":"brandimccullough@pyramis.com","phone":"+1 (965) 537-3191","address":"314 Schweikerts Walk, Bannock, Ohio, 6598","about":"Est non excepteur aliqua labore in nostrud consequat irure anim excepteur occaecat ipsum. Irure elit et eiusmod excepteur laborum ipsum anim. Magna aliquip pariatur aliqua sit mollit in fugiat cupidatat sit dolore. Minim eiusmod officia mollit et commodo officia adipisicing cupidatat quis irure irure sit proident. Labore commodo adipisicing incididunt anim exercitation veniam. Duis veniam nulla fugiat officia enim reprehenderit eiusmod voluptate pariatur velit adipisicing.\r\n","registered":"2022-03-12T05:35:31 -01:00","latitude":-70.771196,"longitude":82.03046,"tags":["fugiat","eu","enim","dolore","veniam","dolor","consequat"],"friends":[{"id":0,"name":"Kim Beasley"},{"id":1,"name":"Gould Villarreal"},{"id":2,"name":"Therese Salas"}],"greeting":"Hello, Brandi Mccullough! 
You have 1 unread messages.","favoriteFruit":"strawberry"} 5 | {"_id":"63f4e20bffebf41e18fc1b88","index":4,"guid":"e2da8a8c-6ddb-48b1-a329-17a4d6f5f9ba","isActive":false,"balance":"$3,431.47","picture":"http://placehold.it/32x32","age":24,"eyeColor":"brown","name":"Gail Davenport","gender":"female","company":"PHUEL","email":"gaildavenport@phuel.com","phone":"+1 (999) 550-3089","address":"104 Conduit Boulevard, Buxton, Tennessee, 3220","about":"Do quis qui anim aliquip qui aute commodo fugiat exercitation et fugiat ea consequat non. Ullamco pariatur magna ex et exercitation nostrud magna. Lorem esse do do laboris non aliqua nostrud sint.\r\n","registered":"2019-05-31T09:03:18 -02:00","latitude":2.16119,"longitude":-122.728692,"tags":["do","eu","dolor","qui","cupidatat","sint","aliquip"],"friends":[{"id":0,"name":"Kerri Joyce"},{"id":1,"name":"Farmer Duncan"},{"id":2,"name":"Sears Coffey"}],"greeting":"Hello, Gail Davenport! You have 3 unread messages.","favoriteFruit":"apple"} 6 | {"_id":"63f4e20b9413b0ebd85cf187","index":5,"guid":"e38ee4f1-c78c-47cf-a7d7-02504689b856","isActive":false,"balance":"$3,883.08","picture":"http://placehold.it/32x32","age":30,"eyeColor":"green","name":"Ella Hawkins","gender":"female","company":"ZIGGLES","email":"ellahawkins@ziggles.com","phone":"+1 (805) 491-2254","address":"809 Columbus Place, Avoca, Georgia, 5345","about":"In anim ex nostrud elit. Reprehenderit voluptate id reprehenderit mollit tempor culpa et esse commodo voluptate fugiat. Deserunt dolor enim tempor voluptate irure.\r\n","registered":"2017-09-14T03:36:53 -02:00","latitude":11.685129,"longitude":102.823545,"tags":["nulla","ea","qui","nulla","sint","elit","ea"],"friends":[{"id":0,"name":"Lorna Dean"},{"id":1,"name":"Ada Haynes"},{"id":2,"name":"Mayer Harrington"}],"greeting":"Hello, Ella Hawkins! You have 6 unread messages.","favoriteFruit":"banana"} -------------------------------------------------------------------------------- /tests/seed/accounts_lf_lastrow.jsonl: -------------------------------------------------------------------------------- 1 | {"_id":"63f4e20bc595c039ae346f3d","index":0,"guid":"cfdb98f5-97d9-49e2-9e58-70e2b2008a86","isActive":true,"balance":"$3,442.22","picture":"http://placehold.it/32x32","age":31,"eyeColor":"brown","name":"Harding Melton","gender":"male","company":"ZIORE","email":"hardingmelton@ziore.com","phone":"+1 (876) 570-3086","address":"240 Louise Terrace, Nescatunga, West Virginia, 3694","about":"Eiusmod dolor enim sit tempor mollit anim laboris proident duis voluptate. Consequat non in commodo esse ut ex ut ut aute. Do tempor irure ad cillum ea ea qui sint deserunt aliqua duis Lorem proident irure. Ut commodo eu elit id ut commodo sunt voluptate.\r\n","registered":"2015-03-10T01:06:59 -01:00","latitude":-42.546453,"longitude":55.051751,"tags":["cupidatat","consectetur","nisi","commodo","irure","sint","exercitation"],"friends":[{"id":0,"name":"Carly Francis"},{"id":1,"name":"Daugherty Mccall"},{"id":2,"name":"Ortiz Howe"}],"greeting":"Hello, Harding Melton! 
You have 6 unread messages.","favoriteFruit":"apple"} 2 | {"_id":"63f4e20b0ee13f29c71af014","index":1,"guid":"d3874525-fb7a-46e8-9d94-0fd6b12c0903","isActive":false,"balance":"$3,913.68","picture":"http://placehold.it/32x32","age":36,"eyeColor":"green","name":"Ruiz Castillo","gender":"male","company":"VURBO","email":"ruizcastillo@vurbo.com","phone":"+1 (827) 461-3371","address":"848 Moore Place, Neahkahnie, Nebraska, 5562","about":"Do nostrud velit non consequat do aute laboris consequat quis nisi sint voluptate quis. Ut nisi velit velit consequat. Duis enim aliqua quis est sit velit amet veniam reprehenderit cupidatat et sit.\r\n","registered":"2021-10-29T05:20:04 -02:00","latitude":59.08799,"longitude":-36.282546,"tags":["cupidatat","pariatur","exercitation","dolor","et","magna","sit"],"friends":[{"id":0,"name":"Adele Douglas"},{"id":1,"name":"Minnie Gillespie"},{"id":2,"name":"Casandra Alford"}],"greeting":"Hello, Ruiz Castillo! You have 2 unread messages.","favoriteFruit":"strawberry"} 3 | {"_id":"63f4e20bf847585032704223","index":2,"guid":"738306f5-7d1c-49c1-abf4-b1e2fbd94b98","isActive":false,"balance":"$2,446.57","picture":"http://placehold.it/32x32","age":37,"eyeColor":"green","name":"Landry Bryant","gender":"male","company":"WARETEL","email":"landrybryant@waretel.com","phone":"+1 (821) 406-2170","address":"935 Dearborn Court, Blandburg, Kansas, 3741","about":"Ullamco laboris ad do tempor ut et in qui consequat. Labore est occaecat anim consectetur. Sunt sit labore sit laborum ad ex. Voluptate cillum veniam Lorem incididunt nulla qui laboris cupidatat ut dolor mollit.\r\n","registered":"2016-10-14T03:54:30 -02:00","latitude":2.67078,"longitude":9.19132,"tags":["nulla","irure","exercitation","consectetur","in","officia","anim"],"friends":[{"id":0,"name":"Hewitt Smith"},{"id":1,"name":"Hilda Fields"},{"id":2,"name":"Zelma Walters"}],"greeting":"Hello, Landry Bryant! You have 8 unread messages.","favoriteFruit":"apple"} 4 | {"_id":"63f4e20b2866d26b25475cce","index":3,"guid":"4d748b35-9ca3-473d-9a32-c07d335163f8","isActive":false,"balance":"$3,220.93","picture":"http://placehold.it/32x32","age":20,"eyeColor":"green","name":"Brandi Mccullough","gender":"female","company":"PYRAMIS","email":"brandimccullough@pyramis.com","phone":"+1 (965) 537-3191","address":"314 Schweikerts Walk, Bannock, Ohio, 6598","about":"Est non excepteur aliqua labore in nostrud consequat irure anim excepteur occaecat ipsum. Irure elit et eiusmod excepteur laborum ipsum anim. Magna aliquip pariatur aliqua sit mollit in fugiat cupidatat sit dolore. Minim eiusmod officia mollit et commodo officia adipisicing cupidatat quis irure irure sit proident. Labore commodo adipisicing incididunt anim exercitation veniam. Duis veniam nulla fugiat officia enim reprehenderit eiusmod voluptate pariatur velit adipisicing.\r\n","registered":"2022-03-12T05:35:31 -01:00","latitude":-70.771196,"longitude":82.03046,"tags":["fugiat","eu","enim","dolore","veniam","dolor","consequat"],"friends":[{"id":0,"name":"Kim Beasley"},{"id":1,"name":"Gould Villarreal"},{"id":2,"name":"Therese Salas"}],"greeting":"Hello, Brandi Mccullough! 
You have 1 unread messages.","favoriteFruit":"strawberry"} 5 | {"_id":"63f4e20bffebf41e18fc1b88","index":4,"guid":"e2da8a8c-6ddb-48b1-a329-17a4d6f5f9ba","isActive":false,"balance":"$3,431.47","picture":"http://placehold.it/32x32","age":24,"eyeColor":"brown","name":"Gail Davenport","gender":"female","company":"PHUEL","email":"gaildavenport@phuel.com","phone":"+1 (999) 550-3089","address":"104 Conduit Boulevard, Buxton, Tennessee, 3220","about":"Do quis qui anim aliquip qui aute commodo fugiat exercitation et fugiat ea consequat non. Ullamco pariatur magna ex et exercitation nostrud magna. Lorem esse do do laboris non aliqua nostrud sint.\r\n","registered":"2019-05-31T09:03:18 -02:00","latitude":2.16119,"longitude":-122.728692,"tags":["do","eu","dolor","qui","cupidatat","sint","aliquip"],"friends":[{"id":0,"name":"Kerri Joyce"},{"id":1,"name":"Farmer Duncan"},{"id":2,"name":"Sears Coffey"}],"greeting":"Hello, Gail Davenport! You have 3 unread messages.","favoriteFruit":"apple"} 6 | {"_id":"63f4e20b9413b0ebd85cf187","index":5,"guid":"e38ee4f1-c78c-47cf-a7d7-02504689b856","isActive":false,"balance":"$3,883.08","picture":"http://placehold.it/32x32","age":30,"eyeColor":"green","name":"Ella Hawkins","gender":"female","company":"ZIGGLES","email":"ellahawkins@ziggles.com","phone":"+1 (805) 491-2254","address":"809 Columbus Place, Avoca, Georgia, 5345","about":"In anim ex nostrud elit. Reprehenderit voluptate id reprehenderit mollit tempor culpa et esse commodo voluptate fugiat. Deserunt dolor enim tempor voluptate irure.\r\n","registered":"2017-09-14T03:36:53 -02:00","latitude":11.685129,"longitude":102.823545,"tags":["nulla","ea","qui","nulla","sint","elit","ea"],"friends":[{"id":0,"name":"Lorna Dean"},{"id":1,"name":"Ada Haynes"},{"id":2,"name":"Mayer Harrington"}],"greeting":"Hello, Ella Hawkins! You have 6 unread messages.","favoriteFruit":"banana"} 7 | -------------------------------------------------------------------------------- /tests/seed/accounts_crlf_lastrow.jsonl: -------------------------------------------------------------------------------- 1 | {"_id":"63f4e20bc595c039ae346f3d","index":0,"guid":"cfdb98f5-97d9-49e2-9e58-70e2b2008a86","isActive":true,"balance":"$3,442.22","picture":"http://placehold.it/32x32","age":31,"eyeColor":"brown","name":"Harding Melton","gender":"male","company":"ZIORE","email":"hardingmelton@ziore.com","phone":"+1 (876) 570-3086","address":"240 Louise Terrace, Nescatunga, West Virginia, 3694","about":"Eiusmod dolor enim sit tempor mollit anim laboris proident duis voluptate. Consequat non in commodo esse ut ex ut ut aute. Do tempor irure ad cillum ea ea qui sint deserunt aliqua duis Lorem proident irure. Ut commodo eu elit id ut commodo sunt voluptate.\r\n","registered":"2015-03-10T01:06:59 -01:00","latitude":-42.546453,"longitude":55.051751,"tags":["cupidatat","consectetur","nisi","commodo","irure","sint","exercitation"],"friends":[{"id":0,"name":"Carly Francis"},{"id":1,"name":"Daugherty Mccall"},{"id":2,"name":"Ortiz Howe"}],"greeting":"Hello, Harding Melton! 
You have 6 unread messages.","favoriteFruit":"apple"} 2 | {"_id":"63f4e20b0ee13f29c71af014","index":1,"guid":"d3874525-fb7a-46e8-9d94-0fd6b12c0903","isActive":false,"balance":"$3,913.68","picture":"http://placehold.it/32x32","age":36,"eyeColor":"green","name":"Ruiz Castillo","gender":"male","company":"VURBO","email":"ruizcastillo@vurbo.com","phone":"+1 (827) 461-3371","address":"848 Moore Place, Neahkahnie, Nebraska, 5562","about":"Do nostrud velit non consequat do aute laboris consequat quis nisi sint voluptate quis. Ut nisi velit velit consequat. Duis enim aliqua quis est sit velit amet veniam reprehenderit cupidatat et sit.\r\n","registered":"2021-10-29T05:20:04 -02:00","latitude":59.08799,"longitude":-36.282546,"tags":["cupidatat","pariatur","exercitation","dolor","et","magna","sit"],"friends":[{"id":0,"name":"Adele Douglas"},{"id":1,"name":"Minnie Gillespie"},{"id":2,"name":"Casandra Alford"}],"greeting":"Hello, Ruiz Castillo! You have 2 unread messages.","favoriteFruit":"strawberry"} 3 | {"_id":"63f4e20bf847585032704223","index":2,"guid":"738306f5-7d1c-49c1-abf4-b1e2fbd94b98","isActive":false,"balance":"$2,446.57","picture":"http://placehold.it/32x32","age":37,"eyeColor":"green","name":"Landry Bryant","gender":"male","company":"WARETEL","email":"landrybryant@waretel.com","phone":"+1 (821) 406-2170","address":"935 Dearborn Court, Blandburg, Kansas, 3741","about":"Ullamco laboris ad do tempor ut et in qui consequat. Labore est occaecat anim consectetur. Sunt sit labore sit laborum ad ex. Voluptate cillum veniam Lorem incididunt nulla qui laboris cupidatat ut dolor mollit.\r\n","registered":"2016-10-14T03:54:30 -02:00","latitude":2.67078,"longitude":9.19132,"tags":["nulla","irure","exercitation","consectetur","in","officia","anim"],"friends":[{"id":0,"name":"Hewitt Smith"},{"id":1,"name":"Hilda Fields"},{"id":2,"name":"Zelma Walters"}],"greeting":"Hello, Landry Bryant! You have 8 unread messages.","favoriteFruit":"apple"} 4 | {"_id":"63f4e20b2866d26b25475cce","index":3,"guid":"4d748b35-9ca3-473d-9a32-c07d335163f8","isActive":false,"balance":"$3,220.93","picture":"http://placehold.it/32x32","age":20,"eyeColor":"green","name":"Brandi Mccullough","gender":"female","company":"PYRAMIS","email":"brandimccullough@pyramis.com","phone":"+1 (965) 537-3191","address":"314 Schweikerts Walk, Bannock, Ohio, 6598","about":"Est non excepteur aliqua labore in nostrud consequat irure anim excepteur occaecat ipsum. Irure elit et eiusmod excepteur laborum ipsum anim. Magna aliquip pariatur aliqua sit mollit in fugiat cupidatat sit dolore. Minim eiusmod officia mollit et commodo officia adipisicing cupidatat quis irure irure sit proident. Labore commodo adipisicing incididunt anim exercitation veniam. Duis veniam nulla fugiat officia enim reprehenderit eiusmod voluptate pariatur velit adipisicing.\r\n","registered":"2022-03-12T05:35:31 -01:00","latitude":-70.771196,"longitude":82.03046,"tags":["fugiat","eu","enim","dolore","veniam","dolor","consequat"],"friends":[{"id":0,"name":"Kim Beasley"},{"id":1,"name":"Gould Villarreal"},{"id":2,"name":"Therese Salas"}],"greeting":"Hello, Brandi Mccullough! 
You have 1 unread messages.","favoriteFruit":"strawberry"} 5 | {"_id":"63f4e20bffebf41e18fc1b88","index":4,"guid":"e2da8a8c-6ddb-48b1-a329-17a4d6f5f9ba","isActive":false,"balance":"$3,431.47","picture":"http://placehold.it/32x32","age":24,"eyeColor":"brown","name":"Gail Davenport","gender":"female","company":"PHUEL","email":"gaildavenport@phuel.com","phone":"+1 (999) 550-3089","address":"104 Conduit Boulevard, Buxton, Tennessee, 3220","about":"Do quis qui anim aliquip qui aute commodo fugiat exercitation et fugiat ea consequat non. Ullamco pariatur magna ex et exercitation nostrud magna. Lorem esse do do laboris non aliqua nostrud sint.\r\n","registered":"2019-05-31T09:03:18 -02:00","latitude":2.16119,"longitude":-122.728692,"tags":["do","eu","dolor","qui","cupidatat","sint","aliquip"],"friends":[{"id":0,"name":"Kerri Joyce"},{"id":1,"name":"Farmer Duncan"},{"id":2,"name":"Sears Coffey"}],"greeting":"Hello, Gail Davenport! You have 3 unread messages.","favoriteFruit":"apple"} 6 | {"_id":"63f4e20b9413b0ebd85cf187","index":5,"guid":"e38ee4f1-c78c-47cf-a7d7-02504689b856","isActive":false,"balance":"$3,883.08","picture":"http://placehold.it/32x32","age":30,"eyeColor":"green","name":"Ella Hawkins","gender":"female","company":"ZIGGLES","email":"ellahawkins@ziggles.com","phone":"+1 (805) 491-2254","address":"809 Columbus Place, Avoca, Georgia, 5345","about":"In anim ex nostrud elit. Reprehenderit voluptate id reprehenderit mollit tempor culpa et esse commodo voluptate fugiat. Deserunt dolor enim tempor voluptate irure.\r\n","registered":"2017-09-14T03:36:53 -02:00","latitude":11.685129,"longitude":102.823545,"tags":["nulla","ea","qui","nulla","sint","elit","ea"],"friends":[{"id":0,"name":"Lorna Dean"},{"id":1,"name":"Ada Haynes"},{"id":2,"name":"Mayer Harrington"}],"greeting":"Hello, Ella Hawkins! You have 6 unread messages.","favoriteFruit":"banana"} 7 | -------------------------------------------------------------------------------- /mara_db/bigquery.py: -------------------------------------------------------------------------------- 1 | """Easy access to BigQuery databases via google.cloud.bigquery""" 2 | 3 | import typing 4 | from warnings import warn 5 | 6 | import mara_db.dbs 7 | import sys 8 | import time 9 | from google.api_core.exceptions import BadRequest 10 | 11 | 12 | def bigquery_credentials(db: typing.Union[str, mara_db.dbs.BigQueryDB]) -> 'google.oauth2.service_account.Credentials': 13 | """Get the parsed service account""" 14 | from google.oauth2.service_account import Credentials 15 | 16 | if isinstance(db, str): 17 | db = mara_db.dbs.db(db) 18 | 19 | return Credentials.from_service_account_file(db.service_account_json_file_name) 20 | 21 | 22 | def bigquery_client(db: typing.Union[str, mara_db.dbs.BigQueryDB]) -> 'google.cloud.bigquery.client.Client': 23 | """Get an bigquery client for a bq database alias""" 24 | from google.cloud.bigquery.client import Client 25 | 26 | if isinstance(db, str): 27 | db = mara_db.dbs.db(db) 28 | 29 | credentials = bigquery_credentials(db) 30 | 31 | return Client(project=credentials.project_id, credentials=credentials, location=db.location) 32 | 33 | 34 | def bigquery_cursor_context(db: typing.Union[str, mara_db.dbs.BigQueryDB]) \ 35 | -> 'google.cloud.bigquery.dbapi.cursor.Cursor': 36 | """Creates a context with a bigquery cursor for a database alias""" 37 | warn('Function bigquery_cursor_context(db) is deprecated. 
Please use mara_db.dbs.cursor_context(db) instead.', 38 | category=DeprecationWarning) 39 | 40 | if isinstance(db, str): 41 | db = mara_db.dbs.db(db) 42 | 43 | assert (isinstance(db, mara_db.dbs.BigQueryDB)) 44 | 45 | return mara_db.dbs.cursor_context(db) 46 | 47 | 48 | def create_bigquery_table_from_postgresql_query( 49 | postgresql_query: str, postgresql_db_alias: str, 50 | bigquery_db_alias: str, bigquery_dataset_id: str, bigquery_table_name: str): 51 | """ 52 | Creates a BigQuery table from a PostgreSQL SELECT query. Prints the generated query 53 | 54 | Useful for copying PostgreSQL tables to BigQuery (create table first and then copy) 55 | 56 | Example: 57 | >>> create_bigquery_table_from_postgresql_query( 58 | >>> postgresql_db_alias='dwh', 59 | >>> postgresql_query='SELECT 1::SMALLINT AS a, now() as b', 60 | >>> bigquery_db_alias='reporting', 61 | >>> bigquery_dataset_id='foo', 62 | >>> bigquery_table_name='bar') 63 | CREATE OR REPLACE TABLE `foo`.`bar` ( 64 | `a` INT64, 65 | `b` TIMESTAMP 66 | ) 67 | 68 | Args: 69 | postgresql_query: The query to execute in PostgreSQL, must not end with a semicolon 70 | postgresql_db_alias: The postgresql database to execute the query in 71 | bigquery_db_alias: The mara db alias of the bigquery connection 72 | bigquery_dataset_id: The id of the bigquery dataset in which the table is to be created 73 | bigquery_table_name: The name of the table to be created 74 | """ 75 | from mara_db.postgresql import postgres_cursor_context 76 | with postgres_cursor_context(postgresql_db_alias) as cursor: 77 | cursor.execute('SELECT oid, typname FROM pg_type;') 78 | pg_types = {} 79 | for oid, type_name in cursor.fetchall(): 80 | pg_types[oid] = type_name 81 | 82 | # https://cloud.google.com/bigquery/docs/reference/standard-sql/federated_query_functions#postgressql_mapping 83 | pg_to_bigquery_type_mapping = { 84 | 'bool': 'BOOL', 85 | 'bytea': 'BYTES', 86 | 'date': 'DATE', 87 | 'int2': 'INT64', 88 | 'int4': 'INT64', 89 | 'int8': 'INT64', 90 | 'json': 'STRING', 91 | 'jsonb': 'STRING', 92 | 'numeric': 'NUMERIC', 93 | 'float4': 'FLOAT64', 94 | 'float8': 'FLOAT64', 95 | 'varchar': 'STRING', 96 | 'text': 'STRING', 97 | 'time': 'TIME', 98 | 'timestamp': 'DATETIME', 99 | 'timestamptz': 'TIMESTAMP', 100 | } 101 | 102 | cursor.execute(postgresql_query + ' LIMIT 0') 103 | 104 | column_specs = [] 105 | for column in cursor.description: 106 | pg_type = pg_types[column.type_code] 107 | assert pg_type in pg_to_bigquery_type_mapping, f"Unmapped type '{pg_type}'" 108 | column_specs.append(f'`{column.name}` {pg_to_bigquery_type_mapping[pg_type]}') 109 | 110 | query = f""" 111 | CREATE OR REPLACE TABLE `{bigquery_dataset_id}`.`{bigquery_table_name}` ( 112 | """ + ',\n '.join(column_specs) + "\n)" 113 | 114 | print(query) 115 | 116 | client = bigquery_client(bigquery_db_alias) 117 | client.query(query) 118 | 119 | 120 | def replace_dataset(db_alias: str, dataset_id: str, next_dataset_id: str): 121 | """ 122 | Replaces a BigQuery dataset with the contents of another one. 
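    Example (a minimal usage sketch; the alias and dataset names are made up for illustration): >>> replace_dataset('reporting', dataset_id='dwh', next_dataset_id='dwh_next')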
123 | 124 | Args: 125 | db_alias: the mara db alias of the bigquery connection 126 | dataset_id: the dataset that will be replaced 127 | next_dataset_id: the dataset providing the new contents 128 | """ 129 | print(f'replacing dataset `{dataset_id}` with contents of `{next_dataset_id}`') 130 | from mara_db.bigquery import bigquery_client 131 | 132 | client = bigquery_client(db_alias) 133 | 134 | # create target dataset if not exists 135 | client.create_dataset(dataset=dataset_id, exists_ok=True) 136 | 137 | # all tables in the next dataset 138 | next_tables = set([table.table_id for table in client.list_tables(next_dataset_id)]) 139 | 140 | ddl = '\n' 141 | 142 | # delete tables in target dataset that are not in next dataset 143 | for table in client.list_tables(dataset_id): 144 | if table.table_id not in next_tables: 145 | ddl += f'DROP TABLE `{dataset_id}`.`{table.table_id}`; \n' 146 | 147 | # hopefully atomic operation 148 | for table_id in next_tables: 149 | ddl += f'CREATE OR REPLACE TABLE `{dataset_id}`.`{table_id}` AS SELECT * FROM `{next_dataset_id}`.`{table_id}`;\n' 150 | ddl += f'DROP TABLE `{next_dataset_id}`.`{table_id}`;\n' 151 | 152 | print(ddl) 153 | client.query(ddl) 154 | 155 | print(f'deleting dataset {next_dataset_id}') 156 | retries = 1 157 | # for some reason the 'DROP TABLE ...' statements take some time, retry the dataset deletion 158 | while True: 159 | try: 160 | client.delete_dataset(next_dataset_id) 161 | return 162 | except BadRequest as e: 163 | if retries <= 10: 164 | print(e, file=sys.stderr) 165 | seconds_to_sleep = retries * 4 166 | print(f'Waiting {seconds_to_sleep} seconds') 167 | time.sleep(seconds_to_sleep) 168 | retries += 1 169 | else: 170 | raise e 171 | -------------------------------------------------------------------------------- /mara_db/auto_migration.py: -------------------------------------------------------------------------------- 1 | """Auto-migration of sql alchemy models with alembic. Use with care""" 2 | 3 | import copy 4 | import io 5 | import sys 6 | import typing 7 | 8 | import sqlalchemy.engine 9 | import sqlalchemy.sql.schema 10 | from sqlalchemy import * # unfortunately needed to get the eval part further down working 11 | # noinspection PyUnresolvedReferences 12 | from sqlalchemy.dialects import * # unfortunately needed to get the eval part further down working 13 | 14 | import mara_db.dbs 15 | from .sqlalchemy_engine import engine 16 | 17 | 18 | def auto_migrate(engine: sqlalchemy.engine.Engine, models: typing.List[sqlalchemy.sql.schema.MetaData]): 19 | """ 20 | Compares a database with a list of defined orm models and applies the diff. Prints executed SQL statements to stdout. 21 | 22 | Based on `alembic automigrations`_, but doesn't require intermediate migration files. 23 | 24 | Use with care, does not work in many cases. 25 | 26 | Args: 27 | engine: the database to use 28 | models: A list of orm models 29 | 30 | Returns: 31 | True in case of no failures 32 | 33 | .. 
_alembic automigrations: 34 | http://alembic.zzzcomputing.com/en/latest/autogenerate.html 35 | """ 36 | import alembic.runtime.migration 37 | import alembic.autogenerate 38 | import sqlalchemy_utils 39 | 40 | try: 41 | # create database if it does not exist 42 | if not sqlalchemy_utils.database_exists(engine.url): 43 | sqlalchemy_utils.create_database(engine.url) 44 | print(f'Created database "{engine.url!r}"\n') 45 | except Exception as e: 46 | print(f'Could not access or create database "{engine.url!r}":\n{e}', file=sys.stderr) 47 | return False 48 | 49 | # merge all models into a single metadata object 50 | combined_meta_data = MetaData() 51 | for model in models: 52 | model.metadata.tables[model.__tablename__].tometadata(combined_meta_data) 53 | 54 | # create diff between models and current db and translate to ddl 55 | ddl = [] 56 | with engine.connect() as connection: 57 | output = io.StringIO() 58 | 59 | diff_context = alembic.runtime.migration.MigrationContext(connection.dialect, connection, opts={}) 60 | 61 | autogen_context = alembic.autogenerate.api.AutogenContext(diff_context, 62 | opts={'sqlalchemy_module_prefix': 'sqlalchemy.', 63 | 'alembic_module_prefix': 'executor.'}) 64 | 65 | execution_context = alembic.runtime.migration.MigrationContext(connection.dialect, connection, 66 | opts={'output_buffer': output, 'as_sql': True}) 67 | 68 | # needed for the eval below 69 | executor = alembic.operations.Operations(execution_context) 70 | 71 | # Step 1: create a diff between the metadata and the database 72 | # operations is a list of MigrateOperation instances, e.g. a DropTableOp 73 | operations = alembic.autogenerate.produce_migrations(diff_context, combined_meta_data).upgrade_ops.ops 74 | 75 | for operation in operations: 76 | # Step 2: autogenerate a python statement from the operation, e.g. "executor.drop_table('bar')" 77 | renderer = alembic.autogenerate.renderers.dispatch(operation) 78 | statements = renderer(autogen_context, operation) 79 | if not isinstance(statements, list): 80 | statements = [statements] 81 | 82 | for statement in statements: 83 | # Step 3: "execute" python statement and get sql from buffer, e.g. "DROP TABLE bar;" 84 | try: 85 | eval(statement) 86 | except Exception as e: 87 | print('statement: ' + statement) 88 | raise e 89 | ddl.append(output.getvalue()) 90 | output.truncate(0) 91 | output.seek(0) 92 | 93 | with engine.begin() as connection: 94 | for statement in ddl: 95 | sys.stdout.write('\033[1;32m' + statement + '\033[0;0m') 96 | connection.execute(statement) 97 | 98 | return True 99 | 100 | 101 | def auto_discover_models_and_migrate() -> bool: 102 | """ 103 | Auto-migrates all sqlalchemy models that have been marked for auto-migration, against the database with the alias 'mara'. 104 | 105 | Models are marked for auto-migration by being put into a module-level `MARA_AUTOMIGRATE_SQLALCHEMY_MODELS` 106 | variable. E.g.
107 | 108 | MARA_AUTOMIGRATE_SQLALCHEMY_MODELS = [MyModel] 109 | 110 | For this, all modules that contain sqlalchemy models need to be loaded first 111 | 112 | Returns: 113 | True when no failure happened 114 | """ 115 | models = [] 116 | for name, module in copy.copy(sys.modules).items(): 117 | if 'MARA_AUTOMIGRATE_SQLALCHEMY_MODELS' in dir(module): 118 | module_models = getattr(module, 'MARA_AUTOMIGRATE_SQLALCHEMY_MODELS') 119 | if isinstance(module_models, typing.Callable): 120 | module_models = module_models() 121 | if isinstance(module_models, typing.Dict): 122 | module_models = module_models.values() 123 | assert (isinstance(module_models, typing.Iterable)) 124 | models += module_models 125 | return auto_migrate(engine('mara'), models) 126 | 127 | 128 | if __name__ == "__main__": 129 | # Example 130 | import sqlalchemy.ext.declarative 131 | import tempfile 132 | import pathlib 133 | 134 | with tempfile.TemporaryDirectory() as dir: 135 | db = mara_db.dbs.SQLiteDB(file_name=pathlib.Path(dir) / 'test.sqlite') 136 | 137 | 138 | # define a model / table 139 | class MyTable(sqlalchemy.ext.declarative.declarative_base()): 140 | __tablename__ = 'my_table' 141 | my_table_id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 142 | column_1 = sqlalchemy.Column(sqlalchemy.TEXT, nullable=False, index=True) 143 | 144 | 145 | auto_migrate(engine=engine(db), models=[MyTable]) 146 | 147 | 148 | # -> 149 | # Created database "sqlite:////var/folders/gg/8117h7rj08zd9rpt55l315_1xx044y/T/tmpl_sdop4j/test.sqlite" 150 | # 151 | # CREATE TABLE my_table ( 152 | # my_table_id SERIAL NOT NULL, 153 | # column_1 TEXT NOT NULL, 154 | # PRIMARY KEY (my_table_id) 155 | # ); 156 | # 157 | # CREATE INDEX ix_my_table_column_1 ON my_table (column_1); 158 | 159 | # remove index and add another column 160 | class MyTable(sqlalchemy.ext.declarative.declarative_base()): 161 | __tablename__ = 'my_table' 162 | my_table_id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 163 | column_1 = sqlalchemy.Column(sqlalchemy.TEXT, nullable=False) 164 | column_2 = sqlalchemy.Column(sqlalchemy.Integer) 165 | 166 | 167 | auto_migrate(engine=engine(db), models=[MyTable]) 168 | # -> 169 | # ALTER TABLE my_table ADD COLUMN column_2 INTEGER; 170 | # 171 | # DROP INDEX ix_my_table_text_column_1; 172 | -------------------------------------------------------------------------------- /tests/mssql/test_mssql.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pytest 3 | import subprocess 4 | import typing as t 5 | 6 | from mara_db import dbs 7 | 8 | from ..command_helper import * 9 | from ..db_test_helper import db_is_responsive, db_replace_placeholders 10 | from ..local_config import MSSQL_DB, MSSQL_SQSH_DB, MSSQL_SQLCMD_DB 11 | 12 | 13 | if not MSSQL_DB: 14 | pytest.skip("skipping SQLServerDB tests: variable MSSQL_DB not set", allow_module_level=True) 15 | 16 | 17 | @pytest.fixture(scope="session") 18 | def mssql_db(docker_ip, docker_services) -> t.Tuple[str, int]: 19 | """Ensures that a SQL Server instance is running on docker.""" 20 | 21 | docker_port = docker_services.port_for("mssql", 1433) 22 | db = db_replace_placeholders(MSSQL_DB, docker_ip, docker_port) 23 | 24 | # here we need to wait until the SQL Server port is available.
25 | docker_services.wait_until_responsive( 26 | timeout=30.0, pause=0.1, check=lambda: db_is_responsive(db) 27 | ) 28 | 29 | return db 30 | 31 | 32 | @pytest.mark.dependency() 33 | def test_mssql_shell_query_command(mssql_db): 34 | command = execute_sql_statement_command(mssql_db, "SELECT 1") 35 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 36 | print(pstdout) 37 | assert exitcode == 0 38 | 39 | 40 | @pytest.mark.dependency(depends=['test_mssql_shell_query_command']) 41 | def test_mssql_ddl(mssql_db): 42 | """Runs the DDL script required for other tests""" 43 | # run 'test_mssql_ddl.sql' 44 | ddl_file_path = str((pathlib.Path(__file__).parent / 'test_mssql_ddl.sql').absolute()) 45 | command = execute_sql_file_command(mssql_db, ddl_file_path) 46 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 47 | print(pstdout) 48 | assert exitcode == 0 49 | 50 | 51 | def test_mssql_sqlalchemy(mssql_db): 52 | """ 53 | A simple test to check if the SQLAlchemy connection works 54 | """ 55 | from ..db_test_helper import _test_sqlalchemy 56 | _test_sqlalchemy(mssql_db) 57 | 58 | 59 | def test_mssql_connect(mssql_db): 60 | """ 61 | A simple test to check if the connect API works. 62 | """ 63 | from ..db_test_helper import _test_connect 64 | _test_connect(mssql_db) 65 | 66 | 67 | def test_mssql_cursor_context(mssql_db): 68 | """ 69 | A simple test to check if the cursor context of the db works. 70 | """ 71 | from ..db_test_helper import _test_cursor_context 72 | _test_cursor_context(mssql_db) 73 | 74 | 75 | 76 | """ 77 | ################################################################################################################################# 78 | # Tests specific to sqsh 79 | """ 80 | 81 | @pytest.fixture(scope="session") 82 | def mssql_sqsh_db(docker_ip, docker_services) -> t.Tuple[str, int]: 83 | """Ensures that a SQL Server instance is running on docker.""" 84 | 85 | docker_port = docker_services.port_for("mssql", 1433) 86 | db = db_replace_placeholders(MSSQL_SQSH_DB, docker_ip, docker_port) 87 | 88 | # here we need to wait until the SQL Server port is available.
89 | docker_services.wait_until_responsive( 90 | timeout=30.0, pause=0.1, check=lambda: db_is_responsive(db) 91 | ) 92 | 93 | return db 94 | 95 | 96 | @pytest.mark.dependency() 97 | def test_mssql_sqsh_shell_query_command(mssql_sqsh_db): 98 | command = execute_sql_statement_command(mssql_sqsh_db, "SELECT 1") 99 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 100 | print(pstdout) 101 | assert exitcode == 0 102 | 103 | 104 | @pytest.mark.dependency() 105 | def test_mssql_sqsh_shell_copy_to_stout(mssql_sqsh_db): 106 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqsh_db, "SELECT 1 AS Col1, 'FOO' AS Col2 UNION ALL SELECT 2, 'BAR'") 107 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 108 | print(pstdout) 109 | assert exitcode == 0 110 | assert pstdout == '''Col1,Col2 111 | 1,"FOO" 112 | 2,"BAR"''' 113 | 114 | 115 | 116 | """ 117 | ################################################################################################################################# 118 | # Tests specific to sqlcmd 119 | """ 120 | 121 | @pytest.fixture(scope="session") 122 | def mssql_sqlcmd_db(docker_ip, docker_services) -> t.Tuple[str, int]: 123 | """Ensures that a SQL Server instance is running on docker.""" 124 | 125 | docker_port = docker_services.port_for("mssql", 1433) 126 | db = db_replace_placeholders(MSSQL_SQLCMD_DB, docker_ip, docker_port) 127 | 128 | # here we need to wait until the SQL Server port is available. 129 | docker_services.wait_until_responsive( 130 | timeout=30.0, pause=0.1, check=lambda: db_is_responsive(db) 131 | ) 132 | 133 | return db 134 | 135 | 136 | @pytest.mark.dependency() 137 | def test_mssql_sqlcmd_shell_query_command(mssql_sqlcmd_db): 138 | command = execute_sql_statement_command(mssql_sqlcmd_db, "SELECT 1") 139 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 140 | print(pstdout) 141 | assert exitcode == 0 142 | 143 | 144 | @pytest.mark.dependency() 145 | def test_mssql_sqlcmd_shell_copy_to_stout(mssql_sqlcmd_db): 146 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqlcmd_db, "SELECT 1 AS Col1, 'FOO' AS Col2 UNION ALL SELECT 2, 'BAR'") 147 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 148 | print(pstdout) 149 | assert exitcode == 0 150 | assert pstdout == '''1,FOO 151 | 2,BAR''' 152 | 153 | 154 | @pytest.mark.dependency(depends=["test_mssql_sqlcmd_shell_query_command", "test_mssql_sqlcmd_shell_copy_to_stout", "test_mssql_ddl"]) 155 | @pytest.mark.parametrize( 156 | "seed_file", 157 | [ 158 | "names_lf_lastrow.csv", 159 | "names_crlf_lastrow.csv", 160 | # BCP only supports unquoted files that end with a row terminator after the last row 161 | ] 162 | ) 163 | def test_mssql_sqlcmd_shell_copy_from_stdin_csv_noheader(mssql_sqlcmd_db, seed_file): 164 | # delete rows from table, make sure that the last matrix test does not mess up this test 165 | command = execute_sql_statement_command(mssql_sqlcmd_db, "DELETE FROM names") 166 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 167 | assert exitcode == 0 168 | 169 | # reading csv file...
170 | names_csv_file_path = str((pathlib.Path(__file__).parent / f'../seed/{seed_file}').absolute()) 171 | command = f'cat {names_csv_file_path} \\\n' 172 | command += ' | ' + shell.copy_from_stdin_command(mssql_sqlcmd_db,target_table='names',csv_format=True,skip_header=False,delimiter_char=',') 173 | print(command) 174 | 175 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 176 | print(pstdout) 177 | assert exitcode == 0 178 | 179 | # check if writing was successful 180 | 181 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqlcmd_db, "SELECT COUNT(*) FROM names") 182 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 183 | assert exitcode == 0 184 | assert pstdout == "10" 185 | 186 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqlcmd_db, "SELECT name FROM names WHERE id = 1") 187 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 188 | assert exitcode == 0 189 | assert pstdout == "Elinor Meklit" 190 | 191 | 192 | @pytest.mark.dependency(depends=["test_mssql_sqlcmd_shell_query_command", "test_mssql_sqlcmd_shell_copy_to_stout", "test_mssql_ddl"]) 193 | @pytest.mark.parametrize( 194 | "seed_file", 195 | [ 196 | "names_lf_lastrow_header.csv", 197 | "names_crlf_lastrow_header.csv", 198 | # BCP only supports unquoted files that end with a row terminator after the last row 199 | ] 200 | ) 201 | def test_mssql_sqlcmd_shell_copy_from_stdin_csv_skipheader(mssql_sqlcmd_db, seed_file): 202 | # delete rows from table, make sure that the last matrix test does not mess up this test 203 | command = execute_sql_statement_command(mssql_sqlcmd_db, "DELETE FROM names_with_header") 204 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 205 | assert exitcode == 0 206 | 207 | # reading csv file... 208 | names_csv_file_path = str((pathlib.Path(__file__).parent / f'../seed/{seed_file}').absolute()) 209 | command = f'cat {names_csv_file_path} \\\n' 210 | command += ' | ' + shell.copy_from_stdin_command(mssql_sqlcmd_db,target_table='names_with_header',csv_format=True,skip_header=True,delimiter_char=',') 211 | print(command) 212 | 213 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 214 | print(pstdout) 215 | assert exitcode == 0 216 | 217 | # check if writing was successful 218 | 219 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqlcmd_db, "SELECT COUNT(*) FROM names_with_header") 220 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 221 | assert exitcode == 0 222 | assert pstdout == "10" 223 | 224 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqlcmd_db, "SELECT name FROM names_with_header WHERE id = 1") 225 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 226 | assert exitcode == 0 227 | assert pstdout == "Elinor Meklit" 228 | -------------------------------------------------------------------------------- /tests/postgres/test_postgres.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import typing as t 3 | import subprocess 4 | import pathlib 5 | 6 | from mara_db import shell, formats 7 | 8 | from ..command_helper import * 9 | from ..db_test_helper import db_is_responsive, db_replace_placeholders 10 | from ..local_config import POSTGRES_DB 11 | 12 | 13 | if not POSTGRES_DB: 14 | pytest.skip("skipping PostgreSQL tests: variable POSTGRES_DB not set", allow_module_level=True) 15 | 16 | 17 | @pytest.fixture(scope="session") 18 | def postgres_db(docker_ip, docker_services) -> t.Tuple[str, int]: 19 | """Ensures that a PostgreSQL server is running on docker.""" 20 | 21
| docker_port = docker_services.port_for("postgres", 5432) 22 | db = db_replace_placeholders(POSTGRES_DB, docker_ip, docker_port) 23 | 24 | # here we need to wait until the PostgreSQL port is available. 25 | docker_services.wait_until_responsive( 26 | timeout=30.0, pause=0.1, check=lambda: db_is_responsive(db) 27 | ) 28 | 29 | return db 30 | 31 | 32 | @pytest.mark.dependency() 33 | def test_postgres_shell_query_command(postgres_db): 34 | command = execute_sql_statement_command(postgres_db, "SELECT 1") 35 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 36 | print(pstdout) 37 | assert exitcode == 0 38 | 39 | 40 | @pytest.mark.dependency() 41 | def test_postgres_shell_copy_to_stout(postgres_db): 42 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT 1 AS Col1, 'FOO' AS Col2 UNION ALL SELECT 2, 'BAR'") 43 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 44 | print(pstdout) 45 | assert exitcode == 0 46 | assert pstdout == '''1,FOO 47 | 2,BAR''' 48 | 49 | 50 | @pytest.mark.dependency() 51 | def test_postgres_ddl(postgres_db): 52 | """Runs the DDL script required for other tests""" 53 | # run 'test_postgres_ddl.sql' 54 | ddl_file_path = str((pathlib.Path(__file__).parent / 'test_postgres_ddl.sql').absolute()) 55 | command = execute_sql_file_command(postgres_db, ddl_file_path) 56 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 57 | print(pstdout) 58 | assert exitcode == 0 59 | 60 | 61 | @pytest.mark.dependency(depends=["test_postgres_shell_query_command", "test_postgres_shell_copy_to_stout", "test_postgres_ddl"]) 62 | @pytest.mark.parametrize( 63 | "seed_file", 64 | [ 65 | "names_crlf_lastrow.csv", 66 | "names_crlf_quoted_lastrow.csv", 67 | "names_crlf_quoted.csv", 68 | "names_crlf.csv", 69 | "names_lf_lastrow.csv", 70 | "names_lf_quoted_lastrow.csv", 71 | "names_lf_quoted.csv", 72 | "names_lf.csv", 73 | ] 74 | ) 75 | def test_postgres_shell_copy_from_stdin_csv_noheader(postgres_db, seed_file): 76 | # delete rows from table, make sure that the last matrix test does not mess up this test 77 | command = execute_sql_statement_command(postgres_db, "DELETE FROM names") 78 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 79 | assert exitcode == 0 80 | 81 | # reading csv file...
82 | file_path = str((pathlib.Path(__file__).parent / f'../seed/{seed_file}').absolute()) 83 | command = f'cat {file_path} \\\n' 84 | command += ' | ' + shell.copy_from_stdin_command(postgres_db,target_table='names', 85 | pipe_format=formats.CsvFormat(header=False, delimiter_char=',')) 86 | print(command) 87 | 88 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 89 | print(pstdout) 90 | assert exitcode == 0 91 | 92 | # check if writing was successful 93 | 94 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT COUNT(*) FROM names") 95 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 96 | assert exitcode == 0 97 | assert pstdout == "10" 98 | 99 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT name FROM names WHERE id = 1") 100 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 101 | assert exitcode == 0 102 | assert pstdout == "Elinor Meklit" 103 | 104 | 105 | @pytest.mark.dependency(depends=["test_postgres_shell_query_command", "test_postgres_shell_copy_to_stout", "test_postgres_ddl"]) 106 | @pytest.mark.parametrize( 107 | "seed_file", 108 | [ 109 | "names_crlf_lastrow_header.csv", 110 | "names_crlf_quoted_lastrow_header.csv", 111 | "names_crlf_quoted_header.csv", 112 | "names_crlf_header.csv", 113 | "names_lf_lastrow_header.csv", 114 | "names_lf_quoted_lastrow_header.csv", 115 | "names_lf_quoted_header.csv", 116 | "names_lf_header.csv", 117 | ] 118 | ) 119 | def test_postgres_shell_copy_from_stdin_csv_skipheader(postgres_db, seed_file): 120 | # delete rows from table, make sure that the last matrix test does not mess up this test 121 | command = execute_sql_statement_command(postgres_db, "DELETE FROM names_with_header") 122 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 123 | assert exitcode == 0 124 | 125 | # reading csv file... 
126 | file_path = str((pathlib.Path(__file__).parent / f'../seed/{seed_file}').absolute()) 127 | command = f'cat {file_path} \\\n' 128 | command += ' | ' + shell.copy_from_stdin_command(postgres_db, 129 | target_table='names_with_header', 130 | pipe_format=formats.CsvFormat(header=True, delimiter_char=',')) 131 | print(command) 132 | 133 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 134 | print(pstdout) 135 | assert exitcode == 0 136 | 137 | # check if writing was successful 138 | 139 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT COUNT(*) FROM names_with_header") 140 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 141 | assert exitcode == 0 142 | assert pstdout == "10" 143 | 144 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT name FROM names_with_header WHERE id = 1") 145 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 146 | assert exitcode == 0 147 | assert pstdout == "Elinor Meklit" 148 | 149 | 150 | @pytest.mark.dependency(depends=["test_postgres_shell_query_command", "test_postgres_shell_copy_to_stout", "test_postgres_ddl"]) 151 | @pytest.mark.parametrize( 152 | "seed_file", 153 | [ 154 | "accounts_crlf_lastrow.jsonl", 155 | "accounts_crlf.jsonl", 156 | "accounts_lf_lastrow.jsonl", 157 | "accounts_lf.jsonl", 158 | ] 159 | ) 160 | def test_postgres_shell_copy_from_stdin_jsonl(postgres_db, seed_file): 161 | # delete rows from table, make sure that the last matrix test does not mess up this test 162 | command = execute_sql_statement_command(postgres_db, "DELETE FROM accounts_json") 163 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 164 | assert exitcode == 0 165 | 166 | # reading csv file... 167 | file_path = str((pathlib.Path(__file__).parent / f'../seed/{seed_file}').absolute()) 168 | command = f'cat {file_path} \\\n' 169 | command += ' | ' + shell.copy_from_stdin_command(postgres_db, 170 | target_table='accounts_json', 171 | pipe_format=formats.JsonlFormat()) 172 | print(command) 173 | 174 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 175 | print(pstdout) 176 | assert exitcode == 0 177 | 178 | # check if writing was successful 179 | 180 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT COUNT(*) FROM accounts_json") 181 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 182 | assert exitcode == 0 183 | assert pstdout == "6" 184 | 185 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT COUNT(*) FROM accounts_json WHERE data IS NOT NULL") 186 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 187 | assert exitcode == 0 188 | assert pstdout == "6" 189 | 190 | 191 | def test_postgres_sqlalchemy(postgres_db): 192 | """ 193 | A simple test to check if the SQLAlchemy connection works 194 | """ 195 | from ..db_test_helper import _test_sqlalchemy 196 | _test_sqlalchemy(postgres_db) 197 | 198 | 199 | def test_postgres_connect(postgres_db): 200 | """ 201 | A simple test to check if the connect API works. 202 | """ 203 | from ..db_test_helper import _test_connect 204 | _test_connect(postgres_db) 205 | 206 | 207 | def test_postgres_cursor_context(postgres_db): 208 | """ 209 | A simple test to check if the cursor context of the db works. 210 | """ 211 | from ..db_test_helper import _test_cursor_context 212 | _test_cursor_context(postgres_db) 213 | 214 | 215 | def test_postgres_cursor_context_legacy(postgres_db): 216 | """ 217 | Legacy call `postgres_cursor_context` test. 
218 | 219 | Test shall be dropped in version 5.0 220 | """ 221 | from mara_db.postgresql import postgres_cursor_context 222 | 223 | with postgres_cursor_context(postgres_db) as cursor: 224 | cursor.execute('SELECT 1') 225 | row = cursor.fetchone() 226 | assert row[0] == 1 227 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 4.11.0 (2023-12-06) 4 | 5 | - add entry point `mara.commands` (for [mara-cli](https://github.com/mara/mara-cli) support) 6 | 7 | ## 4.10.0 (2023-11-21) 8 | 9 | - add cli group 'mara-db'. Mark old cli commands as deprecated (#74) 10 | 11 | ## 4.9.2 (2023-02-21) 12 | 13 | ### Bugfixes :bug: 14 | 15 | - fix typo in deprecation warning messages and add warning category 16 | - fix JSONL data import into PostgreSQL db (#73) 17 | 18 | ## 4.9.1 (2023-02-08) 19 | 20 | - hotfix :bug: issue with postgres cursor context (#72) 21 | 22 | ## 4.9.0 (2023-02-07) 23 | 24 | ### Breaking Changes 25 | 26 | - the implementation of the formats option (#56) required changes to the functions in `mara_db.shell`. In case you defined custom implementations, you will have to adjust them. 27 | 28 | ### Feat :tada: 29 | 30 | - add a functional API to get the DB-API 2.0 connection of a database (#71) 31 | - add option to specify multiple formats like Parquet, Avro and ORC (#56, #64) 32 | 33 | ### Changes :rocket: 34 | - refactor the internally used cursor context logic to a more generic one (#68, #71) 35 | - a default database `mara` is no longer defined (#67). 36 | - add deprecation warning when using parameter `timestamp` in query_command. This parameter will be removed in version 5.0. See #44. 37 | 38 | ### Bugfixes :bug: 39 | 40 | - fix sqlalchemy_url for SQLServerDB 41 | - fix UI error when requesting schema view for Azure Synapse databases (#48) 42 | 43 | ## 4.8.0 (2022-09-01) 44 | 45 | ### Breaking Changes 46 | 47 | - when using BigQuery you need to rename the BigQueryDB db config parameter `service_account_private_key_file` to `service_account_json_file_name` (#45) 48 | - when using BigQuery with copy/read operations you need to specify parameter `gcloud_gcs_bucket_name` in the BigQueryDB db config (#45) 49 | 50 | ### Feat :tada: 51 | 52 | - Add Databricks db support (#62) 53 | - Add Snowflake db support (#52/#61) 54 | - Add sqlalchemy support for BigQuery (#45/#50) 55 | - Add sqlalchemy support for SQL Server (#46) 56 | - Add option to use either MSSQL Tools (sqlcmd/bcp) or sqsh to connect to SQL Server (#57) 57 | - Add readthedocs.io documentation (#59) 58 | 59 | ### Changes :rocket: 60 | - Extend BigQuery functionality (#45) 61 | - add extras per database engine (#50/#53). Postgres and Redshift are still included by default.
**Note:** It is recommended to always specify the extras for the database you use. 62 | - use client-side rendering for graphviz fallback (#51) 63 | 64 | ### Bugfixes :bug: 65 | - add all package files in wheel 66 | - a proper error is now thrown when the user tries to look at the schema of a BigQuery database 67 | 68 | ## 4.7.0 - 4.7.1 (2020-10-23) 69 | 70 | - Add BigQuery support 71 | - Fix copy from PostgreSQLDB to BigQueryDB when delimiter_char is not set (#36) 72 | - Add config default_echo_queries (#38) 73 | - Add support for SQLServerDB port (#37) 74 | - Fix exit command sequence when previous command has exit code not zero (#40) 75 | 76 | ## 4.6.0 - 4.6.1 (2020-07-03) 77 | 78 | - Escape double quotes in copy_from_stdin_command for PostgreSQL (#33) 79 | - Add overview page to visualization 80 | 81 | **required changes** 82 | 83 | If you use quotes in table names in `Copy` commands, check whether they still work. 84 | 85 | 86 | ## 4.5.0 - 4.5.1 (2020-04-27) 87 | 88 | - Don't escape dollar sign in queries for SqlServer 89 | - Support echo sql queries for SqlServer 90 | - Bugfix copy_to_stdout_command for SqlServerDB 91 | 92 | **required changes** 93 | 94 | If you use SQL Server and have queries that contain the `$` sign, then please escape that one manually. 95 | 96 | 97 | ## 4.4.1 - 4.4.3 (2020-02-13) 98 | 99 | - Show warning when graphviz is not installed 100 | - Set fetch-count 10000 for the `copy_to_stdout_command` for PostgreSQLDB to handle out of memory error. 101 | - Add schema visualization support for SQL Server 102 | - Set mssql severity level to 10 (#25) 103 | 104 | 105 | 106 | ## 4.4.0 (2019-11-28) 107 | 108 | - Implement `copy-from-stdin` command for Redshift (via tmp file on configurable s3 bucket) 109 | - Refactor database schema visualization so that multiple databases can be implemented 110 | - Implement database schema visualization for MySQL 111 | - Add function mysql.mysql_cursor_context for connecting to MySQL databases via https://github.com/PyMySQL/mysqlclient-python 112 | - Allow to pass a dbs.PostgreSQLDB instance to postgresql.postgres_cursor_context 113 | 114 | 115 | ## 4.3.0 - 4.3.1 (2019-07-04) 116 | 117 | - Add travis integration and PyPi upload 118 | 119 | 120 | ## 4.2.0 121 | 122 | - Add new parameters delimiter_char and csv_format to all copy command functions (allows for better quoting of JSONs, arrays, strings with tabs) 123 | - Add warnings for unused parameters 124 | - Make code a bit more pep-8 compliant 125 | 126 | **required changes** 127 | 128 | - Adapt own implementations of `copy_to_stdout_command`, `copy_from_stdin_command` & `copy_command` (add the two new parameters). 129 | - Test whether everything still works (has been working reliably in three big projects for 4 weeks now) 130 | 131 | 132 | ## 4.1.0 133 | 134 | - Revert commit [422c332](https://github.com/mara/mara-db/commit/422c332b09b4e28e19289f0baa27f5102ade9a03) (Fix pg to pg copy command for json data). It was causing too much trouble.
135 | 136 | 137 | ## 4.0.0 - 4.0.1 (2019-04-12) 138 | 139 | - Allow MARA_AUTOMIGRATE_SQLALCHEMY_MODELS to be a function (in order to improve import speed) 140 | - Change MARA_XXX variables to functions to delay imports 141 | - Fix pg to pg copy command for json data 142 | - Move some imports into the functions that use them in order to improve loading speed 143 | - Remove dependency_links from setup.py to regain compatibility with recent pip versions 144 | 145 | **required changes** 146 | 147 | - Update `mara-app` to `>=2.0.0` 148 | 149 | 150 | ## 3.2.0 - 3.2.3 (2019-04-11) 151 | 152 | - Add oracle db access 153 | - Add SSL standard parameters to PostgreSQL connection string 154 | - Add missing footer parameter to Oracle copy to stdout command 155 | - Change arguments for sqsh client to return non-zero exitcode in error case 156 | - Add single quotes around PostgreSQL passwords to prevent bash errors when the password contains certain characters 157 | 158 | ## 3.1.0 - 3.1.2 (2018-08-30) 159 | 160 | - Make graphviz engine in schema visualization selectable 161 | - Implement Redshift DB 162 | - Show enums in schema drawing for constrained tables 163 | - Extend copy_to_stdout_command with "footer" argument for PostgreSQL DB 164 | 165 | 166 | ## 3.0.0 - 3.0.2 (2018-04-27) 167 | 168 | - Move sqlalchemy auto-migration from mara-app to mara-db 169 | - Remove `config.mara_db_alias` function 170 | - Move function `sqlalchemy.postgres_cursor_context` to module `postgresql` 171 | - Remove `sqlalchemy/session_context` context handler 172 | - Import graphviz only when needed 173 | - Update / improve documentation 174 | - Add port to sqlalchemy postgres connection string 175 | - Extend copy_to_stdout_command with "header" argument 176 | 177 | **required changes** 178 | 179 | - Replace all occurrences of `mara_db.config.mara_db_alias()` with `'mara'` 180 | - Replace `mara_db.sqlalchemy.postgres_cursor_context` with `mara_db.postgresql.postgres_cursor_context` 181 | - Change all usages of `mara_db.sqlalchemy.session_context` to psycopg2 using `mara_db.postgresql.postgres_cursor_context` 182 | 183 | 184 | ## 2.3.0 - 2.3.1 (2018-04-03) 185 | 186 | - Switch dependency links in setup.py from ssh to https 187 | - Add psycopg2 as dependency 188 | 189 | 190 | 191 | ## 2.2.0 (2018-02-28) 192 | 193 | - add web ui for visualizing database schemas (postgres only currently) 194 | - improve acl 195 | - Fix bug in schema drawing 196 | - Quote strings when copying from sqlite 197 | - NULL value handling when copying from sqlite 198 | 199 | 200 | 201 | ## 2.1.0 - 2.1.3 (2018-01-19) 202 | 203 | - add SQLite support 204 | - don't use sqlalchemy session in postgres_cursor_context because it creates too many setup queries on each instantiation 205 | - always append ';\n\go' to queries against SQL Server 206 | - remove default-character-set=utf8mb4 from MySQL queries 207 | 208 | 209 | ## 2.0.0 - 2.0.1 (2017-12-20) 210 | 211 | - change database configuration from sqlalchemy urls to custom database-specific classes 212 | - create sqlalchemy session contexts from configuration objects 213 | - add functions for creating shell commands for accessing databases 214 | - add documentation 215 | - bug fixes 216 | - various smaller improvements in mara_db/shell.py 217 | 218 | **required changes** 219 | 220 | This version is pretty much incompatible with previous versions. See README.md for new usage patterns.
221 | 222 | 223 | ## 1.1.0 (2017-12-04) 224 | 225 | - Replace config function databases with database_urls 226 | - Add functions for client command creation 227 | 228 | **required changes** 229 | 230 | - Change database configurations from 231 | 232 | ```python 233 | from sqlalchemy import engine 234 | 235 | def databases() -> {str: engine.Engine}: 236 | """The list of database connections to use, by alias""" 237 | return {'mara': engine.create_engine('postgresql+psycopg2://root@localhost/mara')} 238 | 239 | ``` 240 | 241 | to 242 | 243 | ```python 244 | import sqlalchemy.engine.url 245 | 246 | def database_urls() -> {str: sqlalchemy.engine.url}: 247 | """The list of database connections to use, by alias""" 248 | return {'mara': sqlalchemy.engine.url.make_url('postgresql+psycopg2://root@localhost/mara')} 249 | ``` 250 | 251 | ## 1.0.0 - 1.0.1 (2017-03-08) 252 | 253 | - Initial version 254 | - Minor bug fixes and code style issues 255 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mara DB 2 | 3 | [![Build Status](https://github.com/mara/mara-db/actions/workflows/build.yml/badge.svg)](https://github.com/mara/mara-db/actions/workflows/build.yml) 4 | [![PyPI - License](https://img.shields.io/pypi/l/mara-db.svg)](https://github.com/mara/mara-db/blob/main/LICENSE) 5 | [![PyPI version](https://badge.fury.io/py/mara-db.svg)](https://badge.fury.io/py/mara-db) 6 | [![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://communityinviter.com/apps/mara-users/public-invite) 7 | 8 | Mini package for configuring and accessing multiple databases in a single project. Decouples the use of databases and their configuration by using "aliases" for databases. 9 | 10 | The file [mara_db/dbs.py](https://github.com/mara/mara-db/blob/main/mara_db/dbs.py) contains abstract database configurations for PostgreSQL, Mysql, SQL Server, Oracle, SQLite and Big Query. The database connections of a project are configured by overwriting the `databases` function in [mara_db/config.py](https://github.com/mara/mara-db/blob/main/mara_db/config.py): 11 | 12 | ```python 13 | import mara_db.config 14 | import mara_db.dbs 15 | 16 | ## configure database connections for different aliases 17 | mara_db.config.databases = lambda: { 18 | 'mara': mara_db.dbs.PostgreSQLDB(host='localhost', user='root', database='mara'), 19 | 'dwh': mara_db.dbs.PostgreSQLDB(database='dwh'), 20 | 'source-1': mara_db.dbs.MysqlDB(host='some-localhost', database='my_app', user='dwh'), 21 | 'source-2': mara_db.dbs.SQLServerDB(user='dwh_read', password='123abc', database='db1', host='some-sql-server') 22 | } 23 | 24 | ## access individual database configurations with `dbs.db`: 25 | print(mara_db.dbs.db('mara')) 26 | # -> <PostgreSQLDB: host=localhost, user=root, database=mara> 27 | ``` 28 | 29 | &nbsp; 30 | 31 | 32 | ## Visualization of (PostgreSQL, MySQL, SQL Server) database schemas 33 | 34 | [mara_db/views.py](https://github.com/mara/mara-db/blob/main/mara_db/views.py) contains a schema visualization for all configured databases using graphviz (currently PostgreSQL, Mysql and SQL Server only). It basically shows tables of selected schemas together with the foreign key relations between them.
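The schema pages also highlight likely foreign-key columns based on a configurable naming pattern (see below). A minimal, hypothetical sketch of widening that pattern — assuming `config.schema_ui_foreign_key_column_regex` follows the usual mara convention of a config function that is overridden by assignment (the exact signature is an assumption for illustration):

```python
import mara_db.config

# Hypothetical override: besides the default `*_fk` pattern, also treat
# columns ending in `_id` as foreign-key candidates in the schema pages.
mara_db.config.schema_ui_foreign_key_column_regex = lambda: '.*(_fk|_id)$'
```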
35 | 36 | 37 | ![Schema visualization](https://github.com/mara/mara-db/blob/main/docs/_static/schema-visualization.png) 38 | 39 | For finding missing foreign key constraints, columns that follow a specific naming pattern (configurable via `config.schema_ui_foreign_key_column_regex`, default `*_fk`) and that are not part of foreign key constraints are drawn in pink. 40 | 41 | &nbsp; 42 | 43 | 44 | ## Fast batch processing: Accessing databases with shell commands 45 | 46 | The file [mara_db/shell.py](https://github.com/mara/mara-db/blob/main/mara_db/shell.py) contains functions that create commands for accessing databases via their command line clients. 47 | 48 | For example, the `query_command` function creates a shell command that can receive an SQL query from stdin and execute it: 49 | 50 | ```python 51 | import mara_db.shell 52 | 53 | print(mara_db.shell.query_command('source-1')) 54 | # -> mysql --default-character-set=utf8mb4 --user=dwh --host=some-localhost my_app 55 | 56 | print(mara_db.shell.query_command('dwh', timezone='Europe/Lisbon', echo_queries=False)) 57 | # -> PGTZ=Europe/Lisbon PGOPTIONS=--client-min-messages=warning psql --no-psqlrc --set ON_ERROR_STOP=on dwh 58 | ``` 59 | 60 | The function `copy_to_stdout_command` creates a shell command that receives a query on stdin and writes the result to stdout in tabular form: 61 | 62 | ```python 63 | print(mara_db.shell.copy_to_stdout_command('source-1')) 64 | # -> mysql --default-character-set=utf8mb4 --user=dwh --host=some-localhost my_app --skip-column-names 65 | ``` 66 | 67 | Similarly, `copy_from_stdin_command` creates a client command that receives tabular data from stdin and writes it to a target table: 68 | 69 | ```python 70 | print(mara_db.shell.copy_from_stdin_command('dwh', target_table='some_table', delimiter_char=';')) 71 | # -> PGTZ=Europe/Berlin PGOPTIONS=--client-min-messages=warning psql --echo-all --no-psqlrc --set ON_ERROR_STOP=on dwh \ 72 | #    --command="COPY some_table FROM STDIN WITH DELIMITER AS ';'" 73 | ``` 74 | 75 | Finally, `copy_command` creates a shell command that receives an SQL query from stdin, executes the query in `source_db` and then writes the result to `target_table` in `target_db`: 76 | 77 | ```python 78 | print(mara_db.shell.copy_command('source-2', 'dwh', target_table='some_table')) 79 | # -> sed 's/\\\\$/\$/g;s/\$/\\\\$/g' \ 80 | #    | sqsh -U dwh_read -P 123abc -S some-sql-server -D db1 -m csv \ 81 | #    | PGTZ=Europe/Berlin PGOPTIONS=--client-min-messages=warning psql --echo-all --no-psqlrc --set ON_ERROR_STOP=on dwh \ 82 | #    --command = "COPY some_table FROM STDIN WITH CSV HEADER" 83 | ``` 84 | 85 | &nbsp; 86 | 87 | 88 | The following **command line clients** are used to access the various databases: 89 | 90 | | Database | Client binary | Comments | 91 | | --- | --- | --- | 92 | | Postgresql / Redshift | `psql` | Included in standard distributions. | 93 | | MariaDB / Mysql | `mysql` | Included in standard distributions. | 94 | | SQL Server | `sqsh`
- or -
`sqlcmd` | **sqsh**: From [https://sourceforge.net/projects/sqsh/](https://sourceforge.net/projects/sqsh/), usually messy to get working. On Ubuntu, use the [http://ppa.launchpad.net/jasc/sqsh/ubuntu/](http://ppa.launchpad.net/jasc/sqsh/ubuntu/) backport. On Mac, try the Homebrew version or install from source.
**sqlcmd**: Official Microsoft Utility for SQL Server. See [sqlcmd Utility](https://docs.microsoft.com/en-us/sql/tools/sqlcmd-utility) | 95 | | Oracle | `sqlplus64` | See the [Oracle Instant Client](https://www.oracle.com/technetwork/database/database-technologies/instant-client/overview/index.html) homepage for details. On Mac, follow [these instructions](https://vanwollingen.nl/install-oracle-instant-client-and-sqlplus-using-homebrew-a233ce224bf). Then ` sudo ln -s /usr/local/bin/sqlplus /usr/local/bin/sqlplus64` to make the binary accessible as `sqlplus64`. | 96 | | SQLite | `sqlite3` | Available in standard distributions. Version >3.20.x required (not the case on Ubuntu 14.04). | 97 | | Big Query | `bq` | See the [Google Cloud SDK](https://cloud.google.com/sdk/docs/quickstarts) page for details. | 98 | | Snowflake | `snowsql` | See [SnowSQL (CLI Client)](https://docs.snowflake.com/en/user-guide/snowsql.html) | 99 | | Databricks | `dbsqlcli` | Included when using package extra `databricks` via package [databricks-sql-cli](https://pypi.org/project/databricks-sql-cli/). See [Databricks SQL CLI](https://docs.databricks.com/dev-tools/databricks-sql-cli.html#) | 100 | 101 | &nbsp; 102 | 103 | 104 | ## Make it so! Auto-migration of SQLAlchemy models 105 | 106 | [Alembic has a feature](http://alembic.zzzcomputing.com/en/latest/autogenerate.html) that can create a diff between the state of a database and the ORM models of an application. This feature is used in [mara_db/auto_migrate.py](https://github.com/mara/mara-db/blob/main/mara_db/auto_migrate.py) to automatically perform all necessary database transformations, without intermediate migration files: 107 | 108 | ```python 109 | # define a model / table 110 | class MyTable(sqlalchemy.ext.declarative.declarative_base()): 111 | __tablename__ = 'my_table' 112 | my_table_id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 113 | column_1 = sqlalchemy.Column(sqlalchemy.TEXT, nullable=False, index=True) 114 | 115 | 116 | db = mara_db.dbs.SQLiteDB(file_name='/tmp/test.sqlite') 117 | 118 | # create database and table 119 | mara_db.auto_migration.auto_migrate(engine=mara_db.auto_migration.engine(db), models=[MyTable]) 120 | # -> 121 | # Created database "sqlite:////tmp/test.sqlite" 122 | # 123 | # CREATE TABLE my_table ( 124 | # my_table_id SERIAL NOT NULL, 125 | # column_1 TEXT NOT NULL, 126 | # PRIMARY KEY (my_table_id) 127 | # ); 128 | # 129 | # CREATE INDEX ix_my_table_column_1 ON my_table (column_1); 130 | ``` 131 | 132 | When the model is changed later, `auto_migrate` creates a diff against the existing database and applies it: 133 | 134 | ```python 135 | # remove index and add another column 136 | class MyTable(sqlalchemy.ext.declarative.declarative_base()): 137 | __tablename__ = 'my_table' 138 | my_table_id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 139 | column_1 = sqlalchemy.Column(sqlalchemy.TEXT, nullable=False) 140 | column_2 = sqlalchemy.Column(sqlalchemy.Integer) 141 | 142 | auto_migrate(engine=engine(db), models=[MyTable]) 143 | # -> 144 | # ALTER TABLE my_table ADD COLUMN column_2 INTEGER; 145 | # 146 | # DROP INDEX ix_my_table_text_column_1; 147 | ``` 148 | 149 | **Use with care**! There are a lot of changes [that alembic auto-generate can not detect](http://alembic.zzzcomputing.com/en/latest/autogenerate.html#what-does-autogenerate-detect-and-what-does-it-not-detect). We recommend testing each auto-migration on a staging system first before deploying to production.
Sometimes manual migration scripts will be necessary. 150 | 151 | 152 | 153 | ## Installation 154 | 155 | ```bash 156 | pip install mara-db 157 | ``` 158 | 159 | or 160 | 161 | ```bash 162 | pip install git+https://github.com/mara/mara-db.git 163 | ``` 164 | 165 | ### Optional: Installation of requirements for SQL Server 166 | 167 | For usage with SQL Server, the python module pyodbc and an ODBC driver (e.g. Microsoft ODBC Driver 17 for SQL Server) are required; they are not included in the general requirements. 168 | 169 | To see how to install pyodbc, take a look at [this install guide](https://github.com/mkleehammer/pyodbc/wiki/Install). 170 | To see how to install ODBC 17, take a look at [Installing the Microsoft ODBC Driver for SQL Server on Linux and macOS](https://docs.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing-the-microsoft-odbc-driver-for-sql-server?view=sql-server-ver15). 171 | 172 | On Linux, you most likely will have to deal with an SSL issue, see [this issue](https://github.com/microsoft/msphpsql/issues/1023). A quick, dirty option in a test/development environment could be to [disable the requirement for TLS 1.2](https://github.com/microsoft/msphpsql/issues/1023#issuecomment-523214695). 173 | 174 | ### Optional: Installation of requirements for BigQuery 175 | 176 | For usage with BigQuery, the official `bq` and `gcloud` clients are required. 177 | See the [Google Cloud SDK](https://cloud.google.com/sdk/docs/quickstarts) page for installation details. 178 | 179 | Enabling the BigQuery API and Service account JSON credentials are also required as listed 180 | in the official documentation [here](https://cloud.google.com/bigquery/docs/quickstarts/quickstart-client-libraries#before-you-begin). 181 | 182 | One-time authentication of the service account used: 183 | ```cmd 184 | gcloud auth activate-service-account --key-file='path-to/service-account.json' 185 | ``` 186 | 187 | Optionally, for loading data from files into BigQuery, the `gcloud_gcs_bucket_name` can be specified in the database initialization. 188 | This will use the Google Cloud Storage bucket specified as cache for loading data and overcoming potential limitations. 189 | For more see [loading-data](https://cloud.google.com/bigquery/docs/bq-command-line-tool#loading_data). 190 | By default, files will be loaded directly from the local machine as described in [loading-local-data](https://cloud.google.com/bigquery/docs/loading-data-local#loading_data_from_a_local_data_source). 191 | 192 | A BigQuery context with a python cursor is also available on demand for easy access to BigQuery databases. 193 | In order to use it, install the official Google python client library: [google-cloud-bigquery](https://cloud.google.com/bigquery/docs/reference/libraries#client-libraries-install-python).
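A minimal sketch of that cursor access, assuming a `BigQueryDB` connection was configured under the hypothetical alias `reporting`:

```python
import mara_db.dbs

# Assumption for illustration: a mara_db.dbs.BigQueryDB is configured
# under the alias 'reporting' in mara_db.config.databases.
with mara_db.dbs.cursor_context('reporting') as cursor:
    cursor.execute('SELECT 1 AS a')
    print(cursor.fetchall())
```

The same `cursor_context` call works for any alias whose database class implements the `connect` API.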
194 | 195 | ## Links 196 | 197 | * Documentation: https://mara-db.readthedocs.io/ 198 | * Changes: https://mara-db.readthedocs.io/en/latest/changes.html 199 | * PyPI Releases: https://pypi.org/project/mara-db/ 200 | * Source Code: https://github.com/mara/mara-db 201 | * Issue Tracker: https://github.com/mara/mara-db/issues 202 | -------------------------------------------------------------------------------- /mara_db/dbs.py: -------------------------------------------------------------------------------- 1 | """Abstract definition of database connections""" 2 | 3 | import contextlib 4 | import functools 5 | import pathlib 6 | from typing import Union 7 | 8 | 9 | @functools.lru_cache(maxsize=None) 10 | def db(alias): 11 | """Returns a database configuration by alias""" 12 | from . import config 13 | databases = config.databases() 14 | if alias not in databases: 15 | raise KeyError(f'database alias "{alias}" not configured') 16 | return databases[alias] 17 | 18 | 19 | class DB: 20 | """Generic database connection definition""" 21 | 22 | def __repr__(self) -> str: 23 | return (f'<{self.__class__.__name__}: ' 24 | + ', '.join([f'{var}={"*****" if (var == "password" or "secret" in var) else getattr(self, var)}' 25 | for var in vars(self) if getattr(self, var)]) 26 | + '>') 27 | 28 | @property 29 | def sqlalchemy_url(self): 30 | """Returns the SQLAlchemy url for a database""" 31 | raise NotImplementedError(f'Please implement sqlalchemy_url for type "{self.__class__.__name__}"') 32 | 33 | 34 | class PostgreSQLDB(DB): 35 | def __init__(self, host: str = None, port: int = None, database: str = None, 36 | user: str = None, password: str = None, 37 | sslmode: str = None, sslrootcert: str = None, sslcert: str = None, sslkey: str = None): 38 | """ 39 | Connection information for a PostgreSQL database 40 | 41 | For the SSL options, see https://www.postgresql.org/docs/current/libpq-ssl.html#LIBPQ-SSL-PROTECTION 42 | """ 43 | self.host = host 44 | self.database = database 45 | self.port = port 46 | self.user = user 47 | self.password = password 48 | 49 | self.sslmode = sslmode 50 | self.sslrootcert = sslrootcert 51 | self.sslcert = sslcert 52 | self.sslkey = sslkey 53 | 54 | @property 55 | def sqlalchemy_url(self): 56 | return (f'postgresql+psycopg2://{self.user}{":" + self.password if self.password else ""}@{self.host}' 57 | + f'{":" + str(self.port) if self.port else ""}/{self.database}') 58 | 59 | 60 | class RedshiftDB(PostgreSQLDB): 61 | def __init__(self, host: str = None, port: int = None, database: str = None, 62 | user: str = None, password: str = None, 63 | aws_access_key_id=None, aws_secret_access_key=None, aws_s3_bucket_name=None): 64 | """ 65 | Connection information for a Redshift database 66 | 67 | The aws_* parameters are for copying to Redshift from stdin via an s3 bucket 68 | (requires the https://pypi.org/project/awscli/ package to be installed) 69 | """ 70 | self.aws_access_key_id = aws_access_key_id 71 | self.aws_secret_access_key = aws_secret_access_key 72 | self.aws_s3_bucket_name = aws_s3_bucket_name 73 | super(RedshiftDB, self).__init__(host, port, database, user, password) 74 | 75 | 76 | class BigQueryDB(DB): 77 | def __init__(self, 78 | service_account_json_file_name: str, 79 | location: str = None, project: str = None, dataset: str = None, 80 | gcloud_gcs_bucket_name=None, use_legacy_sql: bool = False): 81 | """ 82 | Connection information for a BigQueryDB database 83 | 84 | Enabling the BigQuery API and Service account json credentials are required.
For more: 85 | https://cloud.google.com/bigquery/docs/quickstarts/quickstart-client-libraries#before-you-begin 86 | 87 | Args: 88 | service_account_json_file_name: The name of the private key file provided by Google when creating a service account (in json format) 89 | location: Default geographic location to use when creating datasets or determining where jobs should run 90 | project: Default project to use for requests. 91 | dataset: Default dataset to use for requests. 92 | gcloud_gcs_bucket_name: The Google Cloud Storage bucket used as cache for loading data 93 | use_legacy_sql: (default: false) If true, the legacy BigQuery SQL dialect is used. 94 | """ 95 | self.service_account_json_file_name = service_account_json_file_name 96 | self.location = location 97 | self.project = project 98 | self.dataset = dataset 99 | self.gcloud_gcs_bucket_name = gcloud_gcs_bucket_name 100 | self.use_legacy_sql = use_legacy_sql 101 | 102 | @property 103 | def sqlalchemy_url(self): 104 | url = 'bigquery://' 105 | if self.project: 106 | url += self.project 107 | if self.dataset: 108 | url += '/' + self.dataset 109 | return url 110 | 111 | 112 | class MysqlDB(DB): 113 | def __init__(self, host: str = None, port: int = None, database: str = None, 114 | user: str = None, password: str = None, ssl: bool = None, charset: str = None): 115 | self.host = host 116 | self.database = database 117 | self.port = port 118 | self.user = user 119 | self.password = password 120 | self.ssl = ssl 121 | self.charset = charset 122 | 123 | 124 | class SQLServerDB(DB): 125 | def __new__(cls, host: str = None, port: int = None, database: str = None, 126 | user: str = None, password: str = None, odbc_driver: str = None, 127 | **kargs): 128 | """ 129 | Connection information for a SQL Server database 130 | """ 131 | if cls is SQLServerDB: 132 | # Here we define what happens when the class is directly created in code 133 | # 134 | # We define here that class SqshSQLServerDB shall be used by default. In a newer 135 | # major version we could change this to SqlcmdSQLServerDB but we do not want to 136 | # introduce a breaking change here at this point. 137 | return SqshSQLServerDB(host=host, port=port, database=database, user=user, password=password, odbc_driver=odbc_driver) 138 | else: 139 | # This is called when the class is created from a derived class (e.g.
SqshSQLServerDB) 140 | return super(SQLServerDB, cls).__new__(cls) 141 | 142 | def __init__(self, host: str = None, port: int = None, database: str = None, 143 | user: str = None, password: str = None, odbc_driver: str = None): 144 | self.host = host 145 | self.port = port 146 | self.database = database 147 | self.user = user 148 | self.password = password 149 | if odbc_driver is None: 150 | self.odbc_driver = 'ODBC Driver 17 for SQL Server' # default odbc driver 151 | else: 152 | self.odbc_driver = odbc_driver 153 | 154 | @property 155 | def sqlalchemy_url(self): 156 | import urllib.parse 157 | port = self.port if self.port else 1433 158 | driver = self.odbc_driver.replace(' ','+') 159 | return f'mssql+pyodbc://{self.user}:{urllib.parse.quote(self.password)}@{self.host}:{port}/{self.database}?driver={driver}' 160 | 161 | 162 | class SqshSQLServerDB(SQLServerDB): 163 | def __init__(self, host: str = None, port: int = None, database: str = None, 164 | user: str = None, password: str = None, odbc_driver: str = None): 165 | """ 166 | Connection information for a SQL Server database using the Unix package sqsh 167 | """ 168 | # NOTE: The support for named instances is not added because the command `sqsh` does not support it 169 | super().__init__(host=host, port=port, database=database, user=user, password=password, odbc_driver=odbc_driver) 170 | 171 | 172 | class SqlcmdSQLServerDB(SQLServerDB): 173 | def __init__(self, host: str = None, instance: str = None, port: int = None, database: str = None, 174 | user: str = None, password: str = None, odbc_driver: str = None, 175 | protocol: str = None, quoted_identifier: bool = True, 176 | trust_server_certificate: bool = False): 177 | """ 178 | Connection information for a SQL Server database using the MSSQL Tools e.g. sqlcmd 179 | 180 | Args: 181 | quoted_identifier: If set to true, the SET option QUOTED_IDENTIFIER is set to ON, otherwise OFF. 182 | protocol: can be tcp (TCP/IP connection), np (named pipe) or lpc (using shared memory).
183 | See as well: https://docs.microsoft.com/en-us/sql/ssms/scripting/sqlcmd-connect-to-the-database-engine?view=sql-server-ver15 184 | trust_server_certificate: Trust the server certificate without validation 185 | """ 186 | super().__init__(host=host, port=port, database=database, user=user, password=password, odbc_driver=odbc_driver) 187 | if protocol: 188 | if protocol not in ['tcp','np','lpc']: 189 | raise ValueError(f'Unsupported protocol: {protocol}') 190 | if protocol == 'tcp' and instance: 191 | raise ValueError('You cannot use protocol tcp with an instance name') 192 | if protocol in ['np','lpc'] and port: 193 | raise ValueError('You cannot use protocol np/lpc with a port number') 194 | if instance is not None and port is not None: 195 | raise ValueError('You can only use instance or port, not both together') 196 | self.protocol = protocol 197 | self.quoted_identifier = quoted_identifier 198 | self.instance = instance 199 | self.trust_server_certificate = trust_server_certificate 200 | 201 | @property 202 | def sqlalchemy_url(self): 203 | return super().sqlalchemy_url \ 204 | + ('&TrustServerCertificate=yes' if self.trust_server_certificate else '') 205 | 206 | 207 | class OracleDB(DB): 208 | def __init__(self, host: str = None, port: int = 0, endpoint: str = None, user: str = None, password: str = None): 209 | self.host = host 210 | self.port = port 211 | self.endpoint = endpoint 212 | self.user = user 213 | self.password = password 214 | 215 | 216 | class SQLiteDB(DB): 217 | def __init__(self, file_name: pathlib.Path) -> None: 218 | self.file_name = file_name 219 | 220 | @property 221 | def sqlalchemy_url(self): 222 | return f'sqlite:///{self.file_name}' 223 | 224 | 225 | class SnowflakeDB(DB): 226 | """A database connection to a Snowflake database""" 227 | def __init__(self, connection: str = None, account: str = None, user: str = None, password: str = None, database: str = None, 228 | private_key_file: str = None, private_key_passphrase: str = None) -> None: 229 | """ 230 | Connection information for a Snowflake database 231 | 232 | Args: 233 | connection: The connection name defined in the snowsql configuration ~/.snowsql/config 234 | account: The account identifier. 235 | See here: https://docs.snowflake.com/en/user-guide/admin-account-identifier.html 236 | user: The user name 237 | password: The password of the user 238 | database: The database name 239 | private_key_file: Path to private key file in PEM format used for key pair authentication. 240 | The private key file must be encrypted. 241 | private_key_passphrase: The passphrase for the private key file.
242 | """ 243 | self.connection = connection 244 | self.account = account 245 | self.user = user 246 | self.password = password 247 | self.database = database 248 | self.private_key_file = private_key_file 249 | self.private_key_passphrase = private_key_passphrase 250 | 251 | @property 252 | def sqlalchemy_url(self): 253 | assert all(v is not None for v in [self.account, self.user, self.password]), \ 254 | "sqlalchemy_url for SnowflakeDB requires a user, password and account" 255 | return (f'snowflake://{self.user}:{self.password}@{self.account}' 256 | + (f'/{self.database}' if self.database else '')) 257 | 258 | 259 | class DatabricksDB(DB): 260 | """A database connection to Databricks""" 261 | def __init__(self, host: str = None, http_path: str = None, access_token: str = None) -> None: 262 | """ 263 | Connection information for Databricks 264 | 265 | Args: 266 | host: The hostname 267 | http_path: The http path 268 | access_token: The Access Token 269 | """ 270 | self.host = host 271 | self.http_path = http_path 272 | self.access_token = access_token 273 | 274 | @property 275 | def sqlalchemy_url(self): 276 | return f"databricks+connector://token:{self.access_token}@{self.host}:443/" 277 | 278 | 279 | 280 | @functools.singledispatch 281 | def connect(db: object, **kargs) -> object: 282 | """ 283 | Creates a DB-API 2.0 (PEP 249) compatible connection to the database. 284 | 285 | See also: https://peps.python.org/pep-0249/#connection-objects 286 | 287 | Args: 288 | db: The database for which you want to get the connection (either an alias or a `dbs.DB` object) 289 | **kargs: Optional arguments. 290 | """ 291 | raise NotImplementedError(f'Please implement connect for type "{db.__class__.__name__}"') 292 | 293 | 294 | @connect.register(str) 295 | def __(alias: str, **kargs) -> object: 296 | return connect(db(alias), **kargs) 297 | 298 | 299 | @connect.register(PostgreSQLDB) 300 | def __(db, **kargs) -> 'psycopg2.extensions.connection': 301 | import psycopg2 302 | return psycopg2.connect(dbname=db.database, user=db.user, password=db.password, 303 | host=db.host, port=db.port) 304 | 305 | 306 | @connect.register(BigQueryDB) 307 | def __(db, **kargs) -> object: 308 | from google.oauth2.service_account import Credentials 309 | from google.cloud.bigquery.client import Client 310 | from google.cloud.bigquery.dbapi.connection import Connection 311 | credentials = Credentials.from_service_account_file(db.service_account_json_file_name) 312 | client = Client(project=credentials.project_id, credentials=credentials, location=db.location) 313 | return Connection(client) 314 | 315 | 316 | @connect.register(MysqlDB) 317 | def __(db, **kargs) -> 'MySQLdb.connections.Connection': 318 | import MySQLdb.cursors # requires https://github.com/PyMySQL/mysqlclient-python 319 | return MySQLdb.connect( 320 | host=db.host, user=db.user, passwd=db.password, db=db.database, port=db.port, 321 | cursorclass=MySQLdb.cursors.Cursor) 322 | 323 | 324 | @connect.register(SQLServerDB) 325 | def __(db, **kargs) -> 'pyodbc.Connection': 326 | import pyodbc # requires https://github.com/mkleehammer/pyodbc/wiki/Install 327 | server = db.host 328 | if db.port: # connecting via TCP/IP port 329 | server = f"{server},{db.port}" 330 | return pyodbc.connect(f"DRIVER={{{db.odbc_driver}}};SERVER={server};DATABASE={db.database};UID={db.user};PWD={db.password}" \ 331 | + (';Encrypt=YES;TrustServerCertificate=YES' if getattr(db, 'trust_server_certificate', False) else ''))  # trust_server_certificate is only defined on SqlcmdSQLServerDB 332 | 333 | 334 | @connect.register(SQLiteDB) 335 | def __(db, **kargs) ->
'sqlite3.Connection': 336 | import sqlite3 337 | return sqlite3.connect(database=db.file_name) 338 | 339 | 340 | @connect.register(DatabricksDB) 341 | def __(db, **kargs) -> object: 342 | from databricks_dbapi import odbc 343 | return odbc.connect( 344 | host=db.host, 345 | http_path=db.http_path, 346 | token=db.access_token, 347 | driver_path=db.odbc_driver_path) 348 | 349 | 350 | 351 | @contextlib.contextmanager 352 | def cursor_context(db: Union[str, DB]) -> object: 353 | """ 354 | A single iteration with a cursor context. When the iteration is 355 | closed, a commit is executed on the cursor. 356 | 357 | Example usage: 358 | with db.cursor_context() as c: 359 | c.execute('UPDATE table SET table.c1 = 1 WHERE table.id = 5') 360 | """ 361 | connection = connect(db) 362 | try: 363 | cursor = connection.cursor() 364 | yield cursor 365 | connection.commit() 366 | except Exception: 367 | connection.rollback() 368 | raise 369 | finally: 370 | cursor.close() 371 | connection.close() 372 | -------------------------------------------------------------------------------- /mara_db/views.py: -------------------------------------------------------------------------------- 1 | """DB schema visualization""" 2 | 3 | import datetime 4 | import re 5 | import typing 6 | from functools import singledispatch 7 | from html import escape 8 | 9 | import flask 10 | from mara_db import config, dbs 11 | from mara_page import acl, navigation, response, bootstrap, html, _, xml 12 | 13 | blueprint = flask.Blueprint('mara_db', __name__, static_folder='static', template_folder='templates', url_prefix='/db') 14 | 15 | acl_resource = acl.AclResource(name='DB Schema') 16 | 17 | 18 | def navigation_entry(): 19 | return navigation.NavigationEntry( 20 | label='DB Schema', icon='star', description='Schemas of all databases connections', 21 | children=[navigation.NavigationEntry( 22 | label='Overview', icon='list', 23 | uri_fn=lambda: flask.url_for('mara_db.index_page'))] + 24 | [ 25 | navigation.NavigationEntry( 26 | label=alias, icon='database', 27 | description=f'The schema of the {alias} db', 28 | uri_fn=lambda current_db=alias: flask.url_for('mara_db.schema_page', db_alias=current_db)) 29 | for alias, db in config.databases().items() 30 | if supports_extract_schema(db) 31 | ]) 32 | 33 | 34 | @blueprint.route('/') 35 | def index_page(): 36 | """Overview page of mara_db""" 37 | return response.Response( 38 | title=f'Database schemas', 39 | html=bootstrap.card( 40 | body=[_.div(style='display:inline-block; margin-top:15px; margin-bottom:15px; margin-right:50px;')[ 41 | _.a(href=flask.url_for('mara_db.schema_page', db_alias=db_alias))[ 42 | _.span(class_='fa fa-database')[''], ' ', db_alias], 43 | _.br, 44 | _.span(style='color:#888')[escape(str(type(db).__name__))] 45 | ] 46 | for db_alias, db in config.databases().items()]), 47 | 48 | js_files=[flask.url_for('mara_db.static', filename='schema-page.js')]) 49 | 50 | 51 | @singledispatch 52 | def supports_extract_schema(db: object) -> [bool]: 53 | """ 54 | Returns true when the db supports schema extraction 55 | 56 | Args: 57 | db: The database which shall be tested for schema extraction 58 | """ 59 | return False 60 | 61 | 62 | @supports_extract_schema.register(str) 63 | def __(alias: str): 64 | return supports_extract_schema(dbs.db(alias)) 65 | 66 | 67 | @supports_extract_schema.register(dbs.PostgreSQLDB) 68 | def __(db: dbs.PostgreSQLDB): 69 | return True 70 | 71 | 72 | @supports_extract_schema.register(dbs.RedshiftDB) 73 | def __(db: dbs.RedshiftDB): 74 | return False 75 | 
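Note that `connect` in dbs.py above is registered for PostgreSQL, BigQuery, MySQL, SQL Server, SQLite and Databricks, but not for every `DB` subclass; a `SnowflakeDB`, for example, falls through to the generic implementation and raises `NotImplementedError`. Because `connect` is a plain `functools.singledispatch` function, a downstream project can register its own handler. A minimal sketch, assuming the separately installed `snowflake-connector-python` package; this registration is not part of mara-db:

```python
import snowflake.connector  # pip install snowflake-connector-python

from mara_db import dbs


@dbs.connect.register(dbs.SnowflakeDB)
def __(db, **kargs) -> 'snowflake.connector.SnowflakeConnection':
    # snowflake.connector.connect returns a PEP 249 compatible connection,
    # so dbs.cursor_context() works with it unchanged
    return snowflake.connector.connect(
        account=db.account, user=db.user,
        password=db.password, database=db.database)
```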
76 | 
77 | @supports_extract_schema.register(dbs.BigQueryDB)
78 | def __(db: dbs.BigQueryDB):
79 |     # BigQuery does not support primary and foreign key relations
80 |     return False
81 | 
82 | 
83 | @supports_extract_schema.register(dbs.MysqlDB)
84 | def __(db: dbs.MysqlDB):
85 |     return True
86 | 
87 | 
88 | @supports_extract_schema.register(dbs.SQLServerDB)
89 | def __(db: dbs.SQLServerDB):
90 |     # check if module pyodbc can be imported
91 |     import importlib.util
92 |     pyodbc_spec = importlib.util.find_spec("pyodbc")
93 |     return pyodbc_spec is not None
94 | 
95 | 
96 | @blueprint.route('/<string:db_alias>')
97 | def schema_page(db_alias: str):
98 |     """A page that visualizes the schemas of a database"""
99 |     if db_alias not in config.databases():
100 |         flask.abort(404, f'unknown database {db_alias}')
101 | 
102 |     return response.Response(
103 |         title=f'Schema of database {db_alias}',
104 |         html=[bootstrap.card(sections=[
105 |             html.asynchronous_content(flask.url_for('mara_db.schema_selection', db_alias=db_alias)),
106 |             [_.div(id='schema-container')]]),
107 |             html.spinner_js_function()],
108 |         js_files=[flask.url_for('mara_db.static', filename='schema-page.js')],
109 |         action_buttons=[response.ActionButton(
110 |             action='javascript:schemaPage.downloadSvg()', label='SVG',
111 |             title='Save current chart as SVG file', icon='download')]
112 |     )
113 | 
114 | 
115 | @singledispatch
116 | def schemas_with_foreign_key_constraints(db: object) -> [str]:
117 |     """
118 |     Returns all schemas that are affected by foreign key constraints
119 | 
120 |     Args:
121 |         db: The database in which to run the query (either an alias or a `dbs.DB` object)
122 |     """
123 |     raise NotImplementedError(
124 |         f'Please implement schemas_with_foreign_key_constraints for type "{db.__class__.__name__}"')
125 | 
126 | 
127 | @schemas_with_foreign_key_constraints.register(str)
128 | def __(alias: str):
129 |     return schemas_with_foreign_key_constraints(dbs.db(alias))
130 | 
131 | 
132 | @schemas_with_foreign_key_constraints.register(dbs.PostgreSQLDB)
133 | def __(db: dbs.PostgreSQLDB):
134 |     import mara_db.postgresql
135 |     with mara_db.postgresql.postgres_cursor_context(db) as cursor:
136 |         cursor.execute('''
137 |             SELECT
138 |               array_cat(array_agg(DISTINCT constrained_table_schema.nspname), array_agg(DISTINCT referenced_table_schema.nspname))
139 |             FROM pg_constraint
140 |             JOIN pg_class constrained_table ON constrained_table.oid = pg_constraint.conrelid
141 |             JOIN pg_namespace constrained_table_schema ON constrained_table.relnamespace = constrained_table_schema.oid
142 |             JOIN pg_class referenced_table ON referenced_table.oid = pg_constraint.confrelid
143 |             JOIN pg_namespace referenced_table_schema ON referenced_table.relnamespace = referenced_table_schema.oid''')
144 |         result = cursor.fetchone()
145 |         if result != (None,):
146 |             return list(set(result[0]))
147 | 
148 | 
149 | @schemas_with_foreign_key_constraints.register(dbs.MysqlDB)
150 | def __(db: dbs.MysqlDB):
151 |     import mara_db.mysql
152 |     with mara_db.mysql.mysql_cursor_context(db) as cursor:
153 |         cursor.execute("""
154 |             SELECT DISTINCT table_schema
155 |             FROM information_schema.table_constraints
156 |             WHERE CONSTRAINT_TYPE = 'FOREIGN KEY'
157 |             UNION
158 |             SELECT DISTINCT REFERENCED_TABLE_SCHEMA
159 |             FROM information_schema.key_column_usage
160 |             WHERE REFERENCED_TABLE_SCHEMA IS NOT NULL;
161 |             """)
162 |         return [row[0] for row in cursor.fetchall()]
163 | 
164 | 
165 | @schemas_with_foreign_key_constraints.register(dbs.SQLServerDB)
166 | def __(db: dbs.SQLServerDB):
167 |     import mara_db.sqlserver
168 |     with mara_db.sqlserver.sqlserver_cursor_context(db) as cursor:
169 |         cursor.execute("""
170 |             SELECT DISTINCT fpos.name AS schema_name
171 |             FROM sys.foreign_keys AS f
172 |             INNER JOIN sys.foreign_key_columns AS fc ON f.object_id = fc.constraint_object_id
173 |             INNER JOIN sys.objects AS fpo ON fpo.object_id = f.parent_object_id
174 |             INNER JOIN sys.schemas AS fpos ON fpos.schema_id = fpo.schema_id
175 |             """)
176 |         return [row[0] for row in cursor.fetchall()]
177 | 
178 | 
179 | @blueprint.route('/<string:db_alias>/.schemas')
180 | def schema_selection(db_alias: str):
181 |     """Asynchronously computes the list of schemas with foreign key constraints"""
182 |     schemas_with_fk_constraints = schemas_with_foreign_key_constraints(db_alias)
183 | 
184 |     if not schemas_with_fk_constraints:
185 |         return str(_.i['No schemas with foreign key constraints found'])
186 | 
187 |     return ''.join(xml.render([
188 |         [_.div(class_='form-check form-check-inline')[
189 |              _.label(class_='form-check-label')[
190 |                  _.input(class_="form-check-input schema-checkbox", type="checkbox", value=schema_name)[
191 |                      ''], ' ', schema_name]]
192 |          for schema_name in sorted(schemas_with_fk_constraints)],
193 |         '   ',
194 |         _.div(class_='form-check form-check-inline')[
195 |             _.label(class_='form-check-label')[
196 |                 _.input(class_="form-check-input", id='hide-columns-checkbox', type="checkbox")[
197 |                     ''], ' ', 'hide columns']],
198 |         '   ',
199 |         _.div(class_='form-check form-check-inline')[
200 |             _.label(class_='form-check-label')[
201 |                 'graphviz engine ',
202 |                 _.select(id='engine', style='border:none;background-color:white;')[
203 |                     [_.option(value=engine)[engine] for engine in ['neato', 'dot', 'twopi', 'fdp']]
204 |                 ]]],
205 |         _.script['''
206 | var schemaPage = SchemaPage("''' + flask.url_for('mara_db.schema_page', db_alias=db_alias) + '''", "''' + db_alias + '''");
207 | ''']]))
208 | 
209 | 
210 | @singledispatch
211 | def extract_schema(db: object, schema_names: [str]) -> (typing.Dict, typing.Set):
212 |     """
213 |     Extracts foreign key constraints and the involved tables from a db
214 | 
215 |     Args:
216 |         db: The database in which to run the query (either an alias or a `dbs.DB` object)
217 |         schema_names: the schemas to visualize
218 | 
219 |     Returns:
220 |         A dictionary of tables:
221 |         {(table_schema, table_name): {'columns': [columns], 'constrained-columns': {constrained-columns}}}
222 | 
223 |         All foreign key constraints as a set of tuples:
224 |         {((table_schema, table_name), (referenced_schema_name, referenced_table_name))}
225 |     """
226 |     raise NotImplementedError(f'Please implement extract_schema for type "{db.__class__.__name__}"')
227 | 
228 | 
229 | @extract_schema.register(str)
230 | def __(alias: str, schema_names: [str]):
231 |     return extract_schema(dbs.db(alias), schema_names)
232 | 
233 | 
234 | @extract_schema.register(dbs.PostgreSQLDB)
235 | def __(db: dbs.PostgreSQLDB, schema_names: [str]):
236 |     import mara_db.postgresql
237 | 
238 |     # get all table inheritance relations as a dictionary: {(child_schema, child_table): (parent_schema, parent_table)}
239 |     inherited_tables = {}
240 |     with mara_db.postgresql.postgres_cursor_context(db) as cursor:
241 |         cursor.execute("""
242 |             SELECT
243 |               rel_namespace.nspname, rel.relname,
244 |               parent_namespace.nspname, parent.relname
245 |             FROM pg_inherits
246 |             JOIN pg_class parent ON parent.oid = pg_inherits.inhparent
247 |             JOIN pg_class rel ON rel.oid = pg_inherits.inhrelid
248 |             JOIN pg_namespace parent_namespace ON parent_namespace.oid = parent.relnamespace
249 |             JOIN pg_namespace rel_namespace ON rel_namespace.oid = rel.relnamespace""")
250 |         for schema_name, table_name, parent_schema_name, parent_table_name in cursor.fetchall():
251 |             inherited_tables[(schema_name, table_name)] = (parent_schema_name, parent_table_name)
252 | 
253 |     # get all tables that have foreign key constraints on them or are referenced by foreign key constraints
254 |     tables = {}  # {(table_schema, table_name): {'columns': [columns], 'constrained-columns': {constrained-columns}}}
255 |     foreign_key_constraints = set()  # {((table_schema, table_name), (referenced_schema_name, referenced_table_name))}
256 | 
257 |     def empty_table():
258 |         return {'columns': [], 'constrained-columns': set()}
259 | 
260 |     with mara_db.postgresql.postgres_cursor_context(db) as cursor:
261 |         cursor.execute(f'''
262 |             SELECT
263 |               constrained_table_schema.nspname,
264 |               constrained_table.relname,
265 |               array_agg(constrained_column.attname),
266 |               referenced_table_schema.nspname,
267 |               referenced_table.relname
268 |             FROM pg_constraint
269 |             JOIN pg_class constrained_table ON constrained_table.oid = pg_constraint.conrelid
270 |             JOIN pg_namespace constrained_table_schema ON constrained_table.relnamespace = constrained_table_schema.oid
271 |             JOIN pg_class referenced_table ON referenced_table.oid = pg_constraint.confrelid
272 |             JOIN pg_namespace referenced_table_schema ON referenced_table.relnamespace = referenced_table_schema.oid
273 |             JOIN pg_attribute constrained_column ON constrained_column.attrelid = constrained_table.oid AND attnum = ANY (conkey)
274 |             WHERE constrained_table_schema.nspname = ANY ({'%s'})
275 |             GROUP BY constrained_table_schema.nspname, constrained_table.relname, referenced_table_schema.nspname, referenced_table.relname;
276 |             ''', (schema_names,))
277 |         for schema_name, table_name, table_columns, referenced_schema_name, referenced_table_name in cursor.fetchall():
278 |             referring_table = (schema_name, table_name)
279 |             if referring_table in inherited_tables:
280 |                 referring_table = inherited_tables[referring_table]
281 |             if referring_table not in tables:
282 |                 tables[referring_table] = empty_table()
283 |             tables[referring_table]['constrained-columns'].update(table_columns)
284 | 
285 |             referenced_table = (referenced_schema_name, referenced_table_name)
286 |             if referenced_table in inherited_tables:
287 |                 referenced_table = inherited_tables[referenced_table]
288 | 
289 |             if referenced_table not in tables:
290 |                 tables[referenced_table] = empty_table()
291 | 
292 |             foreign_key_constraints.add((referring_table, referenced_table))
293 | 
294 |     # get enum usages
295 |     with mara_db.postgresql.postgres_cursor_context(db) as cursor:
296 |         cursor.execute(f'''
297 |             SELECT DISTINCT
298 |               pg_namespace_table.nspname AS table_schema,
299 |               pg_class_table.relname AS table_name,
300 | 
301 |               pg_namespace_enum.nspname AS enum_schema,
302 |               pg_type.typname AS enum_type
303 |             FROM pg_attribute
304 |             JOIN pg_class pg_class_table ON pg_class_table.oid = attrelid
305 |             JOIN pg_namespace pg_namespace_table ON pg_namespace_table.oid = pg_class_table.relnamespace
306 |             JOIN pg_type ON atttypid = pg_type.oid
307 |             JOIN pg_namespace pg_namespace_enum ON typnamespace = pg_namespace_enum.oid
308 |             JOIN pg_enum ON pg_enum.enumtypid = pg_type.oid
309 |             WHERE pg_namespace_table.nspname = ANY ({'%s'})''', (schema_names,))
310 |         for table_schema, table_name, enum_schema, enum_name in cursor.fetchall():
311 |             if (table_schema, table_name) in tables:
312 |                 if (enum_schema, enum_name) not in tables:
313 |                     tables[(enum_schema, enum_name)] = empty_table()
314 | 
315 |                 foreign_key_constraints.add(((table_schema, table_name), (enum_schema, enum_name)))
316 | 
317 |     # get all columns of all tables
318 |     with mara_db.postgresql.postgres_cursor_context(db) as cursor:
319 |         cursor.execute('''
320 |             SELECT
321 |               table_schema, table_name,
322 |               array_agg(column_name :: TEXT ORDER BY ordinal_position)
323 |             FROM information_schema.columns
324 |             GROUP BY table_schema, table_name''')
325 |         for schema_name, table_name, columns in cursor.fetchall():
326 |             if (schema_name, table_name) in tables:
327 |                 tables[(schema_name, table_name)]['columns'] = columns
328 | 
329 |     return tables, foreign_key_constraints
330 | 
331 | 
332 | @extract_schema.register(dbs.MysqlDB)
333 | def __(db: dbs.MysqlDB, schema_names: [str]):
334 |     import mara_db.mysql
335 | 
336 |     # get all tables that have foreign key constraints on them or are referenced by foreign key constraints
337 |     tables = {}  # {(table_schema, table_name): {'columns': [columns], 'constrained-columns': {constrained-columns}}}
338 |     foreign_key_constraints = set()  # {((table_schema, table_name), (referenced_schema_name, referenced_table_name))}
339 | 
340 |     def empty_table():
341 |         return {'columns': [], 'constrained-columns': set()}
342 | 
343 |     with mara_db.mysql.mysql_cursor_context(db) as cursor:
344 |         cursor.execute(f'''
345 |             SELECT i.table_schema,
346 |                    i.table_name,
347 |                    k.column_name,
348 |                    k.referenced_table_schema,
349 |                    k.referenced_table_name
350 |             FROM information_schema.table_constraints i
351 |             LEFT JOIN information_schema.KEY_COLUMN_USAGE k
352 |               ON i.constraint_name = k.constraint_name
353 |             WHERE i.constraint_type = 'FOREIGN KEY'
354 |               AND k.referenced_table_name IS NOT NULL
355 |               AND i.table_schema IN {'%s'};''', (schema_names,))
356 |         for table_schema, table_name, column_name, referenced_table_schema, referenced_table_name in cursor.fetchall():
357 |             referring_table = (table_schema, table_name)
358 |             referenced_table = (referenced_table_schema, referenced_table_name)
359 |             if referring_table not in tables:
360 |                 tables[referring_table] = empty_table()
361 |             tables[referring_table]['constrained-columns'].add(column_name)
362 | 
363 |             if referenced_table not in tables:
364 |                 tables[referenced_table] = empty_table()
365 | 
366 |             foreign_key_constraints.add((referring_table, referenced_table))
367 | 
368 |     with mara_db.mysql.mysql_cursor_context(db) as cursor:
369 |         cursor.execute(f'''
370 |             SELECT table_schema, table_name, column_name
371 |             FROM information_schema.COLUMNS
372 |             WHERE table_schema IN {'%s'}
373 |             ''', (schema_names,))
374 |         for table_schema, table_name, column_name in cursor.fetchall():
375 |             if (table_schema, table_name) in tables:
376 |                 tables[(table_schema, table_name)]['columns'].append(column_name)
377 | 
378 |     return tables, foreign_key_constraints
379 | 
380 | 
381 | @extract_schema.register(dbs.SQLServerDB)
382 | def __(db: dbs.SQLServerDB, schema_names: [str]):
383 |     import mara_db.sqlserver
384 | 
385 |     # get all tables that have foreign key constraints on them or are referenced by foreign key constraints
386 |     tables = {}  # {(table_schema, table_name): {'columns': [columns], 'constrained-columns': {constrained-columns}}}
387 |     foreign_key_constraints = set()  # {((table_schema, table_name), (referenced_schema_name, referenced_table_name))}
388 | 
389 |     def empty_table():
390 |         return {'columns': [], 'constrained-columns': set()}
391 | 
392 |     with mara_db.sqlserver.sqlserver_cursor_context(db) as cursor:
393 |         cursor.execute('''
394 |             SELECT
395 |               s.name AS table_schema,
396 |               t.name AS table_name,
397 |               COL_NAME(fkc.parent_object_id, fkc.parent_column_id) AS column_name,
398 |               fkts.name AS referenced_table_schema,
399 |               OBJECT_NAME(fk.referenced_object_id) AS referenced_table_name
400 |             FROM sys.tables t
401 |             INNER JOIN sys.schemas s ON
402 |                 s.schema_id = t.schema_id
403 |             LEFT JOIN sys.foreign_keys fk ON
404 |                 fk.parent_object_id = t.object_id
405 |             LEFT JOIN sys.foreign_key_columns fkc ON
406 |                 fkc.constraint_object_id = fk.object_id
407 |             LEFT JOIN sys.tables fkt ON
408 |                 fkt.object_id = fk.referenced_object_id
409 |             LEFT JOIN sys.schemas fkts ON
410 |                 fkts.schema_id = fkt.schema_id
411 |             WHERE s.name IN ('%s');''' % '\',\''.join(schema_names))
412 |         for table_schema, table_name, column_name, referenced_table_schema, referenced_table_name in cursor.fetchall():
413 |             referring_table = (table_schema, table_name)
414 |             referenced_table = (referenced_table_schema, referenced_table_name)
415 |             if referring_table not in tables:
416 |                 tables[referring_table] = empty_table()
417 |             if column_name is not None:
418 |                 tables[referring_table]['constrained-columns'].add(column_name)
419 | 
420 |             # this logic is necessary so that tables with no foreign key are not added to the schema
421 |             if referenced_table_schema is not None and referenced_table_name is not None:
422 |                 if referenced_table not in tables:
423 |                     tables[referenced_table] = empty_table()
424 |                 foreign_key_constraints.add((referring_table, referenced_table))
425 | 
426 |     with mara_db.sqlserver.sqlserver_cursor_context(db) as cursor:
427 |         cursor.execute('''
428 |             SELECT
429 |               s.name AS table_schema,
430 |               t.name AS table_name,
431 |               c.name AS column_name
432 |             FROM sys.columns c
433 |             INNER JOIN sys.tables t ON
434 |                 t.object_id = c.object_id
435 |             INNER JOIN sys.schemas s ON
436 |                 s.schema_id = t.schema_id
437 |             WHERE s.name IN ('%s')
438 |             ''' % '\',\''.join(schema_names))
439 |         for table_schema, table_name, column_name in cursor.fetchall():
440 |             if (table_schema, table_name) in tables:
441 |                 tables[(table_schema, table_name)]['columns'].append(column_name)
442 |     return tables, foreign_key_constraints
443 | 
444 | 
445 | @blueprint.route('/<string:db_alias>/<path:schemas>')
446 | @acl.require_permission(acl_resource, do_abort=False)
447 | def draw_schema(db_alias: str, schemas: str):
448 |     """Shows a chart of the tables and FK relationships in a given database and schema list"""
449 | 
450 |     if db_alias not in config.databases():
451 |         flask.abort(404, f'unknown database {db_alias}')
452 | 
453 |     if not supports_extract_schema(db_alias):
454 |         flask.abort(404, f'could not extract schema for database {db_alias}')
455 | 
456 |     schema_names = schemas.split('/')
457 |     hide_columns = flask.request.args.get('hide-columns')
458 |     engine = flask.request.args.get('engine', 'neato')
459 | 
460 |     tables, fk_constraints = extract_schema(db_alias, schema_names)
461 | 
462 |     import graphviz.backend
463 | 
464 |     graph = graphviz.Digraph(engine=engine,
465 |                              graph_attr={'splines': 'True', 'overlap': 'ortho'})
466 | 
467 |     schema_colors = {}
468 |     fk_pattern = re.compile(config.schema_ui_foreign_key_column_regex())
469 |     for schema_name, table_name in sorted(tables):
470 |         if schema_name not in schema_colors:
471 |             colors = ['#ffffcc', '#bbffcc', '#cceeff', '#eedd99', '#ddee99', '#99ddff', '#dddddd']
472 |             schema_colors[schema_name] = colors[len(schema_colors) % len(colors)]
473 | 
474 |         # build a graphviz HTML-like label: one header row, then one row per column
475 |         label = '< <table border="0" cellspacing="0" bgcolor="' \
476 |                 + schema_colors[schema_name] + '">'
477 | 
478 |         node_name = schema_name + '.' + table_name
479 |         if hide_columns:
480 |             label += '<tr><td align="left"><b> ' + table_name.replace('_', '<br/>') + ' </b></td></tr>'
481 |         else:
482 |             label += '<tr><td align="left"><b> ' + table_name + ' </b></td></tr>'
483 |             for column in tables[(schema_name, table_name)]['columns']:
484 |                 label += '<tr><td align="left">'
485 |                 if fk_pattern.match(column) \
486 |                         and column not in tables[(schema_name, table_name)]['constrained-columns']:
487 |                     label += '<b> ' + column + ' </b>'
488 |                 else:
489 |                     label += column
490 |                 label += '</td></tr>'
491 | 
492 |         label += '</table> >'
493 | 
494 |         graph.node(name=node_name, label=label,
495 |                    _attributes={'fontname': 'Helvetica, Arial, sans-serif', 'fontsize': '10',
496 |                                 'fontcolor': '#555555', 'shape': 'none'})
497 | 
498 |     for (schema_name, table_name), (referenced_schema_name, referenced_table_name) in fk_constraints:
499 |         graph.edge(schema_name + '.' + table_name, referenced_schema_name + '.' + referenced_table_name,
500 |                    _attributes={'color': '#888888'})
501 | 
502 |     try:
503 |         svg = graph.pipe('svg').decode('utf-8')
504 |     except graphviz.backend.ExecutableNotFound as e:
505 |         import uuid
506 |         # This exception occurs when the graphviz tools are not found.
507 |         # We fall back to client-side rendering with the javascript library d3-graphviz.
508 |         graph_id = f'dependency_graph_{uuid.uuid4().hex}'
509 |         escaped_graph_source = graph.source.replace("`", "\\`")
510 |         return str(_.div(id=graph_id)[
511 |             _.tt(style="color:red")[str(e)],
512 |         ]) + str(_.script[
513 |             f'div=d3.select("#{graph_id}");',
514 |             'graph=div.graphviz();',
515 |             'div.text("");',
516 |             f'graph.renderDot(`{escaped_graph_source}`);',
517 |         ])
518 | 
519 |     response = flask.Response(svg)
520 |     response.headers[
521 |         'Content-Disposition'] = f'attachment; filename="{datetime.date.today().isoformat()}-{db_alias}.svg"'
522 |     return response
523 | 
--------------------------------------------------------------------------------
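Taken together, `dbs.connect` and `dbs.cursor_context` give a uniform DB-API 2.0 entry point over all configured databases: an alias is resolved to a `DB` object, a driver-specific connection is opened, and commit/rollback are handled around the yielded cursor. A minimal usage sketch; the alias `'dwh'` is hypothetical and assumes `mara_db.config.databases()` has been set up to return it:

```python
import mara_db.dbs

# 'dwh' is a hypothetical alias; it must resolve to a DB instance via
# mara_db.config.databases() for connect()/cursor_context() to find it
with mara_db.dbs.cursor_context('dwh') as cursor:
    cursor.execute('SELECT 1')
    print(cursor.fetchone())
# on normal exit the transaction was committed; an exception would have
# triggered a rollback before the connection was closed
```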
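The extraction layer of views.py can likewise be driven outside of Flask, which is handy when debugging a schema: `schemas_with_foreign_key_constraints` lists candidate schemas and `extract_schema` returns the `(tables, fk_constraints)` structures described in its docstring. A sketch under the same hypothetical `'dwh'` alias (and assuming the matching database driver is installed):

```python
from mara_db import views

# schemas that participate in foreign key constraints (may be None or empty)
schemas = views.schemas_with_foreign_key_constraints('dwh') or []

# tables: {(schema, table): {'columns': [...], 'constrained-columns': {...}}}
# fk_constraints: {((schema, table), (referenced_schema, referenced_table))}
tables, fk_constraints = views.extract_schema('dwh', schemas)

for (schema, table), info in sorted(tables.items()):
    print(f'{schema}.{table}: {len(info["columns"])} columns')
for referring, referenced in sorted(fk_constraints):
    print(f'{referring} -> {referenced}')
```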