├── tests ├── __init__.py ├── mssql │ ├── __init__.py │ ├── test_mssql_ddl.sql │ ├── README.md │ ├── test_mssql_config.py │ ├── test_mssql_sqsh.py_ │ └── test_mssql.py ├── postgres │ ├── __init__.py │ ├── test_postgres_ddl.sql │ └── test_postgres.py ├── seed │ ├── names_lf.csv │ ├── names_crlf.csv │ ├── names_lf_lastrow.csv │ ├── names_lf_header.csv │ ├── names_crlf_lastrow.csv │ ├── names_lf_quoted.csv │ ├── names_crlf_header.csv │ ├── names_lf_lastrow_header.csv │ ├── names_crlf_quoted.csv │ ├── names_lf_quoted_lastrow.csv │ ├── names_crlf_lastrow_header.csv │ ├── names_lf_quoted_header.csv │ ├── names_crlf_quoted_lastrow.csv │ ├── names_crlf_quoted_header.csv │ ├── names_lf_quoted_lastrow_header.csv │ ├── names_crlf_quoted_lastrow_header.csv │ ├── README.md │ ├── accounts_lf.jsonl │ ├── accounts_crlf.jsonl │ ├── accounts_lf_lastrow.jsonl │ └── accounts_crlf_lastrow.jsonl ├── test1.py ├── docker-compose.yml ├── command_helper.py ├── local_config.py.example ├── test_snowflake.py ├── test_databricks.py └── db_test_helper.py ├── docs ├── changes.md ├── _static │ ├── favicon.ico │ ├── mara-animal.jpg │ └── schema-visualization.png ├── requirements.txt ├── license.rst ├── cli.rst ├── Makefile ├── config.rst ├── api.rst ├── dbs │ ├── SQLite.rst │ ├── Mysql.rst │ ├── Oracle.rst │ ├── PostgreSQL.rst │ ├── Redshift.rst │ ├── Snowflake.rst │ ├── Databricks.rst │ ├── BigQuery.rst │ └── SQLServer.rst ├── installation.md ├── conf.py ├── index.rst └── databases-overview.md ├── setup.py ├── .vscode └── settings.json ├── pyproject.toml ├── .readthedocs.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── mara_db ├── mysql.py ├── databricks.py ├── postgresql.py ├── sqlserver.py ├── __init__.py ├── cli.py ├── config.py ├── sqlalchemy_engine.py ├── static │ └── schema-page.js ├── formats.py ├── bigquery.py ├── auto_migration.py ├── dbs.py └── views.py ├── .github └── workflows │ └── build.yml ├── LICENSE ├── Makefile ├── setup.cfg ├── CHANGELOG.md └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/mssql/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/postgres/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/changes.md: -------------------------------------------------------------------------------- 1 | ```{include} ../CHANGELOG.md 2 | ``` 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup() 4 | -------------------------------------------------------------------------------- /docs/_static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mara/mara-db/HEAD/docs/_static/favicon.ico -------------------------------------------------------------------------------- /docs/_static/mara-animal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mara/mara-db/HEAD/docs/_static/mara-animal.jpg -------------------------------------------------------------------------------- 
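The `tests/seed/` file names listed in the tree above encode a four-dimensional CSV test matrix (LF/CRLF line endings × quoted/unquoted values × trailing newline × header row; documented in `tests/seed/README.md` further down). As an illustrative sketch only — this is not a file from the repository — the full set of names can be derived mechanically from those dimensions:

```python
# Illustrative sketch (not part of this repository): derives the seed CSV
# file names in tests/seed/ from the four dimensions of the test matrix.
import itertools


def seed_csv_names():
    names = []
    for line_ending, quoted, lastrow, header in itertools.product(
            ('lf', 'crlf'),   # LF vs. CRLF line endings
            (False, True),    # unquoted vs. quoted column values
            (False, True),    # without/with a closing newline after the last row
            (False, True)):   # without/with a header row
        parts = ['names', line_ending]
        if quoted:
            parts.append('quoted')
        if lastrow:
            parts.append('lastrow')
        if header:
            parts.append('header')
        names.append('_'.join(parts) + '.csv')
    return names


assert len(seed_csv_names()) == 16  # matches the 16 names_*.csv files in the tree
assert 'names_crlf_quoted_lastrow_header.csv' in seed_csv_names()
```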
/docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==4.5.0 2 | sphinxcontrib-napoleon==0.7 3 | sphinx-tabs==3.3.1 4 | myst-parser==0.18.0 5 | -------------------------------------------------------------------------------- /docs/_static/schema-visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mara/mara-db/HEAD/docs/_static/schema-visualization.png -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "makefile.extensionOutputFolder": "./.vscode", 3 | "esbonio.sphinx.confDir": "" 4 | } 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools >= 40.6.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | -------------------------------------------------------------------------------- /tests/mssql/test_mssql_ddl.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE names 2 | ( 3 | id INT, 4 | name TEXT 5 | ); 6 | 7 | CREATE TABLE names_with_header 8 | ( 9 | id INT, 10 | name TEXT 11 | ); 12 | -------------------------------------------------------------------------------- /tests/seed/names_lf.csv: -------------------------------------------------------------------------------- 1 | 1,Elinor Meklit 2 | 2,Triana Mahalah 3 | 3,Eugraphios Esmae 4 | 4,Agustín Alvilda 5 | 5,Behruz Hathor 6 | 6,Mathilde Tola 7 | 7,Kapel Tupaq 8 | 8,Shet Badulf 9 | 9,Ruslan Vančo 10 | 10,Madhavi Traian -------------------------------------------------------------------------------- /tests/seed/names_crlf.csv: -------------------------------------------------------------------------------- 1 | 1,Elinor Meklit 2 | 2,Triana Mahalah 3 | 3,Eugraphios Esmae 4 | 4,Agustín Alvilda 5 | 5,Behruz Hathor 6 | 6,Mathilde Tola 7 | 7,Kapel Tupaq 8 | 8,Shet Badulf 9 | 9,Ruslan Vančo 10 | 10,Madhavi Traian -------------------------------------------------------------------------------- /tests/seed/names_lf_lastrow.csv: -------------------------------------------------------------------------------- 1 | 1,Elinor Meklit 2 | 2,Triana Mahalah 3 | 3,Eugraphios Esmae 4 | 4,Agustín Alvilda 5 | 5,Behruz Hathor 6 | 6,Mathilde Tola 7 | 7,Kapel Tupaq 8 | 8,Shet Badulf 9 | 9,Ruslan Vančo 10 | 10,Madhavi Traian 11 | -------------------------------------------------------------------------------- /tests/seed/names_lf_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,Elinor Meklit 3 | 2,Triana Mahalah 4 | 3,Eugraphios Esmae 5 | 4,Agustín Alvilda 6 | 5,Behruz Hathor 7 | 6,Mathilde Tola 8 | 7,Kapel Tupaq 9 | 8,Shet Badulf 10 | 9,Ruslan Vančo 11 | 10,Madhavi Traian -------------------------------------------------------------------------------- /tests/seed/names_crlf_lastrow.csv: -------------------------------------------------------------------------------- 1 | 1,Elinor Meklit 2 | 2,Triana Mahalah 3 | 3,Eugraphios Esmae 4 | 4,Agustín Alvilda 5 | 5,Behruz Hathor 6 | 6,Mathilde Tola 7 | 7,Kapel Tupaq 8 | 8,Shet Badulf 9 | 9,Ruslan Vančo 10 | 10,Madhavi Traian 11 | -------------------------------------------------------------------------------- /tests/seed/names_lf_quoted.csv: 
-------------------------------------------------------------------------------- 1 | 1,"Elinor Meklit" 2 | 2,"Triana Mahalah" 3 | 3,"Eugraphios Esmae" 4 | 4,"Agustín Alvilda" 5 | 5,"Behruz Hathor" 6 | 6,"Mathilde Tola" 7 | 7,"Kapel Tupaq" 8 | 8,"Shet Badulf" 9 | 9,"Ruslan Vančo" 10 | 10,"Madhavi Traian" -------------------------------------------------------------------------------- /tests/seed/names_crlf_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,Elinor Meklit 3 | 2,Triana Mahalah 4 | 3,Eugraphios Esmae 5 | 4,Agustín Alvilda 6 | 5,Behruz Hathor 7 | 6,Mathilde Tola 8 | 7,Kapel Tupaq 9 | 8,Shet Badulf 10 | 9,Ruslan Vančo 11 | 10,Madhavi Traian -------------------------------------------------------------------------------- /tests/seed/names_lf_lastrow_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,Elinor Meklit 3 | 2,Triana Mahalah 4 | 3,Eugraphios Esmae 5 | 4,Agustín Alvilda 6 | 5,Behruz Hathor 7 | 6,Mathilde Tola 8 | 7,Kapel Tupaq 9 | 8,Shet Badulf 10 | 9,Ruslan Vančo 11 | 10,Madhavi Traian 12 | -------------------------------------------------------------------------------- /tests/seed/names_crlf_quoted.csv: -------------------------------------------------------------------------------- 1 | 1,"Elinor Meklit" 2 | 2,"Triana Mahalah" 3 | 3,"Eugraphios Esmae" 4 | 4,"Agustín Alvilda" 5 | 5,"Behruz Hathor" 6 | 6,"Mathilde Tola" 7 | 7,"Kapel Tupaq" 8 | 8,"Shet Badulf" 9 | 9,"Ruslan Vančo" 10 | 10,"Madhavi Traian" -------------------------------------------------------------------------------- /tests/seed/names_lf_quoted_lastrow.csv: -------------------------------------------------------------------------------- 1 | 1,"Elinor Meklit" 2 | 2,"Triana Mahalah" 3 | 3,"Eugraphios Esmae" 4 | 4,"Agustín Alvilda" 5 | 5,"Behruz Hathor" 6 | 6,"Mathilde Tola" 7 | 7,"Kapel Tupaq" 8 | 8,"Shet Badulf" 9 | 9,"Ruslan Vančo" 10 | 10,"Madhavi Traian" 11 | -------------------------------------------------------------------------------- /tests/seed/names_crlf_lastrow_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,Elinor Meklit 3 | 2,Triana Mahalah 4 | 3,Eugraphios Esmae 5 | 4,Agustín Alvilda 6 | 5,Behruz Hathor 7 | 6,Mathilde Tola 8 | 7,Kapel Tupaq 9 | 8,Shet Badulf 10 | 9,Ruslan Vančo 11 | 10,Madhavi Traian 12 | -------------------------------------------------------------------------------- /tests/seed/names_lf_quoted_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,"Elinor Meklit" 3 | 2,"Triana Mahalah" 4 | 3,"Eugraphios Esmae" 5 | 4,"Agustín Alvilda" 6 | 5,"Behruz Hathor" 7 | 6,"Mathilde Tola" 8 | 7,"Kapel Tupaq" 9 | 8,"Shet Badulf" 10 | 9,"Ruslan Vančo" 11 | 10,"Madhavi Traian" -------------------------------------------------------------------------------- /tests/seed/names_crlf_quoted_lastrow.csv: -------------------------------------------------------------------------------- 1 | 1,"Elinor Meklit" 2 | 2,"Triana Mahalah" 3 | 3,"Eugraphios Esmae" 4 | 4,"Agustín Alvilda" 5 | 5,"Behruz Hathor" 6 | 6,"Mathilde Tola" 7 | 7,"Kapel Tupaq" 8 | 8,"Shet Badulf" 9 | 9,"Ruslan Vančo" 10 | 10,"Madhavi Traian" 11 | -------------------------------------------------------------------------------- /tests/seed/names_crlf_quoted_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,"Elinor Meklit" 3 | 2,"Triana 
Mahalah" 4 | 3,"Eugraphios Esmae" 5 | 4,"Agustín Alvilda" 6 | 5,"Behruz Hathor" 7 | 6,"Mathilde Tola" 8 | 7,"Kapel Tupaq" 9 | 8,"Shet Badulf" 10 | 9,"Ruslan Vančo" 11 | 10,"Madhavi Traian" -------------------------------------------------------------------------------- /tests/seed/names_lf_quoted_lastrow_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,"Elinor Meklit" 3 | 2,"Triana Mahalah" 4 | 3,"Eugraphios Esmae" 5 | 4,"Agustín Alvilda" 6 | 5,"Behruz Hathor" 7 | 6,"Mathilde Tola" 8 | 7,"Kapel Tupaq" 9 | 8,"Shet Badulf" 10 | 9,"Ruslan Vančo" 11 | 10,"Madhavi Traian" 12 | -------------------------------------------------------------------------------- /tests/test1.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | def test_my(): 4 | import mara_db.dbs 5 | 6 | db = mara_db.dbs.SqlcmdSQLServerDB(host='ABC123', user='A', password="a", trust_server_certificate=True) 7 | 8 | odbc = db.sqlalchemy_url 9 | 10 | print(odbc) 11 | 12 | assert False 13 | -------------------------------------------------------------------------------- /tests/seed/names_crlf_quoted_lastrow_header.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,"Elinor Meklit" 3 | 2,"Triana Mahalah" 4 | 3,"Eugraphios Esmae" 5 | 4,"Agustín Alvilda" 6 | 5,"Behruz Hathor" 7 | 6,"Mathilde Tola" 8 | 7,"Kapel Tupaq" 9 | 8,"Shet Badulf" 10 | 9,"Ruslan Vančo" 11 | 10,"Madhavi Traian" 12 | -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | 4 | MIT Source License 5 | ------------------ 6 | 7 | The MIT license applies to all files in the Mara repository 8 | and source distribution. This includes Mara's source code, the 9 | examples, and tests, as well as the documentation. 10 | 11 | .. include:: ../LICENSE 12 | -------------------------------------------------------------------------------- /tests/postgres/test_postgres_ddl.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE names 2 | ( 3 | id INT, 4 | name TEXT 5 | ); 6 | 7 | CREATE TABLE names_with_header 8 | ( 9 | id INT, 10 | name TEXT 11 | ); 12 | 13 | CREATE TABLE accounts_json 14 | ( 15 | data jsonb, 16 | row BIGINT GENERATED ALWAYS AS IDENTITY 17 | ); 18 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | build: 7 | os: ubuntu-20.04 8 | tools: 9 | python: "3.9" 10 | 11 | sphinx: 12 | configuration: docs/conf.py 13 | 14 | python: 15 | install: 16 | - requirements: docs/requirements.txt 17 | - method: pip 18 | path: . 
19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | build/ 8 | dist/ 9 | *.egg-info/ 10 | .eggs/ 11 | 12 | # Unit test / coverage reports 13 | .cache 14 | 15 | # Sphinx documentation 16 | docs/_build/ 17 | 18 | # Environments 19 | /.venv 20 | 21 | # Dev tools 22 | .idea 23 | 24 | # == Specific for this repository == 25 | /tests/local_config.py 26 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v3.2.0 6 | hooks: 7 | - id: trailing-whitespace 8 | exclude: ^tests/seed/ 9 | - id: end-of-file-fixer 10 | exclude: ^tests/seed/ 11 | - id: check-toml 12 | - id: check-yaml 13 | - id: check-added-large-files 14 | -------------------------------------------------------------------------------- /tests/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.1' 2 | 3 | services: 4 | postgres: 5 | image: postgres:14 6 | environment: 7 | POSTGRES_DB: mara 8 | POSTGRES_USER: mara 9 | POSTGRES_PASSWORD: mara 10 | POSTGRES_HOST_AUTH_METHOD: md5 11 | ports: 12 | - "5432" 13 | 14 | mssql: 15 | image: mcr.microsoft.com/mssql/server:2019-latest 16 | environment: 17 | - ACCEPT_EULA=Y 18 | - SA_PASSWORD=YourStrong@Passw0rd 19 | ports: 20 | - "1433" 21 | -------------------------------------------------------------------------------- /docs/cli.rst: -------------------------------------------------------------------------------- 1 | CLI 2 | === 3 | 4 | .. module:: mara_db.cli 5 | 6 | This part of the documentation covers all the available CLI commands of Mara DB. 7 | 8 | 9 | ``migrate`` 10 | ----------- 11 | 12 | .. tabs:: 13 | 14 | .. group-tab:: Mara CLI 15 | 16 | .. code-block:: shell 17 | 18 | mara db migrate 19 | 20 | .. group-tab:: Mara Flask App 21 | 22 | .. code-block:: python 23 | 24 | flask mara-db migrate 25 | 26 | 27 | Compares the current database (db alias `mara`) with all defined models and applies 28 | the diff using alembic. 29 | -------------------------------------------------------------------------------- /tests/mssql/README.md: -------------------------------------------------------------------------------- 1 | SQL Server Test Matrix 2 | ====================== 3 | 4 | Some notes about which tests fail or are intentionally not implemented: 5 | 6 | sqsh 7 | ---- 8 | * does not support `trust_server_certificate` 9 | 10 | sqlcmd 11 | ------ 12 | All looks fine 13 | 14 | bcp 15 | --- 16 | 17 | Known issues: 18 | * return value is always zero (`0`) even when an import error occurs 19 | * an import file, e.g. a 
CSV, must end with an empty last row (`names_lf_lastrow.csv` is supported, but `names_lf.csv` is not) 20 | * db.`trust_server_certificate` is only supported when using mssql tools 18 and higher 21 | -------------------------------------------------------------------------------- /mara_db/mysql.py: -------------------------------------------------------------------------------- 1 | """Easy access to MySQL databases via mysql-client""" 2 | 3 | import typing 4 | from warnings import warn 5 | 6 | import mara_db.dbs 7 | 8 | 9 | def mysql_cursor_context(db: typing.Union[str, mara_db.dbs.MysqlDB]) -> 'MySQLdb.cursors.Cursor': 10 | """Creates a context with a mysql-client cursor for a database alias or database""" 11 | warn('Function mysql_cursor_context(db) is deprecated. Please use mara_db.dbs.cursor_context(db) instead.', category=DeprecationWarning) 12 | 13 | if isinstance(db, str): 14 | db = mara_db.dbs.db(db) 15 | 16 | assert (isinstance(db, mara_db.dbs.MysqlDB)) 17 | 18 | return mara_db.dbs.cursor_context(db) 19 | -------------------------------------------------------------------------------- /mara_db/databricks.py: -------------------------------------------------------------------------------- 1 | """Easy access to Databricks databases via databricks-sql-connector""" 2 | 3 | import typing 4 | from warnings import warn 5 | 6 | import mara_db.dbs 7 | 8 | 9 | def databricks_cursor_context(db: typing.Union[str, mara_db.dbs.DatabricksDB]) \ 10 | -> 'databricks.sql.client.Cursor': 11 | warn('Function databricks_cursor_context(db) is deprecated. Please use mara_db.dbs.cursor_context(db) instead.', 12 | category=DeprecationWarning) 13 | 14 | if isinstance(db, str): 15 | db = mara_db.dbs.db(db) 16 | 17 | assert (isinstance(db, mara_db.dbs.DatabricksDB)) 18 | 19 | return mara_db.dbs.cursor_context(db) 20 | -------------------------------------------------------------------------------- /mara_db/postgresql.py: -------------------------------------------------------------------------------- 1 | """Easy access to postgres databases via psycopg2""" 2 | 3 | import typing 4 | from warnings import warn 5 | 6 | import mara_db.dbs 7 | 8 | 9 | def postgres_cursor_context(db: typing.Union[str, mara_db.dbs.PostgreSQLDB]) -> 'psycopg2.extensions.cursor': 10 | """Creates a context with a psycopg2 cursor for a database alias or database""" 11 | warn('Function postgres_cursor_context(db) is deprecated. Please use mara_db.dbs.cursor_context(db) instead.', 12 | category=DeprecationWarning) 13 | 14 | if isinstance(db, str): 15 | db = mara_db.dbs.db(db) 16 | 17 | assert (isinstance(db, mara_db.dbs.PostgreSQLDB)) 18 | 19 | return mara_db.dbs.cursor_context(db) 20 | -------------------------------------------------------------------------------- /mara_db/sqlserver.py: -------------------------------------------------------------------------------- 1 | """Easy access to SQLServer databases via pyodbc-client""" 2 | 3 | import typing 4 | from warnings import warn 5 | 6 | import mara_db.dbs 7 | 8 | 9 | def sqlserver_cursor_context(db: typing.Union[str, mara_db.dbs.SQLServerDB]) -> 'pyodbc.Cursor': 10 | """Creates a context with a pyodbc-client cursor for a database alias or database""" 11 | warn('Function sqlserver_cursor_context(db) is deprecated. 
Please use mara_db.dbs.cursor_context(db) instead.', 12 | category=DeprecationWarning) 13 | 14 | if isinstance(db, str): 15 | db = mara_db.dbs.db(db) 16 | 17 | assert (isinstance(db, mara_db.dbs.SQLServerDB)) 18 | 19 | return mara_db.dbs.cursor_context(db) 20 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /mara_db/__init__.py: -------------------------------------------------------------------------------- 1 | """Make the functionalities of this package auto-discoverable by mara-app""" 2 | __version__ = '4.11.0' 3 | 4 | 5 | def MARA_CONFIG_MODULES(): 6 | from . import config 7 | return [config] 8 | 9 | 10 | def MARA_FLASK_BLUEPRINTS(): 11 | from . import views 12 | return [views.blueprint] 13 | 14 | 15 | def MARA_AUTOMIGRATE_SQLALCHEMY_MODELS(): 16 | return [] 17 | 18 | 19 | def MARA_ACL_RESOURCES(): 20 | from . import views 21 | return {'DB Schema': views.acl_resource} 22 | 23 | 24 | def MARA_CLICK_COMMANDS(): 25 | from . import cli 26 | return [cli.mara_db, 27 | cli._migrate] 28 | 29 | 30 | def MARA_NAVIGATION_ENTRIES(): 31 | from . import views 32 | return {'DB Schema': views.navigation_entry()} 33 | -------------------------------------------------------------------------------- /docs/config.rst: -------------------------------------------------------------------------------- 1 | Configuration 2 | ============= 3 | 4 | 5 | Mara Configuration Values 6 | ------------------------- 7 | 8 | The following configuration values are used by this module. They are defined as python functions in ``mara_db.config`` 9 | and can be changed with the `monkey patch`_ from `Mara App`_. An example can be found `here `_. 10 | 11 | .. _monkey patch: https://github.com/mara/mara-app/blob/master/mara_app/monkey_patch.py 12 | .. _Mara App: https://github.com/mara/mara-app 13 | 14 | 15 | .. module:: mara_db.config 16 | 17 | .. autofunction:: databases 18 | 19 | | 20 | 21 | .. autofunction:: default_timezone 22 | 23 | | 24 | 25 | .. autofunction:: default_echo_queries 26 | 27 | | 28 | 29 | .. 
autofunction:: schema_ui_foreign_key_column_regex 30 | -------------------------------------------------------------------------------- /mara_db/cli.py: -------------------------------------------------------------------------------- 1 | """Auto-migrate command line interface""" 2 | 3 | import click 4 | import sys 5 | from warnings import warn 6 | 7 | 8 | @click.group() 9 | def mara_db(): 10 | """Commands to interact with the database.""" 11 | pass 12 | 13 | 14 | @mara_db.command() 15 | def migrate(): 16 | """Compares the current database with all defined models and applies the diff""" 17 | import mara_db.auto_migration 18 | 19 | if not mara_db.auto_migration.auto_discover_models_and_migrate(): 20 | sys.exit(-1) 21 | 22 | 23 | # Old cli commands to be dropped in 5.0: 24 | 25 | @click.command("migrate") 26 | def _migrate(): 27 | """Compares the current database with all defined models and applies the diff""" 28 | warn("CLI command `mara_db.migrate` will be dropped in 5.0. Please use: `mara-db migrate`") 29 | migrate.callback() 30 | -------------------------------------------------------------------------------- /tests/command_helper.py: -------------------------------------------------------------------------------- 1 | """Helper functions to generate commands for testing""" 2 | import shlex 3 | 4 | from mara_db import shell 5 | 6 | 7 | def execute_sql_statement_command(db, sql_statement): 8 | command = f'echo {shlex.quote(sql_statement)} \\\n' 9 | command += ' | ' + shell.query_command(db) 10 | assert command 11 | print(command) 12 | return command 13 | 14 | def execute_sql_file_command(db, file_path): 15 | command = f'cat {file_path} \\\n' 16 | command += ' | ' + shell.query_command(db) 17 | assert command 18 | print(command) 19 | return command 20 | 21 | def execute_sql_statement_to_stdout_csv_command(db, sql_statement): 22 | command = f'echo {shlex.quote(sql_statement)} \\\n' 23 | command += ' | ' + shell.copy_to_stdout_command(db, delimiter_char=',') 24 | assert command 25 | print(command) 26 | return command 27 | -------------------------------------------------------------------------------- /mara_db/config.py: -------------------------------------------------------------------------------- 1 | """Configuration of database connections""" 2 | import typing 3 | 4 | from mara_db import dbs 5 | 6 | 7 | def databases() -> typing.Dict[str, dbs.DB]: 8 | """The database connections to use, by alias""" 9 | return {} 10 | 11 | 12 | def default_timezone() -> str: 13 | """ 14 | The default timezone to be used for database connections 15 | See: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones 16 | """ 17 | return 'Europe/Berlin' 18 | 19 | 20 | def default_echo_queries() -> bool: 21 | """ 22 | Whether queries should be printed on execution by default, if applicable 23 | """ 24 | return True 25 | 26 | 27 | def schema_ui_foreign_key_column_regex() -> typing.Pattern: 28 | """A regex that classifies a table column as being used in a foreign key constraint (for coloring missing constraints)""" 29 | return r'.*_fk$' 30 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: mara-db 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | build: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12'] 17 | steps: 18 | - name: 
Checkout code 19 | uses: actions/checkout@v3.3.0 20 | - name: Setup python 21 | uses: actions/setup-python@v4.5.0 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Install application 25 | env: 26 | pythonversion: ${{ matrix.python-version }} 27 | run: | 28 | python -c "import sys; print(sys.version)" 29 | pip install .[test] 30 | echo Finished successful build with Python $pythonversion 31 | - name: Test with pytest 32 | run: | 33 | make tests/local_config.py 34 | pytest -v tests 35 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. module:: mara_db 5 | 6 | This part of the documentation covers all the interfaces of Mara DB. For 7 | parts where the package depends on external libraries, we document the most 8 | important ones right here and provide links to the canonical documentation. 9 | 10 | 11 | DBs 12 | --- 13 | 14 | .. module:: mara_db.dbs 15 | 16 | .. autofunction:: db 17 | 18 | .. autofunction:: connect 19 | 20 | .. autofunction:: cursor_context 21 | 22 | 23 | Auto migration 24 | -------------- 25 | 26 | .. module:: mara_db.auto_migration 27 | 28 | .. autofunction:: auto_migrate 29 | 30 | .. autofunction:: auto_discover_models_and_migrate 31 | 32 | 33 | Shell 34 | ----- 35 | 36 | .. module:: mara_db.shell 37 | 38 | .. autofunction:: query_command 39 | 40 | .. autofunction:: copy_to_stdout_command 41 | 42 | .. autofunction:: copy_from_stdin_command 43 | 44 | .. autofunction:: copy_command 45 | 46 | 47 | SQLAlchemy 48 | ---------- 49 | 50 | .. module:: mara_db.sqlalchemy_engine 51 | 52 | .. autofunction:: engine 53 | -------------------------------------------------------------------------------- /docs/dbs/SQLite.rst: -------------------------------------------------------------------------------- 1 | SQLite 2 | ====== 3 | 4 | 5 | 6 | Installation 7 | ------------ 8 | 9 | There are no special requirements for SQLite since it is already included in Python. 10 | 11 | The shell command `sqlite3` is required. This is available in standard distributions. 12 | Version >3.20.x is required (not the case on Ubuntu 14.04). 13 | 14 | 15 | Configuration examples 16 | ---------------------- 17 | 18 | .. tabs:: 19 | 20 | .. group-tab:: Local file 21 | 22 | .. code-block:: python 23 | 24 | import mara_db.dbs 25 | mara_db.config.databases = lambda: { 26 | 'dwh': mara_db.dbs.SQLiteDB( 27 | file_name='database.db'), 28 | } 29 | 30 | | 31 | 32 | | 33 | 34 | API reference 35 | ------------- 36 | 37 | This section contains database specific API in the module. 38 | 39 | 40 | Configuration 41 | ~~~~~~~~~~~~~ 42 | 43 | .. module:: mara_db.dbs 44 | :noindex: 45 | 46 | .. autoclass:: SQLiteDB 47 | :special-members: __init__ 48 | :inherited-members: 49 | :members: 50 | -------------------------------------------------------------------------------- /docs/dbs/Mysql.rst: -------------------------------------------------------------------------------- 1 | MySQL 2 | ===== 3 | 4 | 5 | Installation 6 | ------------ 7 | 8 | Use extras `mysql` to install all required packages. 9 | 10 | .. code-block:: shell 11 | 12 | $ pip install mara-db[mysql] 13 | 14 | 15 | Configuration examples 16 | ---------------------- 17 | 18 | .. tabs:: 19 | 20 | .. group-tab:: Default 21 | 22 | .. 
code-block:: python 23 | 24 | import mara_db.dbs 25 | mara_db.config.databases = lambda: { 26 | 'dwh': mara_db.dbs.MysqlDB( 27 | host='localhost', 28 | user='root', 29 | password='', 30 | database='dwh'), 31 | } 32 | 33 | | 34 | 35 | | 36 | 37 | API reference 38 | ------------- 39 | 40 | This section contains database specific API in the module. 41 | 42 | .. module:: mara_db.mysql 43 | 44 | Configuration 45 | ~~~~~~~~~~~~~ 46 | 47 | .. module:: mara_db.dbs 48 | :noindex: 49 | 50 | .. autoclass:: MysqlDB 51 | :special-members: __init__ 52 | :inherited-members: 53 | :members: 54 | -------------------------------------------------------------------------------- /tests/local_config.py.example: -------------------------------------------------------------------------------- 1 | # This file contains secrets used by the tests 2 | 3 | from mara_db import dbs 4 | 5 | # supported placeholders 6 | # host='DOCKER_IP' will be replaced with the ip address given from pytest-docker 7 | # port=-1 will be replaced with the port given from pytest-docker 8 | 9 | POSTGRES_DB = dbs.PostgreSQLDB(host='DOCKER_IP', port=-1, user="mara", password="mara", database="mara") 10 | MSSQL_DB = None # dbs.SQLServerDB(host='DOCKER_IP', port=-1, user='sa', password='YourStrong@Passw0rd', database='master') 11 | MSSQL_SQSH_DB = None # dbs.SqshSQLServerDB(host='DOCKER_IP', port=-1, user='sa', password='YourStrong@Passw0rd', database='master') 12 | MSSQL_SQLCMD_DB = None # dbs.SqlcmdSQLServerDB(host='DOCKER_IP', port=-1, user='sa', password='YourStrong@Passw0rd', database='master', trust_server_certificate=True) 13 | SNOWFLAKE_DB = None #dbs.SnowflakeDB( account='ACCOUNT_IDENTIFIER', user='USER', password='PASSWORD', database='SNOWFLAKE_SAMPLE_DATA') 14 | DATABRICKS_DB = None #dbs.DatabricksDB(host='DBSQLCLI_HOST_NAME', http_path='DBSQLCLI_HTTP_PATH', access_token='DBSQLCLI_ACCESS_TOKEN') 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017 Mara contributors 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | MODULE_NAME=mara_db 2 | 3 | 4 | all: 5 | # builds the virtual env 
and installs the module into it 6 | make .venv/bin/python 7 | make install 8 | 9 | 10 | install: 11 | # installs the module 12 | .venv/bin/pip install . 13 | 14 | 15 | test: 16 | make .venv/bin/python 17 | # runs the tests of the module 18 | .venv/bin/pip install .[test] 19 | make tests/local_config.py 20 | .venv/bin/pytest 21 | 22 | 23 | publish: 24 | # manually publish the package 25 | .venv/bin/pip install build twine 26 | .venv/bin/python -m build 27 | .venv/bin/twine upload dist/* 28 | 29 | 30 | clean: 31 | # clean up 32 | rm -rf .venv/ build/ dist/ ${MODULE_NAME}.egg-info/ .pytest_cache/ .eggs/ 33 | 34 | 35 | .PYTHON3:=$(shell PATH='$(subst $(CURDIR)/.venv/bin:,,$(PATH))' which python3) 36 | 37 | .venv/bin/python: 38 | mkdir -p .venv 39 | cd .venv && $(.PYTHON3) -m venv --copies --prompt='[$(shell basename `pwd`)/.venv]' . 40 | 41 | .venv/bin/python -m pip install --upgrade pip 42 | 43 | tests/local_config.py: 44 | cp -v tests/local_config.py.example tests/local_config.py 45 | @ >&2 echo '!!! copied tests/local_config.py.example to tests/local_config.py. Please check' 46 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = mara-db 3 | version = attr: mara_db.__version__ 4 | url = https://github.com/mara/mara-db 5 | description = Configuration and monitoring of database connections 6 | long_description = file: README.md 7 | long_description_content_type = text/markdown 8 | author = Mara contributors 9 | license = MIT 10 | 11 | [options] 12 | packages = mara_db 13 | python_requires = >= 3.6 14 | install_requires = 15 | SQLAlchemy>=1.1.5 16 | sqlalchemy-utils>=0.32.14 17 | alembic>=0.8.10 18 | multimethod>=1.0.0 19 | graphviz>=0.8 20 | mara-page>=1.3.0 21 | psycopg2-binary>=2.7.3 22 | 23 | [options.package_data] 24 | mara_db = static/* 25 | 26 | [options.extras_require] 27 | test = 28 | pytest 29 | pytest_click 30 | pytest-docker 31 | pytest-dependency 32 | SQLAlchemy>=1.2.0 33 | bigquery = 34 | google-cloud-bigquery 35 | google-cloud-bigquery-storage 36 | pyarrow 37 | sqlalchemy-bigquery 38 | mssql = pyodbc 39 | mysql = mysqlclient 40 | postgres = psycopg2-binary>=2.7.3 41 | redshift = 42 | psycopg2-binary>=2.7.3 43 | sqlalchemy-redshift 44 | snowflake = snowflake-sqlalchemy 45 | databricks = 46 | databricks-sql-cli 47 | databricks-sql-connector 48 | sqlalchemy-databricks 49 | 50 | [options.entry_points] 51 | mara.commands = 52 | db = mara_db.cli:mara_db 53 | -------------------------------------------------------------------------------- /docs/dbs/Oracle.rst: -------------------------------------------------------------------------------- 1 | Oracle 2 | ====== 3 | 4 | 5 | Installation 6 | ------------ 7 | 8 | You have to make sure that the `Oracle Instant Client `_ (`sqlplus64`) is installed. 9 | 10 | On Mac, follow `these instructions `_. Then `sudo ln -s /usr/local/bin/sqlplus /usr/local/bin/sqlplus64` to make the binary accessible as `sqlplus64`. 11 | 12 | 13 | Configuration examples 14 | ---------------------- 15 | 16 | .. tabs:: 17 | 18 | .. group-tab:: Default 19 | 20 | .. code-block:: python 21 | 22 | import mara_db.dbs 23 | mara_db.config.databases = lambda: { 24 | 'dwh': mara_db.dbs.OracleDB( 25 | host='localhost', 26 | user='root', 27 | password='', 28 | endpoint='oracle-endpoint'), 29 | } 30 | 31 | | 32 | 33 | | 34 | 35 | API reference 36 | ------------- 37 | 38 | This section contains database specific API in the module. 
39 | 40 | 41 | Configuration 42 | ~~~~~~~~~~~~~ 43 | 44 | .. module:: mara_db.dbs 45 | :noindex: 46 | 47 | .. autoclass:: OracleDB 48 | :special-members: __init__ 49 | :inherited-members: 50 | :members: 51 | -------------------------------------------------------------------------------- /tests/test_snowflake.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import subprocess 3 | 4 | from mara_db import shell, sqlalchemy_engine 5 | from tests.local_config import SNOWFLAKE_DB 6 | 7 | 8 | if not SNOWFLAKE_DB: 9 | pytest.skip("skipping SnowflakeDB tests: variable SNOWFLAKE_DB not set", allow_module_level=True) 10 | 11 | 12 | def test_snowflake_query_command(): 13 | command = 'echo "SELECT 1" \\\n' 14 | command += ' | ' + shell.query_command(SNOWFLAKE_DB) 15 | assert command 16 | 17 | print(command) 18 | (exitcode, _) = subprocess.getstatusoutput(command) 19 | assert exitcode == 0 20 | 21 | 22 | def test_snowflake_copy_to_stdout(): 23 | command = 'echo "SELECT 1 AS Col1, \'FOO\' AS Col2 UNION ALL SELECT 2, \'BAR\'" \\\n' 24 | command += ' | ' + shell.copy_to_stdout_command(SNOWFLAKE_DB, 25 | csv_format=True, 26 | header=True, 27 | delimiter_char=',') 28 | assert command 29 | 30 | print(command) 31 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 32 | assert exitcode == 0 33 | print(pstdout) 34 | assert pstdout == '''"COL1","COL2" 35 | "1","FOO" 36 | "2","BAR"''' 37 | 38 | 39 | def test_snowflake_sqlalchemy(): 40 | from sqlalchemy import text 41 | engine = sqlalchemy_engine.engine(SNOWFLAKE_DB) 42 | with engine.connect() as con: 43 | con.execute(statement = text("SELECT 1")) 44 | -------------------------------------------------------------------------------- /tests/seed/README.md: -------------------------------------------------------------------------------- 1 | This folder holds seed test files which are used to test `mara_db.shell.copy_from_stdin_command` 2 | 3 | | File | Encoding | Has header | delimiter_char | Quoted columns | File ending | Has trailing newline 4 | | ---------------------------------- | ------------------- | ---------- | -------------- | -------------- | ----------- | -------------------- 5 | | names_lf.csv | UTF-8 (without BOM) | No | `,` | No | LF | No 6 | | names_lf_header.csv | UTF-8 (without BOM) | Yes | `,` | No | LF | No 7 | | names_lf_lastrow.csv | UTF-8 (without BOM) | No | `,` | No | LF | Yes 8 | | names_lf_lastrow_header.csv | UTF-8 (without BOM) | Yes | `,` | No | LF | Yes 9 | | names_lf_quoted.csv | UTF-8 (without BOM) | No | `,` | Yes | LF | No 10 | | names_lf_quoted_header.csv | UTF-8 (without BOM) | Yes | `,` | Yes | LF | No 11 | | names_lf_quoted_lastrow.csv | UTF-8 (without BOM) | No | `,` | Yes | LF | Yes 12 | | names_lf_quoted_lastrow_header.csv | UTF-8 (without BOM) | Yes | `,` | Yes | LF | Yes 13 | | names_crlf.csv | UTF-8 (without BOM) | No | `,` | No | CRLF | No 14 | | names_crlf_header.csv | UTF-8 (without BOM) | Yes | `,` | No | CRLF | No 15 | | names_crlf_lastrow.csv | UTF-8 (without BOM) | No | `,` | No | CRLF | Yes 16 | | names_crlf_lastrow_header.csv | UTF-8 (without BOM) | Yes | `,` | No | CRLF | Yes 17 | | names_crlf_quoted.csv | UTF-8 (without BOM) | No | `,` | Yes | CRLF | No 18 | | names_crlf_quoted_header.csv | UTF-8 (without BOM) | Yes | `,` | Yes | CRLF | No 19 | | names_crlf_quoted_lastrow.csv | UTF-8 (without BOM) | No | `,` | Yes | CRLF | Yes 20 | | names_crlf_quoted_lastrow_header.csv | UTF-8 (without BOM) | Yes | `,` | Yes | CRLF | Yes 21 | -------------------------------------------------------------------------------- /mara_db/sqlalchemy_engine.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import sqlalchemy.engine 4 | import sqlalchemy.sql.schema 5 | 6 | import mara_db.dbs 7 | 8 | 9 | @functools.singledispatch 10 | def engine(db: object) -> sqlalchemy.engine.Engine: 11 | """ 12 | Returns a SQLAlchemy engine for a configured database connection 13 | 14 | Args: 15 | db: The database to use (either an alias or a `dbs.DB` object) 16 | 17 | Returns: 18 | The generated sqlalchemy engine 19 | 20 | Example: 21 | >>> print(engine('mara')) 22 | 
Engine(postgresql+psycopg2://None@localhost/mara) 23 | """ 24 | pass 25 | 26 | 27 | @engine.register(str) 28 | def __(alias: str, **_): 29 | return engine(mara_db.dbs.db(alias)) 30 | 31 | 32 | @engine.register(mara_db.dbs.DB) 33 | def __(db: mara_db.dbs.DB, **_): 34 | return sqlalchemy.create_engine(db.sqlalchemy_url) 35 | 36 | 37 | @engine.register(mara_db.dbs.BigQueryDB) 38 | def __(db: mara_db.dbs.BigQueryDB): 39 | # creates bigquery dialect 40 | url = db.sqlalchemy_url 41 | 42 | return sqlalchemy.create_engine(url, 43 | credentials_path=db.service_account_json_file_name, 44 | location=db.location) 45 | 46 | 47 | @engine.register(mara_db.dbs.DatabricksDB) 48 | def __(db: mara_db.dbs.DatabricksDB): 49 | url = db.sqlalchemy_url 50 | 51 | return sqlalchemy.create_engine(url, 52 | connect_args={ 53 | "http_path": db.http_path 54 | }) 55 | -------------------------------------------------------------------------------- /tests/test_databricks.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import subprocess 3 | 4 | from mara_db import shell, sqlalchemy_engine 5 | from tests.local_config import DATABRICKS_DB 6 | 7 | 8 | if not DATABRICKS_DB: 9 | pytest.skip("skipping DatabricksDB tests: variable DATABRICKS_DB not set", allow_module_level=True) 10 | 11 | 12 | def test_databricks_query_command(): 13 | command = 'echo "SELECT 1" \\\n' 14 | command += ' | ' + shell.query_command(DATABRICKS_DB) 15 | assert command 16 | 17 | print(command) 18 | (exitcode, _) = subprocess.getstatusoutput(command) 19 | assert exitcode == 0 20 | 21 | 22 | def test_databricks_copy_to_stdout(): 23 | command = 'echo "SELECT 1 AS Col1, \'FOO\' AS Col2 UNION ALL SELECT 2, \'BAR\'" \\\n' 24 | command += ' | ' + shell.copy_to_stdout_command(DATABRICKS_DB, 25 | csv_format=True, 26 | header=True, 27 | delimiter_char=',') 28 | assert command 29 | 30 | print(command) 31 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 32 | assert exitcode == 0 33 | print(pstdout) 34 | assert pstdout == '''Col1,Col2 35 | 1,FOO 36 | 2,BAR''' 37 | 38 | 39 | def test_databricks_sqlalchemy(): 40 | from sqlalchemy import text 41 | engine = sqlalchemy_engine.engine(DATABRICKS_DB) 42 | with engine.connect() as con: 43 | con.execute(statement = text("SELECT 1")) 44 | 45 | 46 | def test_databricks_connect(): 47 | """ 48 | A simple test to check if the connect API works. 49 | """ 50 | from .db_test_helper import _test_connect 51 | _test_connect(DATABRICKS_DB) 52 | 53 | 54 | def test_databricks_cursor_context(): 55 | """ 56 | A simple test to check if the cursor context of the db works. 57 | """ 58 | from .db_test_helper import _test_cursor_context 59 | _test_cursor_context(DATABRICKS_DB) 60 | -------------------------------------------------------------------------------- /docs/dbs/PostgreSQL.rst: -------------------------------------------------------------------------------- 1 | PostgreSQL 2 | ========== 3 | 4 | PostgreSQL is the main database engine and is currently installed by default. 5 | 6 | .. warning:: 7 | From version 5 the requirements for PostgreSQL will not be installed by default anymore. 8 | Please make sure to include extras ``postgres`` in your requirements.txt file, see below. 9 | 10 | 11 | Installation 12 | ------------ 13 | 14 | Use extras `postgres` to install all required packages. 15 | 16 | .. code-block:: shell 17 | 18 | $ pip install mara-db[postgres] 19 | 20 | The ``psql`` client is required, which can be installed on Ubuntu/Debian via 21 | 22 | .. 
code-block:: shell 23 | 24 | $ sudo apt-get install postgresql-client 25 | 26 | Configuration examples 27 | ---------------------- 28 | 29 | .. tabs:: 30 | 31 | .. group-tab:: Trusted authentication 32 | 33 | .. code-block:: python 34 | 35 | import mara_db.dbs 36 | mara_db.config.databases = lambda: { 37 | 'dwh': mara_db.dbs.PostgreSQLDB( 38 | host='localhost', 39 | user='root', 40 | database='dwh'), 41 | } 42 | 43 | .. group-tab:: Password authentication 44 | 45 | .. code-block:: python 46 | 47 | import mara_db.dbs 48 | mara_db.config.databases = lambda: { 49 | 'dwh': mara_db.dbs.PostgreSQLDB( 50 | host='localhost', 51 | user='root', 52 | password='', 53 | database='dwh'), 54 | } 55 | 56 | | 57 | 58 | | 59 | 60 | API reference 61 | ------------- 62 | 63 | This section contains database specific API in the module. 64 | 65 | Configuration 66 | ~~~~~~~~~~~~~ 67 | 68 | .. module:: mara_db.dbs 69 | :noindex: 70 | 71 | .. autoclass:: PostgreSQLDB 72 | :special-members: __init__ 73 | :inherited-members: 74 | :members: 75 | -------------------------------------------------------------------------------- /tests/mssql/test_mssql_config.py: -------------------------------------------------------------------------------- 1 | """ 2 | For SQL Server we use a special configuration: 3 | - we use a generic 'SQLServerDB' config instance which instantiates the default provider 4 | - we support two different connection modes: via sqsh (SqshSQLServerDB) or via sqlcmd (SqlcmdSQLServerDB) 5 | 6 | To make sure that the config is properly implemented, the following unit tests are added. 7 | """ 8 | import functools 9 | 10 | from mara_db import dbs 11 | 12 | 13 | @functools.singledispatch 14 | def check_dbconfig(db) -> str: 15 | """A test of functools single-dispatch overloading""" 16 | raise Exception("Not expected to end up here in the test") 17 | 18 | @check_dbconfig.register(dbs.SQLServerDB) 19 | def __(db: dbs.SQLServerDB) -> str: 20 | return 'undefined' 21 | 22 | @check_dbconfig.register(dbs.SqlcmdSQLServerDB) 23 | def __(db: dbs.SqlcmdSQLServerDB) -> str: 24 | return 'sqlcmd' 25 | 26 | @check_dbconfig.register(dbs.SqshSQLServerDB) 27 | def __(db: dbs.SqshSQLServerDB) -> str: 28 | return 'sqsh' 29 | 30 | 31 | def test_mssql_dbconfig(): 32 | """Test the behavior of instantiating SQLServerDB""" 33 | 34 | sqlcmd_db = dbs.SqlcmdSQLServerDB(host="localhost") 35 | sqsh_db = dbs.SqshSQLServerDB(host="localhost") 36 | default_db = dbs.SQLServerDB(host="localhost") 37 | 38 | # check if singledispatch uses the right class 39 | assert check_dbconfig(sqlcmd_db) == "sqlcmd" 40 | assert check_dbconfig(sqsh_db) == "sqsh" 41 | assert check_dbconfig(default_db) in ["sqsh", "sqlcmd"] 42 | 43 | # check if all db config instances are detected via 'isinstance(..., SQLServerDB)' 44 | assert isinstance(sqlcmd_db, dbs.SQLServerDB) 45 | assert isinstance(sqsh_db, dbs.SQLServerDB) 46 | assert isinstance(default_db, dbs.SQLServerDB) 47 | 48 | # check that we get 'SqlcmdSQLServerDB' or 'SqshSQLServerDB' when instantiating 'dbs.SQLServerDB(...)' 49 | assert isinstance(default_db, dbs.SqlcmdSQLServerDB) or isinstance(default_db, dbs.SqshSQLServerDB) 50 | -------------------------------------------------------------------------------- /tests/db_test_helper.py: -------------------------------------------------------------------------------- 1 | import sqlalchemy 2 | from mara_db import dbs 3 | 4 | 5 | def db_is_responsive(db: dbs.DB) -> bool: 6 | """Returns True when the DB is available on the given port, otherwise False""" 7 | engine = 
sqlalchemy.create_engine(db.sqlalchemy_url, pool_pre_ping=True) 8 | 9 | try: 10 | with engine.connect() as conn: 11 | return True 12 | except Exception: 13 | return False 14 | 15 | 16 | def db_replace_placeholders(db: dbs.DB, docker_ip: str, docker_port: int) -> dbs.DB: 17 | """Replaces the internal placeholders with the docker ip and docker port""" 18 | if db.host == 'DOCKER_IP': 19 | db.host = docker_ip 20 | if db.port == -1: 21 | db.port = docker_port 22 | return db 23 | 24 | 25 | """ 26 | Basic tests which can be used for different DB engines. 27 | """ 28 | 29 | def _test_sqlalchemy(db: dbs.DB): 30 | """ 31 | A simple test to check if the SQLAlchemy connection works 32 | """ 33 | from mara_db.sqlalchemy_engine import engine 34 | from sqlalchemy import select 35 | 36 | eng = engine(db) 37 | 38 | with eng.connect() as conn: 39 | # run a SELECT 1. use a core select() so that 40 | # the SELECT of a scalar value without a table is 41 | # appropriately formatted for the backend 42 | assert conn.scalar(select(1)) == 1 43 | 44 | def _test_connect(db: dbs.DB): 45 | connection = dbs.connect(db) 46 | cursor = connection.cursor() 47 | try: 48 | cursor.execute('SELECT 1') 49 | row = cursor.fetchone() 50 | assert row[0] == 1 51 | connection.commit() 52 | except Exception as e: 53 | connection.rollback() 54 | raise e 55 | finally: 56 | cursor.close() 57 | connection.close() 58 | 59 | def _test_cursor_context(db: dbs.DB): 60 | with dbs.cursor_context(db) as cursor: 61 | cursor.execute('SELECT 1') 62 | row = cursor.fetchone() 63 | assert row[0] == 1 64 | -------------------------------------------------------------------------------- /mara_db/static/schema-page.js: -------------------------------------------------------------------------------- 1 | var SchemaPage = function (baseUrl, dbAlias) { 2 | 3 | function localStorageKey(schema) { 4 | return 'db-schema-' + dbAlias + '-' + schema; 5 | } 6 | 7 | $('.schema-checkbox').each(function (n, checkbox) { 8 | if (localStorage.getItem(localStorageKey(checkbox.value)) == 'true') { 9 | checkbox.checked = true; 10 | } 11 | }); 12 | 13 | if (localStorage.getItem('db-schema-hide-columns') == 'true') { 14 | $('#hide-columns-checkbox')[0].checked = true; 15 | } 16 | 17 | if (localStorage.getItem('db-schema-engine')) { 18 | $('#engine').val(localStorage.getItem('db-schema-engine')); 19 | } 20 | 21 | var url = ''; 22 | 23 | function updateUI() { 24 | var selectedSchemas = []; 25 | $('.schema-checkbox').each(function (n, checkbox) { 26 | if (checkbox.checked) { 27 | selectedSchemas.push(checkbox.value); 28 | } 29 | localStorage.setItem(localStorageKey(checkbox.value), checkbox.checked); 30 | }); 31 | localStorage.setItem('db-schema-hide-columns', $('#hide-columns-checkbox')[0].checked); 32 | localStorage.setItem('db-schema-engine', $('#engine').val()); 33 | 34 | if (selectedSchemas.length > 0) { 35 | $('#schema-container').html(spinner()); 36 | url = baseUrl + '/' + selectedSchemas.join('/') + '?engine=' + $('#engine').val(); 37 | if ($('#hide-columns-checkbox')[0].checked) { 38 | url += '&hide-columns=true' 39 | } 40 | loadContentAsynchronously('schema-container', url); 41 | } else { 42 | $('#schema-container').html('No schemas selected'); 43 | } 44 | 45 | } 46 | 47 | function downloadSvg() { 48 | window.location.href = url; 49 | } 50 | 51 | $('.schema-checkbox').change(updateUI); 52 | $('#hide-columns-checkbox').change(updateUI); 53 | $('#engine').change(updateUI); 54 | 55 | updateUI(); 56 | 57 | return {'downloadSvg': downloadSvg} 58 | }; 59 | 
-------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Python Version 5 | -------------- 6 | 7 | We recommend using the latest version of Python. Mara supports Python 8 | 3.6 and newer. 9 | 10 | Dependencies 11 | ------------ 12 | 13 | These packages will be installed automatically when installing Mara DB. 14 | 15 | * [SQLAlchemy] the Database SQL Toolkit and Object Relational Mapper (ORM) for Python 16 | * [SQLAlchemy-Utils] various utility functions, new data types and helpers for SQLAlchemy 17 | * [Alembic] a lightweight database migration tool 18 | * [Multimethod] provides a decorator for adding multiple argument dispatching to functions 19 | * [Graphviz] facilitates the creation and rendering of graph descriptions in the [DOT](https://www.graphviz.org/doc/info/lang.html) language of the [Graphviz](https://www.graphviz.org/) graph drawing software from Python. 20 | * [Mara Page] the Mara core module for defining pages of Flask-based backends 21 | * [psycopg2-binary] required for PostgreSQL database support 22 | 23 | [SQLAlchemy]: https://www.sqlalchemy.org/ 24 | [SQLAlchemy-Utils]: https://sqlalchemy-utils.readthedocs.io/ 25 | [Alembic]: https://alembic.sqlalchemy.org/ 26 | [Multimethod]: https://pypi.org/project/multimethod/ 27 | [Graphviz]: https://graphviz.readthedocs.io/ 28 | [Mara Page]: https://mara-page.readthedocs.io/ 29 | [psycopg2-binary]: https://pypi.org/project/psycopg2-binary/ 30 | 31 | ```{warning} 32 | The package ``psycopg2-binary`` is planned to be removed as a default requirement. When using PostgreSQL as database 33 | backend, please use extras ``postgres`` like `mara-db[postgres]` to make sure that the module gets installed. 34 | ``` 35 | 36 | Install Mara DB 37 | --------------- 38 | 39 | To use the library directly, use pip: 40 | 41 | ``` bash 42 | $ pip install mara-db 43 | ``` 44 | 45 | or 46 | 47 | ``` bash 48 | $ pip install git+https://github.com/mara/mara-db.git 49 | ``` 50 | 51 | ```{note} 52 | For most of the database engines additional Python packages are required which can be installed via extras. 53 | 54 | For example, for PostgreSQL use 55 | 56 | ``$ pip install mara-db[postgres]`` 57 | 58 | to make sure all additional required packages are installed. 59 | ``` 60 | -------------------------------------------------------------------------------- /docs/dbs/Redshift.rst: -------------------------------------------------------------------------------- 1 | Amazon Redshift 2 | =============== 3 | 4 | .. warning:: 5 | From version 5 the package ``psycopg2-binary`` will not be installed by default anymore. 6 | Please make sure to include extras ``redshift`` in your requirements.txt file, see below. 7 | 8 | 9 | Installation 10 | ------------ 11 | 12 | Use extras `redshift` to install all required packages. 13 | 14 | .. code-block:: shell 15 | 16 | $ pip install mara-db[redshift] 17 | 18 | The ``psql`` client is required, which can be installed on Ubuntu/Debian via 19 | 20 | .. code-block:: shell 21 | 22 | $ sudo apt-get install postgresql-client 23 | 24 | To read from STDIN, an additional S3 bucket is required as temporary storage. You need to install the `awscli `_ package in addition: 25 | 26 | .. code-block:: shell 27 | 28 | $ pip install awscli 29 | 30 | 31 | Configuration examples 32 | ---------------------- 33 | 34 | .. tabs:: 35 | 36 | .. group-tab:: Default 37 | 38 | .. 
code-block:: python 39 | 40 | import mara_db.dbs 41 | mara_db.config.databases = lambda: { 42 | 'dwh': mara_db.dbs.RedshiftDB( 43 | host='localhost', 44 | user='root', 45 | password='', 46 | database='dwh'), 47 | } 48 | 49 | .. group-tab:: With S3 bucket 50 | 51 | .. code-block:: python 52 | 53 | import mara_db.dbs 54 | mara_db.config.databases = lambda: { 55 | 'dwh': mara_db.dbs.RedshiftDB( 56 | host='localhost', 57 | user='root', 58 | password='', 59 | database='dwh', 60 | aws_access_key_id='...', 61 | aws_secret_access_key='...', 62 | aws_s3_bucket_name='my-s3-bucket'), 63 | } 64 | 65 | | 66 | 67 | | 68 | 69 | API reference 70 | ------------- 71 | 72 | This section contains database specific API in the module. 73 | 74 | 75 | Configuration 76 | ~~~~~~~~~~~~~ 77 | 78 | .. module:: mara_db.dbs 79 | :noindex: 80 | 81 | .. autoclass:: RedshiftDB 82 | :special-members: __init__ 83 | :inherited-members: 84 | :members: 85 | -------------------------------------------------------------------------------- /docs/dbs/Snowflake.rst: -------------------------------------------------------------------------------- 1 | Snowflake 2 | ========= 3 | 4 | 5 | Installation 6 | ------------ 7 | 8 | Use extras `snowflake` to install all required packages. 9 | 10 | .. code-block:: shell 11 | 12 | $ pip install mara-db[snowflake] 13 | 14 | The official `snowsql` client is required. See the `Installing SnowSQL `_ page for installation details. 15 | 16 | 17 | Configuration examples 18 | ---------------------- 19 | 20 | .. tabs:: 21 | 22 | .. group-tab:: Use account 23 | 24 | .. code-block:: python 25 | 26 | import mara_db.dbs 27 | mara_db.config.databases = lambda: { 28 | 'dwh': mara_db.dbs.SnowflakeDB( 29 | account='kaXXXXX.regio.cloud', 30 | user='', 31 | password='', 32 | database='dwh'), 33 | } 34 | 35 | .. group-tab:: Private key file 36 | 37 | .. code-block:: python 38 | 39 | import mara_db.dbs 40 | mara_db.config.databases = lambda: { 41 | 'dwh': mara_db.dbs.SnowflakeDB( 42 | account='kaXXXXX.regio.cloud', 43 | user='', 44 | private_key_file='/rsa_key.p8', 45 | private_key_passphrase='', 46 | database='dwh'), 47 | } 48 | 49 | .. group-tab:: Local connection configuration 50 | 51 | You can configure a named connection in the snowsql config file. See `here `_. 52 | 53 | .. code-block:: python 54 | 55 | import mara_db.dbs 56 | mara_db.config.databases = lambda: { 57 | 'dwh': mara_db.dbs.SnowflakeDB( 58 | connection='my_example_connection', 59 | database='dwh'), 60 | } 61 | 62 | | 63 | 64 | | 65 | 66 | API reference 67 | ------------- 68 | 69 | This section contains database specific API in the module. 70 | 71 | 72 | Configuration 73 | ~~~~~~~~~~~~~ 74 | 75 | .. module:: mara_db.dbs 76 | :noindex: 77 | 78 | .. autoclass:: SnowflakeDB 79 | :special-members: __init__ 80 | :inherited-members: 81 | :members: 82 | -------------------------------------------------------------------------------- /docs/dbs/Databricks.rst: -------------------------------------------------------------------------------- 1 | Databricks 2 | ========== 3 | 4 | 5 | Installation 6 | ------------ 7 | 8 | Use extras `databricks` to install all required packages. 9 | 10 | .. code-block:: shell 11 | 12 | $ pip install mara-db[databricks] 13 | 14 | The official `dbsqlcli` client is required. See the `Install the Databricks SQL CLI `_ page for installation details. 15 | 16 | 17 | Configuration examples 18 | ---------------------- 19 | 20 | .. tabs:: 21 | 22 | .. group-tab:: Use access token 23 | 24 | .. 
code-block:: python 25 | 26 | import mara_db.dbs 27 | mara_db.config.databases = lambda: { 28 | 'dwh': mara_db.dbs.DatabricksDB( 29 | host='dbc-a1b2345c-d6e78.cloud.databricks.com', 30 | http_path='/sql/1.0/warehouses/1abc2d3456e7f890a', 31 | access_token='dapi1234567890b2cd34ef5a67bc8de90fa12b'), 32 | } 33 | 34 | .. group-tab:: Environment variables 35 | 36 | .. code-block:: python 37 | 38 | import mara_db.dbs 39 | mara_db.config.databases = lambda: { 40 | 'dwh': mara_db.dbs.DatabricksDB(), 41 | } 42 | 43 | You need to define the environment variables `DBSQLCLI_HOST_NAME`, `DBSQLCLI_HTTP_PATH` and `DBSQLCLI_ACCESS_TOKEN`. See also `Environment variables `_. 44 | 45 | .. group-tab:: Settings file 46 | 47 | .. code-block:: python 48 | 49 | import mara_db.dbs 50 | mara_db.config.databases = lambda: { 51 | 'dwh': mara_db.dbs.DatabricksDB(), 52 | } 53 | 54 | You need to define the database connection in the `dbsqlclirc` settings file. See also `Settings file `_. Note that using a custom settings file is currently not supported in Mara. 55 | 56 | | 57 | 58 | | 59 | 60 | API reference 61 | ------------- 62 | 63 | This section contains database specific API in the module. 64 | 65 | .. module:: mara_db.databricks 66 | 67 | Configuration 68 | ~~~~~~~~~~~~~ 69 | 70 | .. module:: mara_db.dbs 71 | :noindex: 72 | 73 | .. autoclass:: DatabricksDB 74 | :special-members: __init__ 75 | :inherited-members: 76 | :members: 77 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'Mara DB' 21 | copyright = '2017-2022, Mara contributors' 22 | author = 'Mara contributors' 23 | 24 | # The short X.Y version. 25 | from mara_db import __version__ 26 | version = __version__ 27 | # The full version, including alpha/beta/rc tags 28 | release = version 29 | 30 | 31 | # -- General configuration --------------------------------------------------- 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 35 | # ones. 36 | extensions = [ 37 | 'sphinx.ext.autodoc', 38 | 'sphinx_tabs.tabs', 39 | 'sphinxcontrib.napoleon', 40 | 'myst_parser', 41 | ] 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # List of patterns, relative to source directory, that match files and 47 | # directories to ignore when looking for source files. 48 | # This pattern also affects html_static_path and html_extra_path. 
49 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 50 | 51 | 52 | # -- Options for HTML output ------------------------------------------------- 53 | 54 | # The theme to use for HTML and HTML Help pages. See the documentation for 55 | # a list of builtin themes. 56 | # 57 | html_theme = 'alabaster' 58 | 59 | # Add any paths that contain custom static files (such as style sheets) here, 60 | # relative to this directory. They are copied after the builtin static files, 61 | # so a file named "default.css" will overwrite the builtin "default.css". 62 | html_static_path = ['_static'] 63 | html_favicon = "_static/favicon.ico" 64 | html_logo = "_static/mara-animal.jpg" 65 | html_title = f"Mara DB Documentation ({version})" 66 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. rst-class:: hide-header 2 | 3 | Mara DB documentation 4 | ======================= 5 | 6 | Welcome to Mara DB’s documentation. This is one of the core modules of the `Mara Framework `_ 7 | for configuring and accessing multiple databases. It decouples the use of databases from their configuration by referring to databases via "aliases". 8 | 9 | The module ``mara_db.dbs`` contains abstract database configurations for various database backends. The database connections of a project 10 | are configured by overwriting the ``databases`` function in ``mara_db.config``. 11 | 12 | .. code-block:: python 13 | 14 | import mara_db.config 15 | import mara_db.dbs 16 | 17 | ## configure database connections for different aliases 18 | mara_db.config.databases = lambda: { 19 | 'mara': mara_db.dbs.PostgreSQLDB(host='localhost', user='root', database='mara'), 20 | 'dwh': mara_db.dbs.PostgreSQLDB(database='dwh'), 21 | 'source-1': mara_db.dbs.MysqlDB(host='some-localhost', database='my_app', user='dwh'), 22 | 'source-2': mara_db.dbs.SQLServerDB(user='dwh_read', password='123abc', database='db1', host='some-sql-server') 23 | } 24 | 25 | ## access individual database configurations with `dbs.db`: 26 | print(mara_db.dbs.db('mara')) 27 | # -> 28 | 29 | 30 | User's Guide 31 | ------------ 32 | 33 | This part of the documentation focuses on step-by-step instructions on how to use this module. 34 | 35 | .. toctree:: 36 | :maxdepth: 2 37 | 38 | installation 39 | config 40 | 41 | 42 | Databases 43 | --------- 44 | 45 | This section focuses on the supported database engines. 46 | 47 | .. toctree:: 48 | :maxdepth: 2 49 | 50 | databases-overview 51 | dbs/PostgreSQL 52 | dbs/Redshift 53 | dbs/BigQuery 54 | dbs/Databricks 55 | dbs/Oracle 56 | dbs/SQLServer 57 | dbs/Mysql 58 | dbs/Snowflake 59 | dbs/SQLite 60 | 61 | 62 | CLI commands 63 | ------------ 64 | 65 | If you are looking for the available CLI commands, this is the right place. 66 | 67 | .. toctree:: 68 | :maxdepth: 2 69 | 70 | cli 71 | 72 | 73 | API Reference 74 | ------------- 75 | 76 | If you are looking for information on a specific function, class or 77 | method, this part of the documentation is for you. 78 | 79 | .. toctree:: 80 | :maxdepth: 2 81 | 82 | api 83 | 84 | 85 | Additional Notes 86 | ---------------- 87 | 88 | Legal information and changelog are here for the interested. 89 | 90 | .. 
toctree:: 91 | :maxdepth: 2 92 | 93 | license 94 | changes 95 | -------------------------------------------------------------------------------- /mara_db/formats.py: -------------------------------------------------------------------------------- 1 | """Different formats for piping""" 2 | from typing import Optional 3 | 4 | 5 | class Format: 6 | """Base format definition""" 7 | 8 | def __repr__(self) -> str: 9 | return (f'<{self.__class__.__name__}: ' 10 | + ', '.join([f'{var}={getattr(self, var)}' 11 | for var in vars(self) if getattr(self, var)]) 12 | + '>') 13 | 14 | 15 | class NativeFormat(Format): 16 | """Use the native format of e.g. a database.""" 17 | def __init__(self): 18 | pass 19 | 20 | 21 | class CsvFormat(Format): 22 | """ 23 | CSV file format. See https://tools.ietf.org/html/rfc4180 24 | """ 25 | def __init__(self, delimiter_char: str = ',', quote_char: Optional[str] = None, header: bool = False, footer: bool = False, null_value_string: Optional[str] = None): 26 | """ 27 | CSV file format. See https://tools.ietf.org/html/rfc4180 28 | 29 | Args: 30 | delimiter_char: The character that separates columns 31 | quote_char: The character for quoting strings 32 | header: Whether a csv header with the column name(s) is part of the CSV file. 33 | footer: Whether a footer will be included or not. False by default. 34 | null_value_string: The string used to indicate NULL. 35 | """ 36 | self.delimiter_char = delimiter_char or ',' 37 | self.quote_char = quote_char 38 | self.header = header or False 39 | self.footer = footer or False 40 | self.null_value_string = null_value_string 41 | 42 | 43 | class JsonlFormat(Format): 44 | """New line delimited JSON stream. See https://en.wikipedia.org/wiki/JSON_streaming""" 45 | def __init__(self): 46 | pass 47 | 48 | 49 | class AvroFormat(Format): 50 | """Apache Avro""" 51 | def __init__(self): 52 | pass 53 | 54 | 55 | class ParquetFormat(Format): 56 | """Apache Parquet""" 57 | def __init__(self): 58 | pass 59 | 60 | 61 | class OrcFormat(Format): 62 | """Apache ORC""" 63 | def __init__(self): 64 | pass 65 | 66 | 67 | def _check_format_with_args_used(pipe_format: Format, header: Optional[bool] = None, footer: Optional[bool] = None, delimiter_char: Optional[str] = None, 68 | csv_format: Optional[bool] = None, quote_char: Optional[str] = None, null_value_string: Optional[str] = None): 69 | if pipe_format: 70 | assert all(v is None for v in [header, footer, delimiter_char, csv_format, quote_char, null_value_string]), "You cannot pass format and an old parameter (header, footer, delimiter_char, csv_format, quote_char, null_value_string) at the same time" 71 | 72 | 73 | def _get_format_from_args(header: Optional[bool] = None, footer: Optional[bool] = None, delimiter_char: Optional[str] = None, csv_format: Optional[bool] = None, 74 | quote_char: Optional[str] = None, null_value_string: Optional[str] = None) -> Format: 75 | """An internal method handling legacy parameter settings""" 76 | if csv_format or (delimiter_char and csv_format is None): 77 | return CsvFormat(delimiter_char=delimiter_char, 78 | quote_char=quote_char, 79 | header=header, 80 | footer=footer, 81 | null_value_string=null_value_string) 82 | else: 83 | return NativeFormat() 84 | -------------------------------------------------------------------------------- /docs/dbs/BigQuery.rst: -------------------------------------------------------------------------------- 1 | Google Big Query 2 | ================ 3 | 4 | Optionally, for loading data from files into BigQuery, the `gcloud_gcs_bucket_name` can be 
specified in the database initialization. 5 | The specified Google Cloud Storage bucket will then be used as a cache for loading data, overcoming potential limitations. 6 | For more, see `loading-data `_. 7 | By default, files will be loaded directly from the local machine as described in `loading-local-data `_. 8 | 9 | Installation 10 | ------------ 11 | 12 | Use extras `bigquery` to install all required packages. 13 | 14 | .. code-block:: shell 15 | 16 | $ pip install mara-db[bigquery] 17 | 18 | The official `bq` and `gcloud` clients are required. 19 | See the `Google Cloud SDK `_ page for installation details. 20 | 21 | Enabling the BigQuery API and service account JSON credentials are also required, as listed 22 | in the official documentation `here `_. 23 | 24 | One-time authentication of the used service account: 25 | 26 | .. code-block:: bash 27 | 28 | $ gcloud auth activate-service-account --key-file='path-to/service-account.json' 29 | 30 | To read from STDIN, an additional Google Cloud Storage bucket is required as temporary storage. 31 | 32 | Configuration examples 33 | ---------------------- 34 | 35 | .. tabs:: 36 | 37 | .. group-tab:: Service account 38 | 39 | .. code-block:: python 40 | 41 | import mara_db.dbs 42 | mara_db.config.databases = lambda: { 43 | 'dwh': mara_db.dbs.BigQueryDB( 44 | service_account_json_file_name='service-account.json', 45 | location='EU', 46 | project='my-project-name', 47 | dataset='dwh'), 48 | } 49 | 50 | .. group-tab:: ... with GCS bucket 51 | 52 | .. code-block:: python 53 | 54 | import mara_db.dbs 55 | mara_db.config.databases = lambda: { 56 | 'dwh': mara_db.dbs.BigQueryDB( 57 | service_account_json_file_name='service-account.json', 58 | location='EU', 59 | project='my-project-name', 60 | dataset='dwh', 61 | gcloud_gcs_bucket_name='my-temp-bucket'), 62 | } 63 | 64 | | 65 | 66 | | 67 | 68 | API reference 69 | ------------- 70 | 71 | This section contains database specific API in the module. 72 | 73 | .. module:: mara_db.bigquery 74 | 75 | Configuration 76 | ~~~~~~~~~~~~~ 77 | 78 | .. module:: mara_db.dbs 79 | :noindex: 80 | 81 | .. autoclass:: BigQueryDB 82 | :special-members: __init__ 83 | :inherited-members: 84 | :members: 85 | 86 | 87 | General helper functions 88 | ~~~~~~~~~~~~~~~~~~~~~~~~ 89 | 90 | .. module:: mara_db.bigquery 91 | :noindex: 92 | 93 | .. autofunction:: bigquery_credentials 94 | 95 | .. autofunction:: bigquery_client 96 | 97 | Data modelling helper functions 98 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 99 | 100 | .. module:: mara_db.bigquery 101 | :noindex: 102 | 103 | .. autofunction:: create_bigquery_table_from_postgresql_query 104 | 105 | .. 
autofunction:: replace_dataset 106 | -------------------------------------------------------------------------------- /tests/mssql/test_mssql_sqsh.py_: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pytest 3 | import subprocess 4 | import typing as t 5 | 6 | from mara_db import dbs, shell 7 | 8 | from ..command_helper import * 9 | from ..db_test_helper import db_is_responsive 10 | from .test_mssql import MSSQL_USER, MSSQL_PASSWORD, MSSQL_DATABASE 11 | 12 | # make sure that the tests of 'test_mssql' are run before the tests in this file: 13 | from .test_mssql import test_mssql_ddl 14 | 15 | 16 | @pytest.fixture(scope="session") 17 | def mssql_sqsh_db(docker_ip, docker_services) -> t.Tuple[str, int]: 18 | """Ensures that SQL Server server is running on docker.""" 19 | 20 | docker_port = docker_services.port_for("mssql", 1433) 21 | db = dbs.SqshSQLServerDB(host=docker_ip, port=docker_port, user=MSSQL_USER, password=MSSQL_PASSWORD, database=MSSQL_DATABASE) 22 | 23 | # here we need to wait until the SQL Server port is available. 24 | docker_services.wait_until_responsive( 25 | timeout=30.0, pause=0.1, check=lambda: db_is_responsive(db) 26 | ) 27 | 28 | return db 29 | 30 | 31 | @pytest.mark.dependency() 32 | def test_mssql_sqsh_shell_query_command(mssql_sqsh_db): 33 | command = execute_sql_statement_command(mssql_sqsh_db, "SELECT 1") 34 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 35 | print(pstdout) 36 | assert exitcode == 0 37 | 38 | 39 | @pytest.mark.dependency() 40 | def test_mssql_sqsh_shell_copy_to_stout(mssql_sqsh_db): 41 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqsh_db, "SELECT 1 AS Col1, 'FOO' AS Col2 UNION ALL SELECT 2, 'BAR'") 42 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 43 | print(pstdout) 44 | assert exitcode == 0 45 | assert pstdout == '''1,FOO 46 | 2,BAR''' 47 | 48 | 49 | @pytest.mark.dependency(depends=["test_mssql_sqsh_shell_query_command", "test_mssql_sqsh_shell_copy_to_stout"]) 50 | def test_mssql_sqsh_shell_copy_from_stdin_csv_noheader(mssql_sqsh_db): 51 | # reading csv file... 52 | names_csv_file_path = str((pathlib.Path(__file__).parent / '../seed/names.csv').absolute()) 53 | command = f'cat {names_csv_file_path} \\\n' 54 | command += ' | ' + shell.copy_from_stdin_command(mssql_sqsh_db,target_table='names',csv_format=True,skip_header=False)#,delimiter_char=',') 55 | print(command) 56 | 57 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 58 | print(pstdout) 59 | assert exitcode == 0 60 | 61 | # check if writing was successful 62 | 63 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqsh_db, "SELECT COUNT(*) FROM names") 64 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 65 | assert exitcode == 0 66 | assert pstdout == "10" 67 | 68 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqsh_db, "SELECT name FROM names WHERE id = 1") 69 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 70 | assert exitcode == 0 71 | assert pstdout == "Elinor Meklit" 72 | 73 | 74 | @pytest.mark.dependency(depends=["test_mssql_sqsh_shell_query_command", "test_mssql_sqsh_shell_copy_to_stout", "test_mssql_ddl"]) 75 | def test_mssql_sqsh_shell_copy_from_stdin_csv_skipheader(mssql_sqsh_db): 76 | # reading csv file... 
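    # a brief orientation comment (added for clarity): `cat` streams the seed CSV into the shell pipeline that shell.copy_from_stdin_command generates for the given database, exactly as in the no-header test above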
77 | names_csv_file_path = str((pathlib.Path(__file__).parent / '../seed/names_header.csv').absolute()) 78 | command = f'cat {names_csv_file_path} \\\n' 79 | command += ' | ' + shell.copy_from_stdin_command(mssql_sqsh_db,target_table='names_with_header',csv_format=True,skip_header=True,delimiter_char=',') 80 | print(command) 81 | 82 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 83 | print(pstdout) 84 | assert exitcode == 0 85 | 86 | # check if writing was successful 87 | 88 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqsh_db, "SELECT COUNT(*) FROM names_with_header") 89 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 90 | assert exitcode == 0 91 | assert pstdout == "10" 92 | 93 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqsh_db, "SELECT name FROM names_with_header WHERE id = 1") 94 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 95 | assert exitcode == 0 96 | assert pstdout == "Elinor Meklit" 97 | -------------------------------------------------------------------------------- /docs/dbs/SQLServer.rst: -------------------------------------------------------------------------------- 1 | Microsoft SQL Server 2 | ==================== 3 | 4 | There are two ways to use SQL Server with mara: 5 | 6 | 1. using the official MSSQL Tools for SQL Server on linux (`sqlcmd`, `bcp`) 7 | 2. using the linux sql client tool `sqsh` (legacy) 8 | 9 | Currently by default `sqsh` is used. This will be changed in a future version to the official MSSQL Tools from Microsoft. You can explicitly 10 | specify the client tool you want to use, see below. 11 | 12 | 13 | Prerequisites 14 | ------------- 15 | 16 | On Ubuntu/Debian make sure you have the ODBC header files before installing 17 | 18 | .. code-block:: shell 19 | 20 | $ sudo apt install unixodbc-dev 21 | 22 | The python module `pyodbc `_ requires a ODBC driver to be installed. By default Microsoft ODBC Driver 17 for SQL Server is used. You can find the installation guide here: 23 | `Installing the Microsoft ODBC Driver for SQL Server (Linux) `_. 24 | 25 | 26 | Installation 27 | ------------ 28 | 29 | Use extras `mssql` to install all required packages. 30 | 31 | .. code-block:: shell 32 | 33 | $ pip install mara-db[mssql] 34 | 35 | Use MSSQL Tools 36 | ~~~~~~~~~~~~~~~ 37 | 38 | To see how to install the MSSQL Tools, follow this guide: 39 | `Install the SQL Server command-line tools sqlcmd and bcp on Linux `_ 40 | 41 | 42 | Use sqsh 43 | ~~~~~~~~ 44 | To install the `sqsh` shell tool, see here https://sourceforge.net/projects/sqsh/. Usually messy to get working. 45 | On ubuntu, use http://ppa.launchpad.net/jasc/sqsh/ubuntu/ backport. On Mac, try the homebrew version or install from source. 46 | 47 | 48 | Configuration examples 49 | ---------------------- 50 | 51 | .. tabs:: 52 | 53 | .. group-tab:: Default 54 | 55 | .. code-block:: python 56 | 57 | import mara_db.dbs 58 | mara_db.config.databases = lambda: { 59 | 'dwh': mara_db.dbs.SQLServerDB( 60 | host='localhost', 61 | user='sa', 62 | password='', 63 | database='dwh'), 64 | } 65 | 66 | # explicitly define to use the MSSQL Tools (RECOMMENDED) 67 | mara_db.config.databases = lambda: { 68 | 'dwh': mara_db.dbs.SqlcmdSQLServerDB( 69 | host='localhost', 70 | user='sa', 71 | password='', 72 | database='dwh'), 73 | } 74 | 75 | # explicitly define to use sqsh 76 | mara_db.config.databases = lambda: { 77 | 'dwh': mara_db.dbs.SqshSQLServerDB( 78 | host='localhost', 79 | user='sa', 80 | password='', 81 | database='dwh'), 82 | } 83 | 84 | .. 
group-tab:: Use ODBC Driver 18 85 | 86 | .. code-block:: python 87 | 88 | import mara_db.dbs 89 | mara_db.config.databases = lambda: { 90 | 'dwh': mara_db.dbs.SQLServerDB( 91 | host='localhost', 92 | user='sa', 93 | password='', 94 | database='dwh', 95 | odbc_driver='ODBC Driver 18 for SQL Server'), 96 | } 97 | 98 | # explicitly define to use the MSSQL Tools (RECOMMENDED) 99 | mara_db.config.databases = lambda: { 100 | 'dwh': mara_db.dbs.SqlcmdSQLServerDB( 101 | host='localhost', 102 | user='sa', 103 | password='', 104 | database='dwh', 105 | odbc_driver='ODBC Driver 18 for SQL Server'), 106 | } 107 | 108 | # explicitly define to use sqsh 109 | mara_db.config.databases = lambda: { 110 | 'dwh': mara_db.dbs.SqshSQLServerDB( 111 | host='localhost', 112 | user='sa', 113 | password='', 114 | database='dwh', 115 | odbc_driver='ODBC Driver 18 for SQL Server'), 116 | } 117 | 118 | | 119 | 120 | | 121 | 122 | API reference 123 | ------------- 124 | 125 | This section contains database specific API in the module. 126 | 127 | .. module:: mara_db.sqlserver 128 | 129 | Configuration 130 | ~~~~~~~~~~~~~ 131 | 132 | .. module:: mara_db.dbs 133 | :noindex: 134 | 135 | .. autoclass:: SQLServerDB 136 | :special-members: __init__ 137 | :inherited-members: 138 | :members: 139 | 140 | .. autoclass:: SqlcmdSQLServerDB 141 | :special-members: __init__ 142 | :inherited-members: 143 | :members: 144 | 145 | .. autoclass:: SqshSQLServerDB 146 | :special-members: __init__ 147 | :inherited-members: 148 | :members: 149 | -------------------------------------------------------------------------------- /docs/databases-overview.md: -------------------------------------------------------------------------------- 1 | Overview 2 | ======== 3 | 4 | The following database engines are supported: 5 | 6 | | Database | Configuration class | SQLAlchemy Engine / dialect | 7 | | ------------------------- | ------------------- | --------------------------- | 8 | | [PostgreSQL] | PostgreSQLDB | postgresql+psycopg2 9 | | [Amazon Redshift] | RedshiftDB | postgresql+psycopg2 10 | | [Google Big Query] | BigQueryDB | bigquery 11 | | [Databricks] | DatabricksDB | databricks+connector 12 | | [MariaDB] | MysqlDB | - 13 | | [MySQL] | MysqlDB | - 14 | | [Microsoft SQL Server] | SQLServerDB | mssql+pyodbc 15 | | [Azure Synapse Analytics] | SQLServerDB | mssql+pyodbc 16 | | [Oracle Database] | OracleDB | - 17 | | [Snowflake] | SnowflakeDB | snowflake 18 | | [SQLite] | SQLiteDB | sqlite 19 | 20 | 21 | [PostgreSQL]: https://www.postgresql.org/ 22 | [Amazon Redshift]: https://aws.amazon.com/de/redshift/ 23 | [Google Big Query]: https://cloud.google.com/bigquery 24 | [Databricks]: https://www.databricks.com/ 25 | [MariaDB]: https://mariadb.com/ 26 | [MySQL]: https://www.mysql.com/ 27 | [Oracle Database]: https://www.oracle.com/database/ 28 | [Snowflake]: https://www.snowflake.com/ 29 | [SQLite]: https://www.sqlite.org/ 30 | [Microsoft SQL Server]: https://www.microsoft.com/en-us/sql-server 31 | [Azure Synapse Analytics]: https://azure.microsoft.com/en-us/services/synapse-analytics/ 32 | 33 | 34 | Function support matrix 35 | ----------------------- 36 | 37 | Shows which functions are supported with which database engine: 38 | 39 | | Configuration class | Querying | Write STDOUT | Read STDIN | DB-API 2.0 | UI schema support | 40 | | ------------------- | -------- | ------------ | ---------- | ---------- | ----------------- | 41 | | PostgreSQLDB | Yes | Yes | Yes | Yes | Yes 42 | | RedshiftDB | Yes | Yes | Yes | Yes | Yes 43 | | BigQueryDB | Yes | Yes | 
Yes | Yes | *no foreign key support by engine* 44 | | DatabricksDB | Yes | Yes | - | Yes | 45 | | MysqlDB | Yes | Yes | - | Yes | Yes 46 | | SQLServerDB | Yes | Yes | Yes | Yes | Yes 47 | | OracleDB | Yes | Yes | - | - | 48 | | SnowflakeDB | Yes | Yes | - | - | 49 | | SQLiteDB | Yes | Yes | - | Yes | 50 | 51 | *Write STDOUT* gives the possibility to write the result of a query to STDOUT 52 | 53 | *Read STDIN* gives the possibility to read data from STDIN into a predefined SQL table 54 | 55 | 56 | Format support 57 | -------------- 58 | 59 | Shows the formats supported per database engine 60 | 61 | ### Read STDIN 62 | 63 | | Configuration class | CSV | JsonL | Avro | Parquet | ORC | 64 | | ------------------- | ----| ----- | ---- | ------- | --- | 65 | | PostgreSQLDB | Yes | Yes | - | - | - | 66 | | RedshiftDB | Yes | Yes | - | - | - | 67 | | BigQueryDB | Yes | Yes | Yes | Yes | Yes | 68 | | SQLServerDB | Yes | - | - | - | - | 69 | 70 | 71 | ### Write STDOUT 72 | 73 | | Configuration class | CSV | JsonL | Avro | Parquet | ORC | 74 | | ------------------- | ----| ----- | ---- | ------- | --- | 75 | | PostgreSQLDB | Yes | - | - | - | - | 76 | | RedshiftDB | Yes | - | - | - | - | 77 | | BigQueryDB | Yes | - | - | - | - | 78 | | DatabricksDB | Yes | - | - | - | - | 79 | | MysqlDB | Yes | - | - | - | - | 80 | | SQLServerDB | Yes | - | - | - | - | 81 | | OracleDB | Yes | - | - | - | - | 82 | | SnowflakeDB | Yes | - | - | - | - | 83 | | SQLiteDB | Yes | - | - | - | - | 84 | 85 | 86 | Copy matrix 87 | ----------- 88 | 89 | Shows which copy operations are implemented by default. 90 | 91 | | from / to | PostgreSQLDB | RedshiftDB | BigQueryDB | DatabricksDB | MysqlDB | SQLServerDB | OracleDB | SnowflakeDB | SQLiteDB | 92 | | ------------ | ------------ | ---------- | ---------- | ------------ | ------- | ----------- | -------- | ----------- | -------- | 93 | | PostgreSQLDB | Yes | Yes | Yes | - | - | - | - | - | - | 94 | | RedshiftDB | Yes | Yes | Yes | - | - | - | - | - | - | 95 | | BigQueryDB | Yes | Yes | - | - | - | - | - | - | - | 96 | | DatabricksDB | - | - | - | - | - | - | - | - | - | 97 | | MysqlDB | Yes | Yes | Yes | - | - | - | - | - | - | 98 | | SQLServerDB | Yes | Yes | Yes | - | - | - | - | - | - | 99 | | OracleDB | Yes | Yes | Yes | - | - | - | - | - | - | 100 | | SnowflakeDB | - | - | - | - | - | - | - | - | - | 101 | | SQLiteDB | Yes | Yes | Yes | - | - | - | - | - | - | 102 | -------------------------------------------------------------------------------- /tests/seed/accounts_lf.jsonl: -------------------------------------------------------------------------------- 1 | {"_id":"63f4e20bc595c039ae346f3d","index":0,"guid":"cfdb98f5-97d9-49e2-9e58-70e2b2008a86","isActive":true,"balance":"$3,442.22","picture":"http://placehold.it/32x32","age":31,"eyeColor":"brown","name":"Harding Melton","gender":"male","company":"ZIORE","email":"hardingmelton@ziore.com","phone":"+1 (876) 570-3086","address":"240 Louise Terrace, Nescatunga, West Virginia, 3694","about":"Eiusmod dolor enim sit tempor mollit anim laboris proident duis voluptate. Consequat non in commodo esse ut ex ut ut aute. Do tempor irure ad cillum ea ea qui sint deserunt aliqua duis Lorem proident irure. Ut commodo eu elit id ut commodo sunt voluptate.\r\n","registered":"2015-03-10T01:06:59 -01:00","latitude":-42.546453,"longitude":55.051751,"tags":["cupidatat","consectetur","nisi","commodo","irure","sint","exercitation"],"friends":[{"id":0,"name":"Carly Francis"},{"id":1,"name":"Daugherty Mccall"},{"id":2,"name":"Ortiz Howe"}],"greeting":"Hello, Harding Melton! 
You have 6 unread messages.","favoriteFruit":"apple"} 2 | {"_id":"63f4e20b0ee13f29c71af014","index":1,"guid":"d3874525-fb7a-46e8-9d94-0fd6b12c0903","isActive":false,"balance":"$3,913.68","picture":"http://placehold.it/32x32","age":36,"eyeColor":"green","name":"Ruiz Castillo","gender":"male","company":"VURBO","email":"ruizcastillo@vurbo.com","phone":"+1 (827) 461-3371","address":"848 Moore Place, Neahkahnie, Nebraska, 5562","about":"Do nostrud velit non consequat do aute laboris consequat quis nisi sint voluptate quis. Ut nisi velit velit consequat. Duis enim aliqua quis est sit velit amet veniam reprehenderit cupidatat et sit.\r\n","registered":"2021-10-29T05:20:04 -02:00","latitude":59.08799,"longitude":-36.282546,"tags":["cupidatat","pariatur","exercitation","dolor","et","magna","sit"],"friends":[{"id":0,"name":"Adele Douglas"},{"id":1,"name":"Minnie Gillespie"},{"id":2,"name":"Casandra Alford"}],"greeting":"Hello, Ruiz Castillo! You have 2 unread messages.","favoriteFruit":"strawberry"} 3 | {"_id":"63f4e20bf847585032704223","index":2,"guid":"738306f5-7d1c-49c1-abf4-b1e2fbd94b98","isActive":false,"balance":"$2,446.57","picture":"http://placehold.it/32x32","age":37,"eyeColor":"green","name":"Landry Bryant","gender":"male","company":"WARETEL","email":"landrybryant@waretel.com","phone":"+1 (821) 406-2170","address":"935 Dearborn Court, Blandburg, Kansas, 3741","about":"Ullamco laboris ad do tempor ut et in qui consequat. Labore est occaecat anim consectetur. Sunt sit labore sit laborum ad ex. Voluptate cillum veniam Lorem incididunt nulla qui laboris cupidatat ut dolor mollit.\r\n","registered":"2016-10-14T03:54:30 -02:00","latitude":2.67078,"longitude":9.19132,"tags":["nulla","irure","exercitation","consectetur","in","officia","anim"],"friends":[{"id":0,"name":"Hewitt Smith"},{"id":1,"name":"Hilda Fields"},{"id":2,"name":"Zelma Walters"}],"greeting":"Hello, Landry Bryant! You have 8 unread messages.","favoriteFruit":"apple"} 4 | {"_id":"63f4e20b2866d26b25475cce","index":3,"guid":"4d748b35-9ca3-473d-9a32-c07d335163f8","isActive":false,"balance":"$3,220.93","picture":"http://placehold.it/32x32","age":20,"eyeColor":"green","name":"Brandi Mccullough","gender":"female","company":"PYRAMIS","email":"brandimccullough@pyramis.com","phone":"+1 (965) 537-3191","address":"314 Schweikerts Walk, Bannock, Ohio, 6598","about":"Est non excepteur aliqua labore in nostrud consequat irure anim excepteur occaecat ipsum. Irure elit et eiusmod excepteur laborum ipsum anim. Magna aliquip pariatur aliqua sit mollit in fugiat cupidatat sit dolore. Minim eiusmod officia mollit et commodo officia adipisicing cupidatat quis irure irure sit proident. Labore commodo adipisicing incididunt anim exercitation veniam. Duis veniam nulla fugiat officia enim reprehenderit eiusmod voluptate pariatur velit adipisicing.\r\n","registered":"2022-03-12T05:35:31 -01:00","latitude":-70.771196,"longitude":82.03046,"tags":["fugiat","eu","enim","dolore","veniam","dolor","consequat"],"friends":[{"id":0,"name":"Kim Beasley"},{"id":1,"name":"Gould Villarreal"},{"id":2,"name":"Therese Salas"}],"greeting":"Hello, Brandi Mccullough! 
You have 1 unread messages.","favoriteFruit":"strawberry"} 5 | {"_id":"63f4e20bffebf41e18fc1b88","index":4,"guid":"e2da8a8c-6ddb-48b1-a329-17a4d6f5f9ba","isActive":false,"balance":"$3,431.47","picture":"http://placehold.it/32x32","age":24,"eyeColor":"brown","name":"Gail Davenport","gender":"female","company":"PHUEL","email":"gaildavenport@phuel.com","phone":"+1 (999) 550-3089","address":"104 Conduit Boulevard, Buxton, Tennessee, 3220","about":"Do quis qui anim aliquip qui aute commodo fugiat exercitation et fugiat ea consequat non. Ullamco pariatur magna ex et exercitation nostrud magna. Lorem esse do do laboris non aliqua nostrud sint.\r\n","registered":"2019-05-31T09:03:18 -02:00","latitude":2.16119,"longitude":-122.728692,"tags":["do","eu","dolor","qui","cupidatat","sint","aliquip"],"friends":[{"id":0,"name":"Kerri Joyce"},{"id":1,"name":"Farmer Duncan"},{"id":2,"name":"Sears Coffey"}],"greeting":"Hello, Gail Davenport! You have 3 unread messages.","favoriteFruit":"apple"} 6 | {"_id":"63f4e20b9413b0ebd85cf187","index":5,"guid":"e38ee4f1-c78c-47cf-a7d7-02504689b856","isActive":false,"balance":"$3,883.08","picture":"http://placehold.it/32x32","age":30,"eyeColor":"green","name":"Ella Hawkins","gender":"female","company":"ZIGGLES","email":"ellahawkins@ziggles.com","phone":"+1 (805) 491-2254","address":"809 Columbus Place, Avoca, Georgia, 5345","about":"In anim ex nostrud elit. Reprehenderit voluptate id reprehenderit mollit tempor culpa et esse commodo voluptate fugiat. Deserunt dolor enim tempor voluptate irure.\r\n","registered":"2017-09-14T03:36:53 -02:00","latitude":11.685129,"longitude":102.823545,"tags":["nulla","ea","qui","nulla","sint","elit","ea"],"friends":[{"id":0,"name":"Lorna Dean"},{"id":1,"name":"Ada Haynes"},{"id":2,"name":"Mayer Harrington"}],"greeting":"Hello, Ella Hawkins! You have 6 unread messages.","favoriteFruit":"banana"} -------------------------------------------------------------------------------- /tests/seed/accounts_crlf.jsonl: -------------------------------------------------------------------------------- 1 | {"_id":"63f4e20bc595c039ae346f3d","index":0,"guid":"cfdb98f5-97d9-49e2-9e58-70e2b2008a86","isActive":true,"balance":"$3,442.22","picture":"http://placehold.it/32x32","age":31,"eyeColor":"brown","name":"Harding Melton","gender":"male","company":"ZIORE","email":"hardingmelton@ziore.com","phone":"+1 (876) 570-3086","address":"240 Louise Terrace, Nescatunga, West Virginia, 3694","about":"Eiusmod dolor enim sit tempor mollit anim laboris proident duis voluptate. Consequat non in commodo esse ut ex ut ut aute. Do tempor irure ad cillum ea ea qui sint deserunt aliqua duis Lorem proident irure. Ut commodo eu elit id ut commodo sunt voluptate.\r\n","registered":"2015-03-10T01:06:59 -01:00","latitude":-42.546453,"longitude":55.051751,"tags":["cupidatat","consectetur","nisi","commodo","irure","sint","exercitation"],"friends":[{"id":0,"name":"Carly Francis"},{"id":1,"name":"Daugherty Mccall"},{"id":2,"name":"Ortiz Howe"}],"greeting":"Hello, Harding Melton! 
You have 6 unread messages.","favoriteFruit":"apple"} 2 | {"_id":"63f4e20b0ee13f29c71af014","index":1,"guid":"d3874525-fb7a-46e8-9d94-0fd6b12c0903","isActive":false,"balance":"$3,913.68","picture":"http://placehold.it/32x32","age":36,"eyeColor":"green","name":"Ruiz Castillo","gender":"male","company":"VURBO","email":"ruizcastillo@vurbo.com","phone":"+1 (827) 461-3371","address":"848 Moore Place, Neahkahnie, Nebraska, 5562","about":"Do nostrud velit non consequat do aute laboris consequat quis nisi sint voluptate quis. Ut nisi velit velit consequat. Duis enim aliqua quis est sit velit amet veniam reprehenderit cupidatat et sit.\r\n","registered":"2021-10-29T05:20:04 -02:00","latitude":59.08799,"longitude":-36.282546,"tags":["cupidatat","pariatur","exercitation","dolor","et","magna","sit"],"friends":[{"id":0,"name":"Adele Douglas"},{"id":1,"name":"Minnie Gillespie"},{"id":2,"name":"Casandra Alford"}],"greeting":"Hello, Ruiz Castillo! You have 2 unread messages.","favoriteFruit":"strawberry"} 3 | {"_id":"63f4e20bf847585032704223","index":2,"guid":"738306f5-7d1c-49c1-abf4-b1e2fbd94b98","isActive":false,"balance":"$2,446.57","picture":"http://placehold.it/32x32","age":37,"eyeColor":"green","name":"Landry Bryant","gender":"male","company":"WARETEL","email":"landrybryant@waretel.com","phone":"+1 (821) 406-2170","address":"935 Dearborn Court, Blandburg, Kansas, 3741","about":"Ullamco laboris ad do tempor ut et in qui consequat. Labore est occaecat anim consectetur. Sunt sit labore sit laborum ad ex. Voluptate cillum veniam Lorem incididunt nulla qui laboris cupidatat ut dolor mollit.\r\n","registered":"2016-10-14T03:54:30 -02:00","latitude":2.67078,"longitude":9.19132,"tags":["nulla","irure","exercitation","consectetur","in","officia","anim"],"friends":[{"id":0,"name":"Hewitt Smith"},{"id":1,"name":"Hilda Fields"},{"id":2,"name":"Zelma Walters"}],"greeting":"Hello, Landry Bryant! You have 8 unread messages.","favoriteFruit":"apple"} 4 | {"_id":"63f4e20b2866d26b25475cce","index":3,"guid":"4d748b35-9ca3-473d-9a32-c07d335163f8","isActive":false,"balance":"$3,220.93","picture":"http://placehold.it/32x32","age":20,"eyeColor":"green","name":"Brandi Mccullough","gender":"female","company":"PYRAMIS","email":"brandimccullough@pyramis.com","phone":"+1 (965) 537-3191","address":"314 Schweikerts Walk, Bannock, Ohio, 6598","about":"Est non excepteur aliqua labore in nostrud consequat irure anim excepteur occaecat ipsum. Irure elit et eiusmod excepteur laborum ipsum anim. Magna aliquip pariatur aliqua sit mollit in fugiat cupidatat sit dolore. Minim eiusmod officia mollit et commodo officia adipisicing cupidatat quis irure irure sit proident. Labore commodo adipisicing incididunt anim exercitation veniam. Duis veniam nulla fugiat officia enim reprehenderit eiusmod voluptate pariatur velit adipisicing.\r\n","registered":"2022-03-12T05:35:31 -01:00","latitude":-70.771196,"longitude":82.03046,"tags":["fugiat","eu","enim","dolore","veniam","dolor","consequat"],"friends":[{"id":0,"name":"Kim Beasley"},{"id":1,"name":"Gould Villarreal"},{"id":2,"name":"Therese Salas"}],"greeting":"Hello, Brandi Mccullough! 
You have 1 unread messages.","favoriteFruit":"strawberry"} 5 | {"_id":"63f4e20bffebf41e18fc1b88","index":4,"guid":"e2da8a8c-6ddb-48b1-a329-17a4d6f5f9ba","isActive":false,"balance":"$3,431.47","picture":"http://placehold.it/32x32","age":24,"eyeColor":"brown","name":"Gail Davenport","gender":"female","company":"PHUEL","email":"gaildavenport@phuel.com","phone":"+1 (999) 550-3089","address":"104 Conduit Boulevard, Buxton, Tennessee, 3220","about":"Do quis qui anim aliquip qui aute commodo fugiat exercitation et fugiat ea consequat non. Ullamco pariatur magna ex et exercitation nostrud magna. Lorem esse do do laboris non aliqua nostrud sint.\r\n","registered":"2019-05-31T09:03:18 -02:00","latitude":2.16119,"longitude":-122.728692,"tags":["do","eu","dolor","qui","cupidatat","sint","aliquip"],"friends":[{"id":0,"name":"Kerri Joyce"},{"id":1,"name":"Farmer Duncan"},{"id":2,"name":"Sears Coffey"}],"greeting":"Hello, Gail Davenport! You have 3 unread messages.","favoriteFruit":"apple"} 6 | {"_id":"63f4e20b9413b0ebd85cf187","index":5,"guid":"e38ee4f1-c78c-47cf-a7d7-02504689b856","isActive":false,"balance":"$3,883.08","picture":"http://placehold.it/32x32","age":30,"eyeColor":"green","name":"Ella Hawkins","gender":"female","company":"ZIGGLES","email":"ellahawkins@ziggles.com","phone":"+1 (805) 491-2254","address":"809 Columbus Place, Avoca, Georgia, 5345","about":"In anim ex nostrud elit. Reprehenderit voluptate id reprehenderit mollit tempor culpa et esse commodo voluptate fugiat. Deserunt dolor enim tempor voluptate irure.\r\n","registered":"2017-09-14T03:36:53 -02:00","latitude":11.685129,"longitude":102.823545,"tags":["nulla","ea","qui","nulla","sint","elit","ea"],"friends":[{"id":0,"name":"Lorna Dean"},{"id":1,"name":"Ada Haynes"},{"id":2,"name":"Mayer Harrington"}],"greeting":"Hello, Ella Hawkins! You have 6 unread messages.","favoriteFruit":"banana"} -------------------------------------------------------------------------------- /tests/seed/accounts_lf_lastrow.jsonl: -------------------------------------------------------------------------------- 1 | {"_id":"63f4e20bc595c039ae346f3d","index":0,"guid":"cfdb98f5-97d9-49e2-9e58-70e2b2008a86","isActive":true,"balance":"$3,442.22","picture":"http://placehold.it/32x32","age":31,"eyeColor":"brown","name":"Harding Melton","gender":"male","company":"ZIORE","email":"hardingmelton@ziore.com","phone":"+1 (876) 570-3086","address":"240 Louise Terrace, Nescatunga, West Virginia, 3694","about":"Eiusmod dolor enim sit tempor mollit anim laboris proident duis voluptate. Consequat non in commodo esse ut ex ut ut aute. Do tempor irure ad cillum ea ea qui sint deserunt aliqua duis Lorem proident irure. Ut commodo eu elit id ut commodo sunt voluptate.\r\n","registered":"2015-03-10T01:06:59 -01:00","latitude":-42.546453,"longitude":55.051751,"tags":["cupidatat","consectetur","nisi","commodo","irure","sint","exercitation"],"friends":[{"id":0,"name":"Carly Francis"},{"id":1,"name":"Daugherty Mccall"},{"id":2,"name":"Ortiz Howe"}],"greeting":"Hello, Harding Melton! 
You have 6 unread messages.","favoriteFruit":"apple"} 2 | {"_id":"63f4e20b0ee13f29c71af014","index":1,"guid":"d3874525-fb7a-46e8-9d94-0fd6b12c0903","isActive":false,"balance":"$3,913.68","picture":"http://placehold.it/32x32","age":36,"eyeColor":"green","name":"Ruiz Castillo","gender":"male","company":"VURBO","email":"ruizcastillo@vurbo.com","phone":"+1 (827) 461-3371","address":"848 Moore Place, Neahkahnie, Nebraska, 5562","about":"Do nostrud velit non consequat do aute laboris consequat quis nisi sint voluptate quis. Ut nisi velit velit consequat. Duis enim aliqua quis est sit velit amet veniam reprehenderit cupidatat et sit.\r\n","registered":"2021-10-29T05:20:04 -02:00","latitude":59.08799,"longitude":-36.282546,"tags":["cupidatat","pariatur","exercitation","dolor","et","magna","sit"],"friends":[{"id":0,"name":"Adele Douglas"},{"id":1,"name":"Minnie Gillespie"},{"id":2,"name":"Casandra Alford"}],"greeting":"Hello, Ruiz Castillo! You have 2 unread messages.","favoriteFruit":"strawberry"} 3 | {"_id":"63f4e20bf847585032704223","index":2,"guid":"738306f5-7d1c-49c1-abf4-b1e2fbd94b98","isActive":false,"balance":"$2,446.57","picture":"http://placehold.it/32x32","age":37,"eyeColor":"green","name":"Landry Bryant","gender":"male","company":"WARETEL","email":"landrybryant@waretel.com","phone":"+1 (821) 406-2170","address":"935 Dearborn Court, Blandburg, Kansas, 3741","about":"Ullamco laboris ad do tempor ut et in qui consequat. Labore est occaecat anim consectetur. Sunt sit labore sit laborum ad ex. Voluptate cillum veniam Lorem incididunt nulla qui laboris cupidatat ut dolor mollit.\r\n","registered":"2016-10-14T03:54:30 -02:00","latitude":2.67078,"longitude":9.19132,"tags":["nulla","irure","exercitation","consectetur","in","officia","anim"],"friends":[{"id":0,"name":"Hewitt Smith"},{"id":1,"name":"Hilda Fields"},{"id":2,"name":"Zelma Walters"}],"greeting":"Hello, Landry Bryant! You have 8 unread messages.","favoriteFruit":"apple"} 4 | {"_id":"63f4e20b2866d26b25475cce","index":3,"guid":"4d748b35-9ca3-473d-9a32-c07d335163f8","isActive":false,"balance":"$3,220.93","picture":"http://placehold.it/32x32","age":20,"eyeColor":"green","name":"Brandi Mccullough","gender":"female","company":"PYRAMIS","email":"brandimccullough@pyramis.com","phone":"+1 (965) 537-3191","address":"314 Schweikerts Walk, Bannock, Ohio, 6598","about":"Est non excepteur aliqua labore in nostrud consequat irure anim excepteur occaecat ipsum. Irure elit et eiusmod excepteur laborum ipsum anim. Magna aliquip pariatur aliqua sit mollit in fugiat cupidatat sit dolore. Minim eiusmod officia mollit et commodo officia adipisicing cupidatat quis irure irure sit proident. Labore commodo adipisicing incididunt anim exercitation veniam. Duis veniam nulla fugiat officia enim reprehenderit eiusmod voluptate pariatur velit adipisicing.\r\n","registered":"2022-03-12T05:35:31 -01:00","latitude":-70.771196,"longitude":82.03046,"tags":["fugiat","eu","enim","dolore","veniam","dolor","consequat"],"friends":[{"id":0,"name":"Kim Beasley"},{"id":1,"name":"Gould Villarreal"},{"id":2,"name":"Therese Salas"}],"greeting":"Hello, Brandi Mccullough! 
You have 1 unread messages.","favoriteFruit":"strawberry"} 5 | {"_id":"63f4e20bffebf41e18fc1b88","index":4,"guid":"e2da8a8c-6ddb-48b1-a329-17a4d6f5f9ba","isActive":false,"balance":"$3,431.47","picture":"http://placehold.it/32x32","age":24,"eyeColor":"brown","name":"Gail Davenport","gender":"female","company":"PHUEL","email":"gaildavenport@phuel.com","phone":"+1 (999) 550-3089","address":"104 Conduit Boulevard, Buxton, Tennessee, 3220","about":"Do quis qui anim aliquip qui aute commodo fugiat exercitation et fugiat ea consequat non. Ullamco pariatur magna ex et exercitation nostrud magna. Lorem esse do do laboris non aliqua nostrud sint.\r\n","registered":"2019-05-31T09:03:18 -02:00","latitude":2.16119,"longitude":-122.728692,"tags":["do","eu","dolor","qui","cupidatat","sint","aliquip"],"friends":[{"id":0,"name":"Kerri Joyce"},{"id":1,"name":"Farmer Duncan"},{"id":2,"name":"Sears Coffey"}],"greeting":"Hello, Gail Davenport! You have 3 unread messages.","favoriteFruit":"apple"} 6 | {"_id":"63f4e20b9413b0ebd85cf187","index":5,"guid":"e38ee4f1-c78c-47cf-a7d7-02504689b856","isActive":false,"balance":"$3,883.08","picture":"http://placehold.it/32x32","age":30,"eyeColor":"green","name":"Ella Hawkins","gender":"female","company":"ZIGGLES","email":"ellahawkins@ziggles.com","phone":"+1 (805) 491-2254","address":"809 Columbus Place, Avoca, Georgia, 5345","about":"In anim ex nostrud elit. Reprehenderit voluptate id reprehenderit mollit tempor culpa et esse commodo voluptate fugiat. Deserunt dolor enim tempor voluptate irure.\r\n","registered":"2017-09-14T03:36:53 -02:00","latitude":11.685129,"longitude":102.823545,"tags":["nulla","ea","qui","nulla","sint","elit","ea"],"friends":[{"id":0,"name":"Lorna Dean"},{"id":1,"name":"Ada Haynes"},{"id":2,"name":"Mayer Harrington"}],"greeting":"Hello, Ella Hawkins! You have 6 unread messages.","favoriteFruit":"banana"} 7 | -------------------------------------------------------------------------------- /tests/seed/accounts_crlf_lastrow.jsonl: -------------------------------------------------------------------------------- 1 | {"_id":"63f4e20bc595c039ae346f3d","index":0,"guid":"cfdb98f5-97d9-49e2-9e58-70e2b2008a86","isActive":true,"balance":"$3,442.22","picture":"http://placehold.it/32x32","age":31,"eyeColor":"brown","name":"Harding Melton","gender":"male","company":"ZIORE","email":"hardingmelton@ziore.com","phone":"+1 (876) 570-3086","address":"240 Louise Terrace, Nescatunga, West Virginia, 3694","about":"Eiusmod dolor enim sit tempor mollit anim laboris proident duis voluptate. Consequat non in commodo esse ut ex ut ut aute. Do tempor irure ad cillum ea ea qui sint deserunt aliqua duis Lorem proident irure. Ut commodo eu elit id ut commodo sunt voluptate.\r\n","registered":"2015-03-10T01:06:59 -01:00","latitude":-42.546453,"longitude":55.051751,"tags":["cupidatat","consectetur","nisi","commodo","irure","sint","exercitation"],"friends":[{"id":0,"name":"Carly Francis"},{"id":1,"name":"Daugherty Mccall"},{"id":2,"name":"Ortiz Howe"}],"greeting":"Hello, Harding Melton! 
You have 6 unread messages.","favoriteFruit":"apple"} 2 | {"_id":"63f4e20b0ee13f29c71af014","index":1,"guid":"d3874525-fb7a-46e8-9d94-0fd6b12c0903","isActive":false,"balance":"$3,913.68","picture":"http://placehold.it/32x32","age":36,"eyeColor":"green","name":"Ruiz Castillo","gender":"male","company":"VURBO","email":"ruizcastillo@vurbo.com","phone":"+1 (827) 461-3371","address":"848 Moore Place, Neahkahnie, Nebraska, 5562","about":"Do nostrud velit non consequat do aute laboris consequat quis nisi sint voluptate quis. Ut nisi velit velit consequat. Duis enim aliqua quis est sit velit amet veniam reprehenderit cupidatat et sit.\r\n","registered":"2021-10-29T05:20:04 -02:00","latitude":59.08799,"longitude":-36.282546,"tags":["cupidatat","pariatur","exercitation","dolor","et","magna","sit"],"friends":[{"id":0,"name":"Adele Douglas"},{"id":1,"name":"Minnie Gillespie"},{"id":2,"name":"Casandra Alford"}],"greeting":"Hello, Ruiz Castillo! You have 2 unread messages.","favoriteFruit":"strawberry"} 3 | {"_id":"63f4e20bf847585032704223","index":2,"guid":"738306f5-7d1c-49c1-abf4-b1e2fbd94b98","isActive":false,"balance":"$2,446.57","picture":"http://placehold.it/32x32","age":37,"eyeColor":"green","name":"Landry Bryant","gender":"male","company":"WARETEL","email":"landrybryant@waretel.com","phone":"+1 (821) 406-2170","address":"935 Dearborn Court, Blandburg, Kansas, 3741","about":"Ullamco laboris ad do tempor ut et in qui consequat. Labore est occaecat anim consectetur. Sunt sit labore sit laborum ad ex. Voluptate cillum veniam Lorem incididunt nulla qui laboris cupidatat ut dolor mollit.\r\n","registered":"2016-10-14T03:54:30 -02:00","latitude":2.67078,"longitude":9.19132,"tags":["nulla","irure","exercitation","consectetur","in","officia","anim"],"friends":[{"id":0,"name":"Hewitt Smith"},{"id":1,"name":"Hilda Fields"},{"id":2,"name":"Zelma Walters"}],"greeting":"Hello, Landry Bryant! You have 8 unread messages.","favoriteFruit":"apple"} 4 | {"_id":"63f4e20b2866d26b25475cce","index":3,"guid":"4d748b35-9ca3-473d-9a32-c07d335163f8","isActive":false,"balance":"$3,220.93","picture":"http://placehold.it/32x32","age":20,"eyeColor":"green","name":"Brandi Mccullough","gender":"female","company":"PYRAMIS","email":"brandimccullough@pyramis.com","phone":"+1 (965) 537-3191","address":"314 Schweikerts Walk, Bannock, Ohio, 6598","about":"Est non excepteur aliqua labore in nostrud consequat irure anim excepteur occaecat ipsum. Irure elit et eiusmod excepteur laborum ipsum anim. Magna aliquip pariatur aliqua sit mollit in fugiat cupidatat sit dolore. Minim eiusmod officia mollit et commodo officia adipisicing cupidatat quis irure irure sit proident. Labore commodo adipisicing incididunt anim exercitation veniam. Duis veniam nulla fugiat officia enim reprehenderit eiusmod voluptate pariatur velit adipisicing.\r\n","registered":"2022-03-12T05:35:31 -01:00","latitude":-70.771196,"longitude":82.03046,"tags":["fugiat","eu","enim","dolore","veniam","dolor","consequat"],"friends":[{"id":0,"name":"Kim Beasley"},{"id":1,"name":"Gould Villarreal"},{"id":2,"name":"Therese Salas"}],"greeting":"Hello, Brandi Mccullough! 
You have 1 unread messages.","favoriteFruit":"strawberry"} 5 | {"_id":"63f4e20bffebf41e18fc1b88","index":4,"guid":"e2da8a8c-6ddb-48b1-a329-17a4d6f5f9ba","isActive":false,"balance":"$3,431.47","picture":"http://placehold.it/32x32","age":24,"eyeColor":"brown","name":"Gail Davenport","gender":"female","company":"PHUEL","email":"gaildavenport@phuel.com","phone":"+1 (999) 550-3089","address":"104 Conduit Boulevard, Buxton, Tennessee, 3220","about":"Do quis qui anim aliquip qui aute commodo fugiat exercitation et fugiat ea consequat non. Ullamco pariatur magna ex et exercitation nostrud magna. Lorem esse do do laboris non aliqua nostrud sint.\r\n","registered":"2019-05-31T09:03:18 -02:00","latitude":2.16119,"longitude":-122.728692,"tags":["do","eu","dolor","qui","cupidatat","sint","aliquip"],"friends":[{"id":0,"name":"Kerri Joyce"},{"id":1,"name":"Farmer Duncan"},{"id":2,"name":"Sears Coffey"}],"greeting":"Hello, Gail Davenport! You have 3 unread messages.","favoriteFruit":"apple"} 6 | {"_id":"63f4e20b9413b0ebd85cf187","index":5,"guid":"e38ee4f1-c78c-47cf-a7d7-02504689b856","isActive":false,"balance":"$3,883.08","picture":"http://placehold.it/32x32","age":30,"eyeColor":"green","name":"Ella Hawkins","gender":"female","company":"ZIGGLES","email":"ellahawkins@ziggles.com","phone":"+1 (805) 491-2254","address":"809 Columbus Place, Avoca, Georgia, 5345","about":"In anim ex nostrud elit. Reprehenderit voluptate id reprehenderit mollit tempor culpa et esse commodo voluptate fugiat. Deserunt dolor enim tempor voluptate irure.\r\n","registered":"2017-09-14T03:36:53 -02:00","latitude":11.685129,"longitude":102.823545,"tags":["nulla","ea","qui","nulla","sint","elit","ea"],"friends":[{"id":0,"name":"Lorna Dean"},{"id":1,"name":"Ada Haynes"},{"id":2,"name":"Mayer Harrington"}],"greeting":"Hello, Ella Hawkins! You have 6 unread messages.","favoriteFruit":"banana"} 7 | -------------------------------------------------------------------------------- /mara_db/bigquery.py: -------------------------------------------------------------------------------- 1 | """Easy access to BigQuery databases via google.cloud.bigquery""" 2 | 3 | import typing 4 | from warnings import warn 5 | 6 | import mara_db.dbs 7 | import sys 8 | import time 9 | from google.api_core.exceptions import BadRequest 10 | 11 | 12 | def bigquery_credentials(db: typing.Union[str, mara_db.dbs.BigQueryDB]) -> 'google.oauth2.service_account.Credentials': 13 | """Get the parsed service account""" 14 | from google.oauth2.service_account import Credentials 15 | 16 | if isinstance(db, str): 17 | db = mara_db.dbs.db(db) 18 | 19 | return Credentials.from_service_account_file(db.service_account_json_file_name) 20 | 21 | 22 | def bigquery_client(db: typing.Union[str, mara_db.dbs.BigQueryDB]) -> 'google.cloud.bigquery.client.Client': 23 | """Get an bigquery client for a bq database alias""" 24 | from google.cloud.bigquery.client import Client 25 | 26 | if isinstance(db, str): 27 | db = mara_db.dbs.db(db) 28 | 29 | credentials = bigquery_credentials(db) 30 | 31 | return Client(project=credentials.project_id, credentials=credentials, location=db.location) 32 | 33 | 34 | def bigquery_cursor_context(db: typing.Union[str, mara_db.dbs.BigQueryDB]) \ 35 | -> 'google.cloud.bigquery.dbapi.cursor.Cursor': 36 | """Creates a context with a bigquery cursor for a database alias""" 37 | warn('Function bigquery_cursor_context(db) is deprecated. 
Please use mara_db.dbs.cursor_context(db) instead.', 38 | category=DeprecationWarning) 39 | 40 | if isinstance(db, str): 41 | db = mara_db.dbs.db(db) 42 | 43 | assert (isinstance(db, mara_db.dbs.BigQueryDB)) 44 | 45 | return mara_db.dbs.cursor_context(db) 46 | 47 | 48 | def create_bigquery_table_from_postgresql_query( 49 | postgresql_query: str, postgresql_db_alias: str, 50 | bigquery_db_alias: str, bigquery_dataset_id: str, bigquery_table_name: str): 51 | """ 52 | Creates a BigQuery table from a PostgreSQL SELECT query. Prints the generated query 53 | 54 | Useful for copying PostgreSQL tables to BigQuery (create table first and then copy) 55 | 56 | Example: 57 | >>> create_bigquery_table_from_postgresql_query( 58 | >>> postgresql_db_alias='dwh', 59 | >>> postgresql_query='SELECT 1::SMALLINT AS a, now() as b', 60 | >>> bigquery_db_alias='reporting', 61 | >>> bigquery_dataset_id='foo', 62 | >>> bigquery_table_name='bar') 63 | CREATE OR REPLACE TABLE `foo`.`bar` ( 64 | `a` INT64, 65 | `b` TIMESTAMP 66 | ) 67 | 68 | Args: 69 | postgresql_query: The query to execute in PostgreSQL, must not end with a semicolon 70 | postgresql_db_alias: The postgresql database to execute the query in 71 | bigquery_db_alias: The mara db alias of the bigquery connection 72 | bigquery_dataset_id: The id of the bigquery dataset in which the table is to be created 73 | bigquery_table_name: The name of the table to be created 74 | """ 75 | from mara_db.postgresql import postgres_cursor_context 76 | with postgres_cursor_context(postgresql_db_alias) as cursor: 77 | cursor.execute('SELECT oid, typname FROM pg_type;') 78 | pg_types = {} 79 | for oid, type_name in cursor.fetchall(): 80 | pg_types[oid] = type_name 81 | 82 | # https://cloud.google.com/bigquery/docs/reference/standard-sql/federated_query_functions#postgressql_mapping 83 | pg_to_bigquery_type_mapping = { 84 | 'bool': 'BOOL', 85 | 'bytea': 'BYTES', 86 | 'date': 'DATE', 87 | 'int2': 'INT64', 88 | 'int4': 'INT64', 89 | 'int8': 'INT64', 90 | 'json': 'STRING', 91 | 'jsonb': 'STRING', 92 | 'numeric': 'NUMERIC', 93 | 'float4': 'FLOAT64', 94 | 'float8': 'FLOAT64', 95 | 'varchar': 'STRING', 96 | 'text': 'STRING', 97 | 'time': 'TIME', 98 | 'timestamp': 'DATETIME', 99 | 'timestamptz': 'TIMESTAMP', 100 | } 101 | 102 | cursor.execute(postgresql_query + ' LIMIT 0') 103 | 104 | column_specs = [] 105 | for column in cursor.description: 106 | pg_type = pg_types[column.type_code] 107 | assert pg_type in pg_to_bigquery_type_mapping, f"Unmapped type '{pg_type}'" 108 | column_specs.append(f'`{column.name}` {pg_to_bigquery_type_mapping[pg_type]}') 109 | 110 | query = f""" 111 | CREATE OR REPLACE TABLE `{bigquery_dataset_id}`.`{bigquery_table_name}` ( 112 | """ + ',\n '.join(column_specs) + "\n)" 113 | 114 | print(query) 115 | 116 | client = bigquery_client(bigquery_db_alias) 117 | client.query(query) 118 | 119 | 120 | def replace_dataset(db_alias: str, dataset_id: str, next_dataset_id: str): 121 | """ 122 | Replaces a BigQuery dataset with the contents of another one. 
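    Example (a minimal usage sketch; the alias and dataset names are made up for illustration): >>> replace_dataset('reporting', dataset_id='dwh', next_dataset_id='dwh_next')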
123 | 124 | Args: 125 | db_alias: the mara db alias of the bigquery connection 126 | dataset_id: the dataset that will be replaced 127 | next_dataset_id: the dataset providing the new contents 128 | """ 129 | print(f'replacing dataset `{dataset_id}` with contents of `{next_dataset_id}`') 130 | from mara_db.bigquery import bigquery_client 131 | 132 | client = bigquery_client(db_alias) 133 | 134 | # create target dataset if not exists 135 | client.create_dataset(dataset=dataset_id, exists_ok=True) 136 | 137 | # all tables in the next dataset 138 | next_tables = set([table.table_id for table in client.list_tables(next_dataset_id)]) 139 | 140 | ddl = '\n' 141 | 142 | # delete tables in target dataset that are not in next dataset 143 | for table in client.list_tables(dataset_id): 144 | if table.table_id not in next_tables: 145 | ddl += f'DROP TABLE `{dataset_id}`.`{table.table_id}`; \n' 146 | 147 | # hopefully atomic operation 148 | for table_id in next_tables: 149 | ddl += f'CREATE OR REPLACE TABLE `{dataset_id}`.`{table_id}` AS SELECT * FROM `{next_dataset_id}`.`{table_id}`;\n' 150 | ddl += f'DROP TABLE `{next_dataset_id}`.`{table_id}`;\n' 151 | 152 | print(ddl) 153 | client.query(ddl) 154 | 155 | print(f'deleting dataset {next_dataset_id}') 156 | retries = 1 157 | # for some reason the 'DROP TABLE ...' statements take some time, retry the dataset deletion 158 | while True: 159 | try: 160 | client.delete_dataset(next_dataset_id) 161 | return 162 | except BadRequest as e: 163 | if retries <= 10: 164 | print(e, file=sys.stderr) 165 | seconds_to_sleep = retries * 4 166 | print(f'Waiting {seconds_to_sleep} seconds') 167 | time.sleep(seconds_to_sleep) 168 | retries += 1 169 | else: 170 | raise e 171 | -------------------------------------------------------------------------------- /mara_db/auto_migration.py: -------------------------------------------------------------------------------- 1 | """Auto-migration of sql alchemy models with alembic. Use with care""" 2 | 3 | import copy 4 | import io 5 | import sys 6 | import typing 7 | 8 | import sqlalchemy.engine 9 | import sqlalchemy.sql.schema 10 | from sqlalchemy import * # unfortunately needed to get the eval part further down working 11 | # noinspection PyUnresolvedReferences 12 | from sqlalchemy.dialects import * # unfortunately needed to get the eval part further down working 13 | 14 | import mara_db.dbs 15 | from .sqlalchemy_engine import engine 16 | 17 | 18 | def auto_migrate(engine: sqlalchemy.engine.Engine, models: typing.List[sqlalchemy.sql.schema.MetaData]): 19 | """ 20 | Compares a database with a list of defined orm models and applies the diff. Prints executed SQL statements to stdout. 21 | 22 | Based on `alembic automigrations`_, but doesn't require intermediate migration files. 23 | 24 | Use with care, does not work in many cases. 25 | 26 | Args: 27 | engine: the database to use 28 | models: A list of orm models 29 | 30 | Returns: 31 | True in case of no failures 32 | 33 | .. 
_alembic automigrations: 34 | http://alembic.zzzcomputing.com/en/latest/autogenerate.html 35 | """ 36 | import alembic.runtime.migration 37 | import alembic.autogenerate 38 | import sqlalchemy_utils 39 | 40 | try: 41 | # create database if it does not exist 42 | if not sqlalchemy_utils.database_exists(engine.url): 43 | sqlalchemy_utils.create_database(engine.url) 44 | print(f'Created database "{engine.url!r}"\n') 45 | except Exception as e: 46 | print(f'Could not access or create database "{engine.url!r}":\n{e}', file=sys.stderr) 47 | return False 48 | 49 | # merge all models into a single metadata object 50 | combined_meta_data = MetaData() 51 | for model in models: 52 | model.metadata.tables[model.__tablename__].tometadata(combined_meta_data) 53 | 54 | # create diff between models and current db and translate to ddl 55 | ddl = [] 56 | with engine.connect() as connection: 57 | output = io.StringIO() 58 | 59 | diff_context = alembic.runtime.migration.MigrationContext(connection.dialect, connection, opts={}) 60 | 61 | autogen_context = alembic.autogenerate.api.AutogenContext(diff_context, 62 | opts={'sqlalchemy_module_prefix': 'sqlalchemy.', 63 | 'alembic_module_prefix': 'executor.'}) 64 | 65 | execution_context = alembic.runtime.migration.MigrationContext(connection.dialect, connection, 66 | opts={'output_buffer': output, 'as_sql': True}) 67 | 68 | # needed for the eval below 69 | executor = alembic.operations.Operations(execution_context) 70 | 71 | # Step 1: create a diff between the metadata and the database 72 | # operations is a list of MigrateOperation instances, e.g. a DropTableOp 73 | operations = alembic.autogenerate.produce_migrations(diff_context, combined_meta_data).upgrade_ops.ops 74 | 75 | for operation in operations: 76 | # Step 2: autogenerate a python statement from the operation, e.g. "executor.drop_table('bar')" 77 | renderer = alembic.autogenerate.renderers.dispatch(operation) 78 | statements = renderer(autogen_context, operation) 79 | if not isinstance(statements, list): 80 | statements = [statements] 81 | 82 | for statement in statements: 83 | # Step 3: "execute" python statement and get sql from buffer, e.g. "DROP TABLE bar;" 84 | try: 85 | eval(statement) 86 | except Exception as e: 87 | print('statement: ' + statement) 88 | raise e 89 | ddl.append(output.getvalue()) 90 | output.truncate(0) 91 | output.seek(0) 92 | 93 | with engine.begin() as connection: 94 | for statement in ddl: 95 | sys.stdout.write('\033[1;32m' + statement + '\033[0;0m') 96 | connection.execute(statement) 97 | 98 | return True 99 | 100 | 101 | def auto_discover_models_and_migrate() -> bool: 102 | """ 103 | Auto-migrates all sqlalchemy models that have been marked for auto-migration, against the database with the alias 'mara'. 104 | 105 | Models are marked for auto-migration by being put into a module-level `MARA_AUTOMIGRATE_SQLALCHEMY_MODELS` 106 | variable. E.g.
107 | 108 | MARA_AUTOMIGRATE_SQLALCHEMY_MODELS = [MyModel] 109 | 110 | For this, all modules that contain sqlalchemy models need to be loaded first 111 | 112 | Returns: 113 | True when no failure happened 114 | """ 115 | models = [] 116 | for name, module in copy.copy(sys.modules).items(): 117 | if 'MARA_AUTOMIGRATE_SQLALCHEMY_MODELS' in dir(module): 118 | module_models = getattr(module, 'MARA_AUTOMIGRATE_SQLALCHEMY_MODELS') 119 | if isinstance(module_models, typing.Callable): 120 | module_models = module_models() 121 | if isinstance(module_models, typing.Dict): 122 | module_models = module_models.values() 123 | assert (isinstance(module_models, typing.Iterable)) 124 | models += module_models 125 | return auto_migrate(engine('mara'), models) 126 | 127 | 128 | if __name__ == "__main__": 129 | # Example 130 | import sqlalchemy.ext.declarative 131 | import tempfile 132 | import pathlib 133 | 134 | with tempfile.TemporaryDirectory() as dir: 135 | db = mara_db.dbs.SQLiteDB(file_name=pathlib.Path(dir) / 'test.sqlite') 136 | 137 | 138 | # define a model / table 139 | class MyTable(sqlalchemy.ext.declarative.declarative_base()): 140 | __tablename__ = 'my_table' 141 | my_table_id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 142 | column_1 = sqlalchemy.Column(sqlalchemy.TEXT, nullable=False, index=True) 143 | 144 | 145 | auto_migrate(engine=engine(db), models=[MyTable]) 146 | 147 | 148 | # -> 149 | # Created database "sqlite:////var/folders/gg/8117h7rj08zd9rpt55l315_1xx044y/T/tmpl_sdop4j/test.sqlite" 150 | # 151 | # CREATE TABLE my_table ( 152 | # my_table_id SERIAL NOT NULL, 153 | # column_1 TEXT NOT NULL, 154 | # PRIMARY KEY (my_table_id) 155 | # ); 156 | # 157 | # CREATE INDEX ix_my_table_column_1 ON my_table (column_1); 158 | 159 | # remove index and add another column 160 | class MyTable(sqlalchemy.ext.declarative.declarative_base()): 161 | __tablename__ = 'my_table' 162 | my_table_id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 163 | column_1 = sqlalchemy.Column(sqlalchemy.TEXT, nullable=False) 164 | column_2 = sqlalchemy.Column(sqlalchemy.Integer) 165 | 166 | 167 | auto_migrate(engine=engine(db), models=[MyTable]) 168 | # -> 169 | # ALTER TABLE my_table ADD COLUMN column_2 INTEGER; 170 | # 171 | # DROP INDEX ix_my_table_text_column_1; 172 | -------------------------------------------------------------------------------- /tests/mssql/test_mssql.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import pytest 3 | import subprocess 4 | import typing as t 5 | 6 | from mara_db import dbs 7 | 8 | from ..command_helper import * 9 | from ..db_test_helper import db_is_responsive, db_replace_placeholders 10 | from ..local_config import MSSQL_DB, MSSQL_SQSH_DB, MSSQL_SQLCMD_DB 11 | 12 | 13 | if not MSSQL_DB: 14 | pytest.skip("skipping SQLServerDB tests: variable MSSQL_DB not set", allow_module_level=True) 15 | 16 | 17 | @pytest.fixture(scope="session") 18 | def mssql_db(docker_ip, docker_services) -> t.Tuple[str, int]: 19 | """Ensures that a SQL Server instance is running on docker.""" 20 | 21 | docker_port = docker_services.port_for("mssql", 1433) 22 | db = db_replace_placeholders(MSSQL_DB, docker_ip, docker_port) 23 | 24 | # here we need to wait until the SQL Server port is available.
25 | docker_services.wait_until_responsive( 26 | timeout=30.0, pause=0.1, check=lambda: db_is_responsive(db) 27 | ) 28 | 29 | return db 30 | 31 | 32 | @pytest.mark.dependency() 33 | def test_mssql_shell_query_command(mssql_db): 34 | command = execute_sql_statement_command(mssql_db, "SELECT 1") 35 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 36 | print(pstdout) 37 | assert exitcode == 0 38 | 39 | 40 | @pytest.mark.dependency(depends=['test_mssql_shell_query_command']) 41 | def test_mssql_ddl(mssql_db): 42 | """Runs the DDL script required for other tests""" 43 | # run 'test_mssql_ddl.sql' 44 | ddl_file_path = str((pathlib.Path(__file__).parent / 'test_mssql_ddl.sql').absolute()) 45 | command = execute_sql_file_command(mssql_db, ddl_file_path) 46 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 47 | print(pstdout) 48 | assert exitcode == 0 49 | 50 | 51 | def test_mssql_sqlalchemy(mssql_db): 52 | """ 53 | A simple test to check if the SQLAlchemy connection works 54 | """ 55 | from ..db_test_helper import _test_sqlalchemy 56 | _test_sqlalchemy(mssql_db) 57 | 58 | 59 | def test_mssql_connect(mssql_db): 60 | """ 61 | A simple test to check if the connect API works. 62 | """ 63 | from ..db_test_helper import _test_connect 64 | _test_connect(mssql_db) 65 | 66 | 67 | def test_mssql_cursor_context(mssql_db): 68 | """ 69 | A simple test to check if the cursor context of the db works. 70 | """ 71 | from ..db_test_helper import _test_cursor_context 72 | _test_cursor_context(mssql_db) 73 | 74 | 75 | 76 | """ 77 | ################################################################################################################################# 78 | # Tests specific to sqsh 79 | """ 80 | 81 | @pytest.fixture(scope="session") 82 | def mssql_sqsh_db(docker_ip, docker_services) -> t.Tuple[str, int]: 83 | """Ensures that a SQL Server instance is running on docker.""" 84 | 85 | docker_port = docker_services.port_for("mssql", 1433) 86 | db = db_replace_placeholders(MSSQL_SQSH_DB, docker_ip, docker_port) 87 | 88 | # here we need to wait until the SQL Server port is available.
89 | docker_services.wait_until_responsive( 90 | timeout=30.0, pause=0.1, check=lambda: db_is_responsive(db) 91 | ) 92 | 93 | return db 94 | 95 | 96 | @pytest.mark.dependency() 97 | def test_mssql_sqsh_shell_query_command(mssql_sqsh_db): 98 | command = execute_sql_statement_command(mssql_sqsh_db, "SELECT 1") 99 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 100 | print(pstdout) 101 | assert exitcode == 0 102 | 103 | 104 | @pytest.mark.dependency() 105 | def test_mssql_sqsh_shell_copy_to_stout(mssql_sqsh_db): 106 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqsh_db, "SELECT 1 AS Col1, 'FOO' AS Col2 UNION ALL SELECT 2, 'BAR'") 107 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 108 | print(pstdout) 109 | assert exitcode == 0 110 | assert pstdout == '''Col1,Col2 111 | 1,"FOO" 112 | 2,"BAR"''' 113 | 114 | 115 | 116 | """ 117 | ################################################################################################################################# 118 | # Tests specific to sqlcmd 119 | """ 120 | 121 | @pytest.fixture(scope="session") 122 | def mssql_sqlcmd_db(docker_ip, docker_services) -> t.Tuple[str, int]: 123 | """Ensures that a SQL Server instance is running on docker.""" 124 | 125 | docker_port = docker_services.port_for("mssql", 1433) 126 | db = db_replace_placeholders(MSSQL_SQLCMD_DB, docker_ip, docker_port) 127 | 128 | # here we need to wait until the SQL Server port is available. 129 | docker_services.wait_until_responsive( 130 | timeout=30.0, pause=0.1, check=lambda: db_is_responsive(db) 131 | ) 132 | 133 | return db 134 | 135 | 136 | @pytest.mark.dependency() 137 | def test_mssql_sqlcmd_shell_query_command(mssql_sqlcmd_db): 138 | command = execute_sql_statement_command(mssql_sqlcmd_db, "SELECT 1") 139 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 140 | print(pstdout) 141 | assert exitcode == 0 142 | 143 | 144 | @pytest.mark.dependency() 145 | def test_mssql_sqlcmd_shell_copy_to_stout(mssql_sqlcmd_db): 146 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqlcmd_db, "SELECT 1 AS Col1, 'FOO' AS Col2 UNION ALL SELECT 2, 'BAR'") 147 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 148 | print(pstdout) 149 | assert exitcode == 0 150 | assert pstdout == '''1,FOO 151 | 2,BAR''' 152 | 153 | 154 | @pytest.mark.dependency(depends=["test_mssql_sqlcmd_shell_query_command", "test_mssql_sqlcmd_shell_copy_to_stout", "test_mssql_ddl"]) 155 | @pytest.mark.parametrize( 156 | "seed_file", 157 | [ 158 | "names_lf_lastrow.csv", 159 | "names_crlf_lastrow.csv", 160 | # BCP only supports unquoted files that end with a row terminator after the last row 161 | ] 162 | ) 163 | def test_mssql_sqlcmd_shell_copy_from_stdin_csv_noheader(mssql_sqlcmd_db, seed_file): 164 | # delete rows from table, make sure that the last matrix test does not mess up this test 165 | command = execute_sql_statement_command(mssql_sqlcmd_db, "DELETE FROM names") 166 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 167 | assert exitcode == 0 168 | 169 | # reading csv file...
170 | names_csv_file_path = str((pathlib.Path(__file__).parent / f'../seed/{seed_file}').absolute()) 171 | command = f'cat {names_csv_file_path} \\\n' 172 | command += ' | ' + shell.copy_from_stdin_command(mssql_sqlcmd_db,target_table='names',csv_format=True,skip_header=False,delimiter_char=',') 173 | print(command) 174 | 175 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 176 | print(pstdout) 177 | assert exitcode == 0 178 | 179 | # check if writing was successful 180 | 181 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqlcmd_db, "SELECT COUNT(*) FROM names") 182 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 183 | assert exitcode == 0 184 | assert pstdout == "10" 185 | 186 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqlcmd_db, "SELECT name FROM names WHERE id = 1") 187 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 188 | assert exitcode == 0 189 | assert pstdout == "Elinor Meklit" 190 | 191 | 192 | @pytest.mark.dependency(depends=["test_mssql_sqlcmd_shell_query_command", "test_mssql_sqlcmd_shell_copy_to_stout", "test_mssql_ddl"]) 193 | @pytest.mark.parametrize( 194 | "seed_file", 195 | [ 196 | "names_lf_lastrow_header.csv", 197 | "names_crlf_lastrow_header.csv", 198 | # BCP only supports unquoted files that end with a row terminator after the last row 199 | ] 200 | ) 201 | def test_mssql_sqlcmd_shell_copy_from_stdin_csv_skipheader(mssql_sqlcmd_db, seed_file): 202 | # delete rows from table, make sure that the last matrix test does not mess up this test 203 | command = execute_sql_statement_command(mssql_sqlcmd_db, "DELETE FROM names_with_header") 204 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 205 | assert exitcode == 0 206 | 207 | # reading csv file... 208 | names_csv_file_path = str((pathlib.Path(__file__).parent / f'../seed/{seed_file}').absolute()) 209 | command = f'cat {names_csv_file_path} \\\n' 210 | command += ' | ' + shell.copy_from_stdin_command(mssql_sqlcmd_db,target_table='names_with_header',csv_format=True,skip_header=True,delimiter_char=',') 211 | print(command) 212 | 213 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 214 | print(pstdout) 215 | assert exitcode == 0 216 | 217 | # check if writing was successful 218 | 219 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqlcmd_db, "SELECT COUNT(*) FROM names_with_header") 220 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 221 | assert exitcode == 0 222 | assert pstdout == "10" 223 | 224 | command = execute_sql_statement_to_stdout_csv_command(mssql_sqlcmd_db, "SELECT name FROM names_with_header WHERE id = 1") 225 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 226 | assert exitcode == 0 227 | assert pstdout == "Elinor Meklit" 228 | -------------------------------------------------------------------------------- /tests/postgres/test_postgres.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import typing as t 3 | import subprocess 4 | import pathlib 5 | 6 | from mara_db import shell, formats 7 | 8 | from ..command_helper import * 9 | from ..db_test_helper import db_is_responsive, db_replace_placeholders 10 | from ..local_config import POSTGRES_DB 11 | 12 | 13 | if not POSTGRES_DB: 14 | pytest.skip("skipping PostgreSQL tests: variable POSTGRES_DB not set", allow_module_level=True) 15 | 16 | 17 | @pytest.fixture(scope="session") 18 | def postgres_db(docker_ip, docker_services) -> t.Tuple[str, int]: 19 | """Ensures that a PostgreSQL server is running on docker.""" 20 | 21
| docker_port = docker_services.port_for("postgres", 5432) 22 | db = db_replace_placeholders(POSTGRES_DB, docker_ip, docker_port) 23 | 24 | # here we need to wait until the PostgreSQL port is available. 25 | docker_services.wait_until_responsive( 26 | timeout=30.0, pause=0.1, check=lambda: db_is_responsive(db) 27 | ) 28 | 29 | return db 30 | 31 | 32 | @pytest.mark.dependency() 33 | def test_postgres_shell_query_command(postgres_db): 34 | command = execute_sql_statement_command(postgres_db, "SELECT 1") 35 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 36 | print(pstdout) 37 | assert exitcode == 0 38 | 39 | 40 | @pytest.mark.dependency() 41 | def test_postgres_shell_copy_to_stout(postgres_db): 42 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT 1 AS Col1, 'FOO' AS Col2 UNION ALL SELECT 2, 'BAR'") 43 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 44 | print(pstdout) 45 | assert exitcode == 0 46 | assert pstdout == '''1,FOO 47 | 2,BAR''' 48 | 49 | 50 | @pytest.mark.dependency() 51 | def test_postgres_ddl(postgres_db): 52 | """Runs the DDL script required for other tests""" 53 | # run 'test_postgres_ddl.sql' 54 | ddl_file_path = str((pathlib.Path(__file__).parent / 'test_postgres_ddl.sql').absolute()) 55 | command = execute_sql_file_command(postgres_db, ddl_file_path) 56 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 57 | print(pstdout) 58 | assert exitcode == 0 59 | 60 | 61 | @pytest.mark.dependency(depends=["test_postgres_shell_query_command", "test_postgres_shell_copy_to_stout", "test_postgres_ddl"]) 62 | @pytest.mark.parametrize( 63 | "seed_file", 64 | [ 65 | "names_crlf_lastrow.csv", 66 | "names_crlf_quoted_lastrow.csv", 67 | "names_crlf_quoted.csv", 68 | "names_crlf.csv", 69 | "names_lf_lastrow.csv", 70 | "names_lf_quoted_lastrow.csv", 71 | "names_lf_quoted.csv", 72 | "names_lf.csv", 73 | ] 74 | ) 75 | def test_postgres_shell_copy_from_stdin_csv_noheader(postgres_db, seed_file): 76 | # delete rows from table, make sure that the last matrix test does not mess up this test 77 | command = execute_sql_statement_command(postgres_db, "DELETE FROM names") 78 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 79 | assert exitcode == 0 80 | 81 | # reading csv file...
82 | file_path = str((pathlib.Path(__file__).parent / f'../seed/{seed_file}').absolute()) 83 | command = f'cat {file_path} \\\n' 84 | command += ' | ' + shell.copy_from_stdin_command(postgres_db,target_table='names', 85 | pipe_format=formats.CsvFormat(header=False, delimiter_char=',')) 86 | print(command) 87 | 88 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 89 | print(pstdout) 90 | assert exitcode == 0 91 | 92 | # check if writing was successful 93 | 94 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT COUNT(*) FROM names") 95 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 96 | assert exitcode == 0 97 | assert pstdout == "10" 98 | 99 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT name FROM names WHERE id = 1") 100 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 101 | assert exitcode == 0 102 | assert pstdout == "Elinor Meklit" 103 | 104 | 105 | @pytest.mark.dependency(depends=["test_postgres_shell_query_command", "test_postgres_shell_copy_to_stout", "test_postgres_ddl"]) 106 | @pytest.mark.parametrize( 107 | "seed_file", 108 | [ 109 | "names_crlf_lastrow_header.csv", 110 | "names_crlf_quoted_lastrow_header.csv", 111 | "names_crlf_quoted_header.csv", 112 | "names_crlf_header.csv", 113 | "names_lf_lastrow_header.csv", 114 | "names_lf_quoted_lastrow_header.csv", 115 | "names_lf_quoted_header.csv", 116 | "names_lf_header.csv", 117 | ] 118 | ) 119 | def test_postgres_shell_copy_from_stdin_csv_skipheader(postgres_db, seed_file): 120 | # delete rows from table, make sure that the last matrix test does not mess up this test 121 | command = execute_sql_statement_command(postgres_db, "DELETE FROM names_with_header") 122 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 123 | assert exitcode == 0 124 | 125 | # reading csv file... 
126 | file_path = str((pathlib.Path(__file__).parent / f'../seed/{seed_file}').absolute()) 127 | command = f'cat {file_path} \\\n' 128 | command += ' | ' + shell.copy_from_stdin_command(postgres_db, 129 | target_table='names_with_header', 130 | pipe_format=formats.CsvFormat(header=True, delimiter_char=',')) 131 | print(command) 132 | 133 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 134 | print(pstdout) 135 | assert exitcode == 0 136 | 137 | # check if writing was successful 138 | 139 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT COUNT(*) FROM names_with_header") 140 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 141 | assert exitcode == 0 142 | assert pstdout == "10" 143 | 144 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT name FROM names_with_header WHERE id = 1") 145 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 146 | assert exitcode == 0 147 | assert pstdout == "Elinor Meklit" 148 | 149 | 150 | @pytest.mark.dependency(depends=["test_postgres_shell_query_command", "test_postgres_shell_copy_to_stout", "test_postgres_ddl"]) 151 | @pytest.mark.parametrize( 152 | "seed_file", 153 | [ 154 | "accounts_crlf_lastrow.jsonl", 155 | "accounts_crlf.jsonl", 156 | "accounts_lf_lastrow.jsonl", 157 | "accounts_lf.jsonl", 158 | ] 159 | ) 160 | def test_postgres_shell_copy_from_stdin_jsonl(postgres_db, seed_file): 161 | # delete rows from table, make sure that the last matrix test does not mess up this test 162 | command = execute_sql_statement_command(postgres_db, "DELETE FROM accounts_json") 163 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 164 | assert exitcode == 0 165 | 166 | # reading csv file... 167 | file_path = str((pathlib.Path(__file__).parent / f'../seed/{seed_file}').absolute()) 168 | command = f'cat {file_path} \\\n' 169 | command += ' | ' + shell.copy_from_stdin_command(postgres_db, 170 | target_table='accounts_json', 171 | pipe_format=formats.JsonlFormat()) 172 | print(command) 173 | 174 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 175 | print(pstdout) 176 | assert exitcode == 0 177 | 178 | # check if writing was successful 179 | 180 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT COUNT(*) FROM accounts_json") 181 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 182 | assert exitcode == 0 183 | assert pstdout == "6" 184 | 185 | command = execute_sql_statement_to_stdout_csv_command(postgres_db, "SELECT COUNT(*) FROM accounts_json WHERE data IS NOT NULL") 186 | (exitcode, pstdout) = subprocess.getstatusoutput(command) 187 | assert exitcode == 0 188 | assert pstdout == "6" 189 | 190 | 191 | def test_postgres_sqlalchemy(postgres_db): 192 | """ 193 | A simple test to check if the SQLAlchemy connection works 194 | """ 195 | from ..db_test_helper import _test_sqlalchemy 196 | _test_sqlalchemy(postgres_db) 197 | 198 | 199 | def test_postgres_connect(postgres_db): 200 | """ 201 | A simple test to check if the connect API works. 202 | """ 203 | from ..db_test_helper import _test_connect 204 | _test_connect(postgres_db) 205 | 206 | 207 | def test_postgres_cursor_context(postgres_db): 208 | """ 209 | A simple test to check if the cursor context of the db works. 210 | """ 211 | from ..db_test_helper import _test_cursor_context 212 | _test_cursor_context(postgres_db) 213 | 214 | 215 | def test_postgres_cursor_context_legacy(postgres_db): 216 | """ 217 | Legacy call `postgres_cursor_context` test. 
218 | 219 | Test shall be dropped in version 5.0 220 | """ 221 | from mara_db.postgresql import postgres_cursor_context 222 | 223 | with postgres_cursor_context(postgres_db) as cursor: 224 | cursor.execute('SELECT 1') 225 | row = cursor.fetchone() 226 | assert row[0] == 1 227 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 4.11.0 (2023-12-06) 4 | 5 | - add entry point `mara.commands` (for [mara-cli](https://github.com/mara/mara-cli) support) 6 | 7 | ## 4.10.0 (2023-11-21) 8 | 9 | - add cli group 'mara-db'. Mark old cli commands as deprecated (#74) 10 | 11 | ## 4.9.2 (2023-02-21) 12 | 13 | ### Bugfixes :bug: 14 | 15 | - fix typo in deprecation warning messages and add warning category 16 | - fix JSONL data import into PostgreSQL db (#73) 17 | 18 | ## 4.9.1 (2023-02-08) 19 | 20 | - hotfix :bug: issue with postgres cursor context (#72) 21 | 22 | ## 4.9.0 (2023-02-07) 23 | 24 | ### Breaking Changes 25 | 26 | - the implementation of the formats option (#56) required changes to the functions in `mara_db.shell`. In case you defined custom implementations, you will have to adjust them. 27 | 28 | ### Feat :tada: 29 | 30 | - add a functional API to get the DB-API 2.0 connection of a database (#71) 31 | - add option to specify multiple formats like Parquet, Avro and ORC (#56, #64) 32 | 33 | ### Changes :rocket: 34 | - refactor the internally used cursor context logic to a more generic one (#68, #71) 35 | - a default database `mara` is no longer defined (#67). 36 | - add deprecation warning when using parameter `timestamp` in query_command. This parameter will be removed in version 5.0. See #44. 37 | 38 | ### Bugfixes :bug: 39 | 40 | - fix sqlalchemy_url for SQLServerDB 41 | - fix UI error when requesting schema view for Azure Synapse databases (#48) 42 | 43 | ## 4.8.0 (2022-09-01) 44 | 45 | ### Breaking Changes 46 | 47 | - when using BigQuery you need to rename the BigQueryDB db config parameter `service_account_private_key_file` to `service_account_json_file_name` (#45) 48 | - when using BigQuery with copy/read operations you need to specify parameter `gcloud_gcs_bucket_name` in the BigQueryDB db config (#45) 49 | 50 | ### Feat :tada: 51 | 52 | - Add Databricks db support (#62) 53 | - Add Snowflake db support (#52/#61) 54 | - Add sqlalchemy support for BigQuery (#45/#50) 55 | - Add sqlalchemy support for SQL Server (#46) 56 | - Add option to use either MSSQL Tools (sqlcmd/bcp) or sqsh to connect to SQL Server (#57) 57 | - Add readthedocs.io documentation (#59) 58 | 59 | ### Changes :rocket: 60 | - Extend BigQuery functionality (#45) 61 | - add extras per database engine (#50/#53). Postgres and Redshift are still included by default.
**Note:** It is recommended to always specify the extras for the database you use. 62 | - use client-side rendering for graphviz fallback (#51) 63 | 64 | ### Bugfixes :bug: 65 | - add all package files in wheel 66 | - a proper error is now thrown when the user tries to look at the schema of a BigQuery database 67 | 68 | ## 4.7.0 - 4.7.1 (2020-10-23) 69 | 70 | - Add BigQuery support 71 | - Fix copy from PostgreSQLDB to BigQueryDB when delimiter_char is not set (#36) 72 | - Add config default_echo_queries (#38) 73 | - Add support for SQLServerDB port (#37) 74 | - Fix exit command sequence when previous command has exit code not zero (#40) 75 | 76 | ## 4.6.0 - 4.6.1 (2020-07-03) 77 | 78 | - Escape double quotes in copy_from_stdin_command for PostgreSQL (#33) 79 | - Add overview page to visualization 80 | 81 | **required changes** 82 | 83 | If you use quotes in table names in `Copy` commands, check whether they still work. 84 | 85 | 86 | ## 4.5.0 - 4.5.1 (2020-04-27) 87 | 88 | - Don't escape dollar sign in queries for SqlServer 89 | - Support echo sql queries for SqlServer 90 | - Bugfix copy_to_stdout_command for SqlServerDB 91 | 92 | **required changes** 93 | 94 | If you use SQL Server and have queries that contain the `$` sign, then please escape that one manually. 95 | 96 | 97 | ## 4.4.1 - 4.4.3 (2020-02-13) 98 | 99 | - Show warning when graphviz is not installed 100 | - Set fetch-count 10000 for the `copy_to_stdout_command` for PostgreSQLDB to handle out of memory error. 101 | - Add schema visualization support for SQL Server 102 | - Set mssql severity level to 10 (#25) 103 | 104 | 105 | 106 | ## 4.4.0 (2019-11-28) 107 | 108 | - Implement `copy-from-stdin` command for Redshift (via tmp file on configurable s3 bucket) 109 | - Refactor database schema visualization so that multiple databases can be implemented 110 | - Implement database schema visualization for MySQL 111 | - Add function mysql.mysql_cursor_context for connecting to MySQL databases via https://github.com/PyMySQL/mysqlclient-python 112 | - Allow to pass a dbs.PostgreSQLDB instance to postgresql.postgres_cursor_context 113 | 114 | 115 | ## 4.3.0 - 4.3.1 (2019-07-04) 116 | 117 | - Add travis integration and PyPi upload 118 | 119 | 120 | ## 4.2.0 121 | 122 | - Add new parameters delimiter_char and csv_format to all copy command functions (allows for better quoting of JSONs, arrays, strings with tabs) 123 | - Add warnings for unused parameters 124 | - Make code a bit more pep-8 compliant 125 | 126 | **required changes** 127 | 128 | - Adapt own implementations of `copy_to_stdout_command`, `copy_from_stdin_command` & `copy_command` (add the two new parameters). 129 | - Test whether everything still works (has been working reliably in three big projects for 4 weeks now) 130 | 131 | 132 | ## 4.1.0 133 | 134 | - Revert commit [422c332](https://github.com/mara/mara-db/commit/422c332b09b4e28e19289f0baa27f5102ade9a03) (Fix pg to pg copy command for json data). It was causing too much trouble.
135 | 136 | 137 | ## 4.0.0 - 4.0.1 (2019-04-12) 138 | 139 | - Allow MARA_AUTOMIGRATE_SQLALCHEMY_MODELS to be a function (in order to improve import speed) 140 | - Change MARA_XXX variables to functions to delay imports 141 | - Fix pg to pg copy command for json data 142 | - Move some imports into the functions that use them in order to improve loading speed 143 | - Remove dependency_links from setup.py to regain compatibility with recent pip versions 144 | 145 | **required changes** 146 | 147 | - Update `mara-app` to `>=2.0.0` 148 | 149 | 150 | ## 3.2.0 - 3.2.3 (2019-04-11) 151 | 152 | - Add oracle db access 153 | - Add SSL standard parameters to PostgreSQL connection string 154 | - Add missing footer parameter to Oracle copy to stdout command 155 | - Change arguments for sqsh client to return non-zero exitcode in error case 156 | - Add single quotes around PostgreSQL passwords to prevent bash errors when the password contains certain characters 157 | 158 | ## 3.1.0 - 3.1.2 (2018-08-30) 159 | 160 | - Make graphviz engine in schema visualization selectable 161 | - Implement Redshift DB 162 | - Show enums in schema drawing for constrained tables 163 | - Extend copy_to_stdout_command with "footer" argument for PostgreSQL DB 164 | 165 | 166 | ## 3.0.0 - 3.0.2 (2018-04-27) 167 | 168 | - Move sqlalchemy auto-migration from mara-app to mara-db 169 | - Remove `config.mara_db_alias` function 170 | - Move function `sqlalchemy.postgres_cursor_context` to module `postgresql` 171 | - Remove `sqlalchemy/session_context` context handler 172 | - Import graphviz only when needed 173 | - Update / improve documentation 174 | - Add port to sqlalchemy postgres connection string 175 | - Extend copy_to_stdout_command with "header" argument 176 | 177 | **required changes** 178 | 179 | - Replace all occurrences of `mara_db.config.mara_db_alias()` with `'mara'` 180 | - Replace `mara_db.sqlalchemy.postgres_cursor_context` with `mara_db.postgresql.postgres_cursor_context` 181 | - Change all usages of `mara_db.sqlalchemy.session_context` to psycopg2 using `mara_db.postgresql.postgres_cursor_context` 182 | 183 | 184 | ## 2.3.0 - 2.3.1 (2018-04-03) 185 | 186 | - Switch dependency links in setup.py from ssh to https 187 | - Add psycopg2 as dependency 188 | 189 | 190 | 191 | ## 2.2.0 (2018-02-28) 192 | 193 | - add web ui for visualizing database schemas (postgres only currently) 194 | - improve acl 195 | - Fix bug in schema drawing 196 | - Quote strings when copying from sqlite 197 | - NULL value handling when copying from sqlite 198 | 199 | 200 | 201 | ## 2.1.0 - 2.1.3 (2018-01-19) 202 | 203 | - add SQLite support 204 | - don't use sqlalchemy session in postgres_cursor_context because it creates too many setup queries on each instantiation 205 | - always append ';\n\go' to queries against SQL Server 206 | - remove default-character-set=utf8mb4 from MySQL queries 207 | 208 | 209 | ## 2.0.0 - 2.0.1 (2017-12-20) 210 | 211 | - change database configuration from sqlalchemy urls to custom database-specific classes 212 | - create sqlalchemy session contexts from configuration objects 213 | - add functions for creating shell commands for accessing databases 214 | - add documentation 215 | - bug fixes 216 | - various smaller improvements in mara_db/shell.py 217 | 218 | **required changes** 219 | 220 | This version is pretty much incompatible with previous versions. See README.md for new usage patterns.
221 | 222 | 223 | ## 1.1.0 (2017-12-04) 224 | 225 | - Replace config function databases with database_urls 226 | - Add functions for client command creation 227 | 228 | **required changes** 229 | 230 | - Change database configurations from 231 | 232 | ```python 233 | from sqlalchemy import engine 234 | 235 | def databases() -> {str: engine.Engine}: 236 | """The list of database connections to use, by alias""" 237 | return {'mara': engine.create_engine('postgresql+psycopg2://root@localhost/mara')} 238 | 239 | ``` 240 | 241 | to 242 | 243 | ```python 244 | import sqlalchemy.engine.url 245 | 246 | def database_urls() -> {str: sqlalchemy.engine.url}: 247 | """The list of database connections to use, by alias""" 248 | return {'mara': sqlalchemy.engine.url.make_url('postgresql+psycopg2://root@localhost/mara')} 249 | ``` 250 | 251 | ## 1.0.0 - 1.0.1 (2017-03-08) 252 | 253 | - Initial version 254 | - Minor bug fixes and code style issues 255 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mara DB 2 | 3 | [![Build Status](https://github.com/mara/mara-db/actions/workflows/build.yml/badge.svg)](https://github.com/mara/mara-db/actions/workflows/build.yml) 4 | [![PyPI - License](https://img.shields.io/pypi/l/mara-db.svg)](https://github.com/mara/mara-db/blob/main/LICENSE) 5 | [![PyPI version](https://badge.fury.io/py/mara-db.svg)](https://badge.fury.io/py/mara-db) 6 | [![Slack Status](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://communityinviter.com/apps/mara-users/public-invite) 7 | 8 | Mini package for configuring and accessing multiple databases in a single project. Decouples the use of databases and their configuration by using "aliases" for databases. 9 | 10 | The file [mara_db/dbs.py](https://github.com/mara/mara-db/blob/main/mara_db/dbs.py) contains abstract database configurations for PostgreSQL, Mysql, SQL Server, Oracle, SQLite and Big Query. The database connections of a project are configured by overwriting the `databases` function in [mara_db/config.py](https://github.com/mara/mara-db/blob/main/mara_db/config.py): 11 | 12 | ```python 13 | import mara_db.config 14 | import mara_db.dbs 15 | 16 | ## configure database connections for different aliases 17 | mara_db.config.databases = lambda: { 18 | 'mara': mara_db.dbs.PostgreSQLDB(host='localhost', user='root', database='mara'), 19 | 'dwh': mara_db.dbs.PostgreSQLDB(database='dwh'), 20 | 'source-1': mara_db.dbs.MysqlDB(host='some-localhost', database='my_app', user='dwh'), 21 | 'source-2': mara_db.dbs.SQLServerDB(user='dwh_read', password='123abc', database='db1', host='some-sql-server') 22 | } 23 | 24 | ## access individual database configurations with `dbs.db`: 25 | print(mara_db.dbs.db('mara')) 26 | # -> <PostgreSQLDB: host=localhost, user=root, database=mara> 27 | ``` 28 | 29 | &nbsp; 30 | 31 | 32 | ## Visualization of (PostgreSQL, MySQL, SQL Server) database schemas 33 | 34 | [mara_db/views.py](https://github.com/mara/mara-db/blob/main/mara_db/views.py) contains a schema visualization for all configured databases using graphviz (currently PostgreSQL, Mysql and SQL Server only). It basically shows tables of selected schemas together with the foreign key relations between them.
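The schema pages also highlight likely foreign-key columns based on a configurable naming pattern (see below). A minimal, hypothetical sketch of widening that pattern — assuming `config.schema_ui_foreign_key_column_regex` follows the usual mara convention of a config function that is overridden by assignment (the exact signature is an assumption for illustration):

```python
import mara_db.config

# Hypothetical override: besides the default `*_fk` pattern, also treat
# columns ending in `_id` as foreign-key candidates in the schema pages.
mara_db.config.schema_ui_foreign_key_column_regex = lambda: '.*(_fk|_id)$'
```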
35 | 36 | 37 | ![Schema visualization](https://github.com/mara/mara-db/blob/main/docs/_static/schema-visualization.png) 38 | 39 | For finding missing foreign key constraints, columns that follow a specific naming pattern (configurable via `config.schema_ui_foreign_key_column_regex`, default `*_fk`) and that are not part of foreign key constraints are drawn in pink. 40 | 41 | &nbsp; 42 | 43 | 44 | ## Fast batch processing: Accessing databases with shell commands 45 | 46 | The file [mara_db/shell.py](https://github.com/mara/mara-db/blob/main/mara_db/shell.py) contains functions that create commands for accessing databases via their command line clients. 47 | 48 | For example, the `query_command` function creates a shell command that can receive an SQL query from stdin and execute it: 49 | 50 | ```python 51 | import mara_db.shell 52 | 53 | print(mara_db.shell.query_command('source-1')) 54 | # -> mysql --default-character-set=utf8mb4 --user=dwh --host=some-localhost my_app 55 | 56 | print(mara_db.shell.query_command('dwh', timezone='Europe/Lisbon', echo_queries=False)) 57 | # -> PGTZ=Europe/Lisbon PGOPTIONS=--client-min-messages=warning psql --no-psqlrc --set ON_ERROR_STOP=on dwh 58 | ``` 59 | 60 | The function `copy_to_stdout_command` creates a shell command that receives a query on stdin and writes the result to stdout in tabular form: 61 | 62 | ```python 63 | print(mara_db.shell.copy_to_stdout_command('source-1')) 64 | # -> mysql --default-character-set=utf8mb4 --user=dwh --host=some-localhost my_app --skip-column-names 65 | ``` 66 | 67 | Similarly, `copy_from_stdin_command` creates a client command that receives tabular data from stdin and writes it to a target table: 68 | 69 | ```python 70 | print(mara_db.shell.copy_from_stdin_command('dwh', target_table='some_table', delimiter_char=';')) 71 | # -> PGTZ=Europe/Berlin PGOPTIONS=--client-min-messages=warning psql --echo-all --no-psqlrc --set ON_ERROR_STOP=on dwh \ 72 | #    --command="COPY some_table FROM STDIN WITH DELIMITER AS ';'" 73 | ``` 74 | 75 | Finally, `copy_command` creates a shell command that receives an SQL query from stdin, executes the query in `source_db` and then writes the result to `target_table` in `target_db`: 76 | 77 | ```python 78 | print(mara_db.shell.copy_command('source-2', 'dwh', target_table='some_table')) 79 | # -> sed 's/\\\\$/\$/g;s/\$/\\\\$/g' \ 80 | #    | sqsh -U dwh_read -P 123abc -S some-sql-server -D db1 -m csv \ 81 | #    | PGTZ=Europe/Berlin PGOPTIONS=--client-min-messages=warning psql --echo-all --no-psqlrc --set ON_ERROR_STOP=on dwh \ 82 | #    --command = "COPY some_table FROM STDIN WITH CSV HEADER" 83 | ``` 84 | 85 | &nbsp; 86 | 87 | 88 | The following **command line clients** are used to access the various databases: 89 | 90 | | Database | Client binary | Comments | 91 | | --- | --- | --- | 92 | | Postgresql / Redshift | `psql` | Included in standard distributions. | 93 | | MariaDB / Mysql | `mysql` | Included in standard distributions. | 94 | | SQL Server | `sqsh`
- or -
`sqlcmd` | **sqsh**: From [https://sourceforge.net/projects/sqsh/](https://sourceforge.net/projects/sqsh/), usually messy to get working. On Ubuntu, use the [http://ppa.launchpad.net/jasc/sqsh/ubuntu/](http://ppa.launchpad.net/jasc/sqsh/ubuntu/) backport. On Mac, try the Homebrew version or install from source.
**sqlcmd**: Official Microsoft Utility for SQL Server. See [sqlcmd Utility](https://docs.microsoft.com/en-us/sql/tools/sqlcmd-utility) | 95 | | Oracle | `sqlplus64` | See the [Oracle Instant Client](https://www.oracle.com/technetwork/database/database-technologies/instant-client/overview/index.html) homepage for details. On Mac, follow [these instructions](https://vanwollingen.nl/install-oracle-instant-client-and-sqlplus-using-homebrew-a233ce224bf). Then ` sudo ln -s /usr/local/bin/sqlplus /usr/local/bin/sqlplus64` to make the binary accessible as `sqlplus64`. | 96 | | SQLite | `sqlite3` | Available in standard distributions. Version >3.20.x required (not the case on Ubuntu 14.04). | 97 | | Big Query | `bq` | See the [Google Cloud SDK](https://cloud.google.com/sdk/docs/quickstarts) page for details. | 98 | | Snowflake | `snowsql` | See [SnowSQL (CLI Client)](https://docs.snowflake.com/en/user-guide/snowsql.html) | 99 | | Databricks | `dbsqlcli` | Included when using package extra `databricks` via package [databricks-sql-cli](https://pypi.org/project/databricks-sql-cli/). See [Databricks SQL CLI](https://docs.databricks.com/dev-tools/databricks-sql-cli.html#) | 100 | 101 | &nbsp; 102 | 103 | 104 | ## Make it so! Auto-migration of SQLAlchemy models 105 | 106 | [Alembic has a feature](http://alembic.zzzcomputing.com/en/latest/autogenerate.html) that can create a diff between the state of a database and the ORM models of an application. This feature is used in [mara_db/auto_migrate.py](https://github.com/mara/mara-db/blob/main/mara_db/auto_migrate.py) to automatically perform all necessary database transformations, without intermediate migration files: 107 | 108 | ```python 109 | # define a model / table 110 | class MyTable(sqlalchemy.ext.declarative.declarative_base()): 111 | __tablename__ = 'my_table' 112 | my_table_id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 113 | column_1 = sqlalchemy.Column(sqlalchemy.TEXT, nullable=False, index=True) 114 | 115 | 116 | db = mara_db.dbs.SQLiteDB(file_name='/tmp/test.sqlite') 117 | 118 | # create database and table 119 | mara_db.auto_migration.auto_migrate(engine=mara_db.auto_migration.engine(db), models=[MyTable]) 120 | # -> 121 | # Created database "sqlite:////tmp/test.sqlite" 122 | # 123 | # CREATE TABLE my_table ( 124 | # my_table_id SERIAL NOT NULL, 125 | # column_1 TEXT NOT NULL, 126 | # PRIMARY KEY (my_table_id) 127 | # ); 128 | # 129 | # CREATE INDEX ix_my_table_column_1 ON my_table (column_1); 130 | ``` 131 | 132 | When the model is changed later, `auto_migrate` creates a diff against the existing database and applies it: 133 | 134 | ```python 135 | # remove index and add another column 136 | class MyTable(sqlalchemy.ext.declarative.declarative_base()): 137 | __tablename__ = 'my_table' 138 | my_table_id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True) 139 | column_1 = sqlalchemy.Column(sqlalchemy.TEXT, nullable=False) 140 | column_2 = sqlalchemy.Column(sqlalchemy.Integer) 141 | 142 | auto_migrate(engine=engine(db), models=[MyTable]) 143 | # -> 144 | # ALTER TABLE my_table ADD COLUMN column_2 INTEGER; 145 | # 146 | # DROP INDEX ix_my_table_text_column_1; 147 | ``` 148 | 149 | **Use with care**! There are a lot of changes [that alembic auto-generate can not detect](http://alembic.zzzcomputing.com/en/latest/autogenerate.html#what-does-autogenerate-detect-and-what-does-it-not-detect). We recommend testing each auto-migration on a staging system first before deploying to production.
Sometimes manual migration scripts will be necessary. 150 | 151 | 152 | 153 | ## Installation 154 | 155 | ```bash 156 | pip install mara-db 157 | ``` 158 | 159 | or 160 | 161 | ```bash 162 | pip install git+https://github.com/mara/mara-db.git 163 | ``` 164 | 165 | ### Optional: Installation of requirements for SQL Server 166 | 167 | For usage with SQL Server, the python module pyodbc and an ODBC driver (e.g. Microsoft ODBC Driver 17 for SQL Server) are required; they are not included in the general requirements. 168 | 169 | To see how to install pyodbc, take a look at [this install guide](https://github.com/mkleehammer/pyodbc/wiki/Install). 170 | To see how to install ODBC 17, take a look at [Installing the Microsoft ODBC Driver for SQL Server on Linux and macOS](https://docs.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing-the-microsoft-odbc-driver-for-sql-server?view=sql-server-ver15). 171 | 172 | On Linux, you most likely will have to deal with an SSL issue, see [this issue](https://github.com/microsoft/msphpsql/issues/1023). A quick, dirty option in a test/development environment could be to [disable the requirement for TLS 1.2](https://github.com/microsoft/msphpsql/issues/1023#issuecomment-523214695). 173 | 174 | ### Optional: Installation of requirements for BigQuery 175 | 176 | For usage with BigQuery, the official `bq` and `gcloud` clients are required. 177 | See the [Google Cloud SDK](https://cloud.google.com/sdk/docs/quickstarts) page for installation details. 178 | 179 | Enabling the BigQuery API and Service account JSON credentials are also required as listed 180 | in the official documentation [here](https://cloud.google.com/bigquery/docs/quickstarts/quickstart-client-libraries#before-you-begin). 181 | 182 | One-time authentication of the service account used: 183 | ```cmd 184 | gcloud auth activate-service-account --key-file='path-to/service-account.json' 185 | ``` 186 | 187 | Optionally, for loading data from files into BigQuery, the `gcloud_gcs_bucket_name` can be specified in the database initialization. 188 | This will use the Google Cloud Storage bucket specified as cache for loading data and overcoming potential limitations. 189 | For more see [loading-data](https://cloud.google.com/bigquery/docs/bq-command-line-tool#loading_data). 190 | By default, files will be loaded directly from the local machine as described in [loading-local-data](https://cloud.google.com/bigquery/docs/loading-data-local#loading_data_from_a_local_data_source). 191 | 192 | A BigQuery context with a python cursor is also available on demand for easy access to BigQuery databases. 193 | In order to use it, install the official Google python client library: [google-cloud-bigquery](https://cloud.google.com/bigquery/docs/reference/libraries#client-libraries-install-python).
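A minimal sketch of that cursor access, assuming a `BigQueryDB` connection was configured under the hypothetical alias `reporting`:

```python
import mara_db.dbs

# Assumption for illustration: a mara_db.dbs.BigQueryDB is configured
# under the alias 'reporting' in mara_db.config.databases.
with mara_db.dbs.cursor_context('reporting') as cursor:
    cursor.execute('SELECT 1 AS a')
    print(cursor.fetchall())
```

The same `cursor_context` call works for any alias whose database class implements the `connect` API.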
194 | 195 | ## Links 196 | 197 | * Documentation: https://mara-db.readthedocs.io/ 198 | * Changes: https://mara-db.readthedocs.io/en/latest/changes.html 199 | * PyPI Releases: https://pypi.org/project/mara-db/ 200 | * Source Code: https://github.com/mara/mara-db 201 | * Issue Tracker: https://github.com/mara/mara-db/issues 202 | -------------------------------------------------------------------------------- /mara_db/dbs.py: -------------------------------------------------------------------------------- 1 | """Abstract definition of database connections""" 2 | 3 | import contextlib 4 | import functools 5 | import pathlib 6 | from typing import Union 7 | 8 | 9 | @functools.lru_cache(maxsize=None) 10 | def db(alias): 11 | """Returns a database configuration by alias""" 12 | from . import config 13 | databases = config.databases() 14 | if alias not in databases: 15 | raise KeyError(f'database alias "{alias}" not configured') 16 | return databases[alias] 17 | 18 | 19 | class DB: 20 | """Generic database connection definition""" 21 | 22 | def __repr__(self) -> str: 23 | return (f'<{self.__class__.__name__}: ' 24 | + ', '.join([f'{var}={"*****" if (var == "password" or "secret" in var) else getattr(self, var)}' 25 | for var in vars(self) if getattr(self, var)]) 26 | + '>') 27 | 28 | @property 29 | def sqlalchemy_url(self): 30 | """Returns the SQLAlchemy url for a database""" 31 | raise NotImplementedError(f'Please implement sqlalchemy_url for type "{self.__class__.__name__}"') 32 | 33 | 34 | class PostgreSQLDB(DB): 35 | def __init__(self, host: str = None, port: int = None, database: str = None, 36 | user: str = None, password: str = None, 37 | sslmode: str = None, sslrootcert: str = None, sslcert: str = None, sslkey: str = None): 38 | """ 39 | Connection information for a PostgreSQL database 40 | 41 | For the SSL options, see https://www.postgresql.org/docs/current/libpq-ssl.html#LIBPQ-SSL-PROTECTION 42 | """ 43 | self.host = host 44 | self.database = database 45 | self.port = port 46 | self.user = user 47 | self.password = password 48 | 49 | self.sslmode = sslmode 50 | self.sslrootcert = sslrootcert 51 | self.sslcert = sslcert 52 | self.sslkey = sslkey 53 | 54 | @property 55 | def sqlalchemy_url(self): 56 | return (f'postgresql+psycopg2://{self.user}{":" + self.password if self.password else ""}@{self.host}' 57 | + f'{":" + str(self.port) if self.port else ""}/{self.database}') 58 | 59 | 60 | class RedshiftDB(PostgreSQLDB): 61 | def __init__(self, host: str = None, port: int = None, database: str = None, 62 | user: str = None, password: str = None, 63 | aws_access_key_id=None, aws_secret_access_key=None, aws_s3_bucket_name=None): 64 | """ 65 | Connection information for a Redshift database 66 | 67 | The aws_* parameters are for copying to Redshift from stdin via an s3 bucket 68 | (requires the https://pypi.org/project/awscli/ package to be installed) 69 | """ 70 | self.aws_access_key_id = aws_access_key_id 71 | self.aws_secret_access_key = aws_secret_access_key 72 | self.aws_s3_bucket_name = aws_s3_bucket_name 73 | super(RedshiftDB, self).__init__(host, port, database, user, password) 74 | 75 | 76 | class BigQueryDB(DB): 77 | def __init__(self, 78 | service_account_json_file_name: str, 79 | location: str = None, project: str = None, dataset: str = None, 80 | gcloud_gcs_bucket_name=None, use_legacy_sql: bool = False): 81 | """ 82 | Connection information for a BigQueryDB database 83 | 84 | Enabling the BigQuery API and Service account json credentials are required.
For more: 85 | https://cloud.google.com/bigquery/docs/quickstarts/quickstart-client-libraries#before-you-begin 86 | 87 | Args: 88 | service_account_json_file_name: The name of the private key file provided by Google when creating a service account (in json format) 89 | location: Default geographic location to use when creating datasets or determining where jobs should run 90 | project: Default project to use for requests. 91 | dataset: Default dataset to use for requests. 92 | gcloud_gcs_bucket_name: The Google Cloud Storage bucket used as cache for loading data 93 | use_legacy_sql: (default: false) If true, the legacy BigQuery SQL dialect is used. 94 | """ 95 | self.service_account_json_file_name = service_account_json_file_name 96 | self.location = location 97 | self.project = project 98 | self.dataset = dataset 99 | self.gcloud_gcs_bucket_name = gcloud_gcs_bucket_name 100 | self.use_legacy_sql = use_legacy_sql 101 | 102 | @property 103 | def sqlalchemy_url(self): 104 | url = 'bigquery://' 105 | if self.project: 106 | url += self.project 107 | if self.dataset: 108 | url += '/' + self.dataset 109 | return url 110 | 111 | 112 | class MysqlDB(DB): 113 | def __init__(self, host: str = None, port: int = None, database: str = None, 114 | user: str = None, password: str = None, ssl: bool = None, charset: str = None): 115 | self.host = host 116 | self.database = database 117 | self.port = port 118 | self.user = user 119 | self.password = password 120 | self.ssl = ssl 121 | self.charset = charset 122 | 123 | 124 | class SQLServerDB(DB): 125 | def __new__(cls, host: str = None, port: int = None, database: str = None, 126 | user: str = None, password: str = None, odbc_driver: str = None, 127 | **kargs): 128 | """ 129 | Connection information for a SQL Server database 130 | """ 131 | if cls is SQLServerDB: 132 | # Here we define what happens when the class is directly created in code 133 | # 134 | # We define here that class SqshSQLServerDB shall be used by default. In a newer 135 | # major version we could change this to SqlcmdSQLServerDB but we do not want to 136 | # introduce a breaking change here at this point. 137 | return SqshSQLServerDB(host=host, port=port, database=database, user=user, password=password, odbc_driver=odbc_driver) 138 | else: 139 | # This is called when the class is created from a derived class (e.g.
SqshSQLServerDB) 140 | return super(SQLServerDB, cls).__new__(cls) 141 | 142 | def __init__(self, host: str = None, port: int = None, database: str = None, 143 | user: str = None, password: str = None, odbc_driver: str = None): 144 | self.host = host 145 | self.port = port 146 | self.database = database 147 | self.user = user 148 | self.password = password 149 | if odbc_driver is None: 150 | self.odbc_driver = 'ODBC Driver 17 for SQL Server' # default odbc driver 151 | else: 152 | self.odbc_driver = odbc_driver 153 | 154 | @property 155 | def sqlalchemy_url(self): 156 | import urllib.parse 157 | port = self.port if self.port else 1433 158 | driver = self.odbc_driver.replace(' ','+') 159 | return f'mssql+pyodbc://{self.user}:{urllib.parse.quote(self.password)}@{self.host}:{port}/{self.database}?driver={driver}' 160 | 161 | 162 | class SqshSQLServerDB(SQLServerDB): 163 | def __init__(self, host: str = None, port: int = None, database: str = None, 164 | user: str = None, password: str = None, odbc_driver: str = None): 165 | """ 166 | Connection information for a SQL Server database using the Unix package sqsh 167 | """ 168 | # NOTE: The support for named instances is not added because the command `sqsh` does not support it 169 | super().__init__(host=host, port=port, database=database, user=user, password=password, odbc_driver=odbc_driver) 170 | 171 | 172 | class SqlcmdSQLServerDB(SQLServerDB): 173 | def __init__(self, host: str = None, instance: str = None, port: int = None, database: str = None, 174 | user: str = None, password: str = None, odbc_driver: str = None, 175 | protocol: str = None, quoted_identifier: bool = True, 176 | trust_server_certificate: bool = False): 177 | """ 178 | Connection information for a SQL Server database using the MSSQL Tools e.g. sqlcmd 179 | 180 | Args: 181 | quoted_identifier: If set to true, the SET option QUOTED_IDENTIFIER is set to ON, otherwise OFF. 182 | protocol: can be tcp (TCP/IP connection), np (named pipe) or lpc (using shared memory).
183 | See as well: https://docs.microsoft.com/en-us/sql/ssms/scripting/sqlcmd-connect-to-the-database-engine?view=sql-server-ver15 184 | trust_server_certificate: Trust the server certificate without validation 185 | """ 186 | super().__init__(host=host, port=port, database=database, user=user, password=password, odbc_driver=odbc_driver) 187 | if protocol: 188 | if protocol not in ['tcp','np','lpc']: 189 | raise ValueError(f'Unsupported protocol: {protocol}') 190 | if protocol == 'tcp' and instance: 191 | raise ValueError('You cannot use protocol tcp with an instance name') 192 | if protocol in ['np','lpc'] and port: 193 | raise ValueError('You cannot use protocol np/lpc with a port number') 194 | if instance is not None and port is not None: 195 | raise ValueError('You can only use instance or port, not both together') 196 | self.protocol = protocol 197 | self.quoted_identifier = quoted_identifier 198 | self.instance = instance 199 | self.trust_server_certificate = trust_server_certificate 200 | 201 | @property 202 | def sqlalchemy_url(self): 203 | return super().sqlalchemy_url \ 204 | + ('&TrustServerCertificate=yes' if self.trust_server_certificate else '') 205 | 206 | 207 | class OracleDB(DB): 208 | def __init__(self, host: str = None, port: int = 0, endpoint: str = None, user: str = None, password: str = None): 209 | self.host = host 210 | self.port = port 211 | self.endpoint = endpoint 212 | self.user = user 213 | self.password = password 214 | 215 | 216 | class SQLiteDB(DB): 217 | def __init__(self, file_name: pathlib.Path) -> None: 218 | self.file_name = file_name 219 | 220 | @property 221 | def sqlalchemy_url(self): 222 | return f'sqlite:///{self.file_name}' 223 | 224 | 225 | class SnowflakeDB(DB): 226 | """A database connection to a Snowflake database""" 227 | def __init__(self, connection: str = None, account: str = None, user: str = None, password: str = None, database: str = None, 228 | private_key_file: str = None, private_key_passphrase: str = None) -> None: 229 | """ 230 | Connection information for a Snowflake database 231 | 232 | Args: 233 | connection: The connection name defined in the snowsql configuration ~/.snowsql/config 234 | account: The account identifier. 235 | See here: https://docs.snowflake.com/en/user-guide/admin-account-identifier.html 236 | user: The user name 237 | password: The password of the user 238 | database: The database name 239 | private_key_file: Path to private key file in PEM format used for key pair authentication. 240 | The private key file must be encrypted. 241 | private_key_passphrase: The passphrase for the private key file.
242 | """ 243 | self.connection = connection 244 | self.account = account 245 | self.user = user 246 | self.password = password 247 | self.database = database 248 | self.private_key_file = private_key_file 249 | self.private_key_passphrase = private_key_passphrase 250 | 251 | @property 252 | def sqlalchemy_url(self): 253 | assert all(v is not None for v in [self.account, self.user, self.password]), \ 254 | "sqlalchemy_url for SnowflakeDB requires a user, password and account" 255 | return (f'snowflake://{self.user}:{self.password}@{self.account}' 256 | + (f'/{self.database}' if self.database else '')) 257 | 258 | 259 | class DatabricksDB(DB): 260 | """A database connection to Databricks""" 261 | def __init__(self, host: str = None, http_path: str = None, access_token: str = None) -> None: 262 | """ 263 | Connection information for Databricks 264 | 265 | Args: 266 | host: The hostname 267 | http_path: The http path 268 | access_token: The Access Token 269 | """ 270 | self.host = host 271 | self.http_path = http_path 272 | self.access_token = access_token 273 | 274 | @property 275 | def sqlalchemy_url(self): 276 | return f"databricks+connector://token:{self.access_token}@{self.host}:443/" 277 | 278 | 279 | 280 | @functools.singledispatch 281 | def connect(db: object, **kargs) -> object: 282 | """ 283 | Creates a DB-API 2.0 (PEP 249) compatible connection to the database. 284 | 285 | See also: https://peps.python.org/pep-0249/#connection-objects 286 | 287 | Args: 288 | db: The database for which you want to get the connection (either an alias or a `dbs.DB` object) 289 | **kargs: Optional arguments. 290 | """ 291 | raise NotImplementedError(f'Please implement connect for type "{db.__class__.__name__}"') 292 | 293 | 294 | @connect.register(str) 295 | def __(alias: str, **kargs) -> object: 296 | return connect(db(alias), **kargs) 297 | 298 | 299 | @connect.register(PostgreSQLDB) 300 | def __(db, **kargs) -> 'psycopg2.extensions.connection': 301 | import psycopg2 302 | return psycopg2.connect(dbname=db.database, user=db.user, password=db.password, 303 | host=db.host, port=db.port) 304 | 305 | 306 | @connect.register(BigQueryDB) 307 | def __(db, **kargs) -> object: 308 | from google.oauth2.service_account import Credentials 309 | from google.cloud.bigquery.client import Client 310 | from google.cloud.bigquery.dbapi.connection import Connection 311 | credentials = Credentials.from_service_account_file(db.service_account_json_file_name) 312 | client = Client(project=credentials.project_id, credentials=credentials, location=db.location) 313 | return Connection(client) 314 | 315 | 316 | @connect.register(MysqlDB) 317 | def __(db, **kargs) -> 'MySQLdb.connections.Connection': 318 | import MySQLdb.cursors # requires https://github.com/PyMySQL/mysqlclient-python 319 | return MySQLdb.connect( 320 | host=db.host, user=db.user, passwd=db.password, db=db.database, port=db.port, 321 | cursorclass=MySQLdb.cursors.Cursor) 322 | 323 | 324 | @connect.register(SQLServerDB) 325 | def __(db, **kargs) -> 'pyodbc.Connection': 326 | import pyodbc # requires https://github.com/mkleehammer/pyodbc/wiki/Install 327 | server = db.host 328 | if db.port: # connecting via TCP/IP port 329 | server = f"{server},{db.port}" 330 | return pyodbc.connect(f"DRIVER={{{db.odbc_driver}}};SERVER={server};DATABASE={db.database};UID={db.user};PWD={db.password}" \ 331 | + (';Encrypt=YES;TrustServerCertificate=YES' if getattr(db, 'trust_server_certificate', False) else ''))  # trust_server_certificate is only defined on SqlcmdSQLServerDB 332 | 333 | 334 | @connect.register(SQLiteDB) 335 | def __(db, **kargs) ->
'sqlite3.Connection': 336 | import sqlite3 337 | return sqlite3.connect(database=db.file_name) 338 | 339 | 340 | @connect.register(DatabricksDB) 341 | def __(db, **kargs) -> object: 342 | from databricks_dbapi import odbc 343 | return odbc.connect( 344 | host=db.host, 345 | http_path=db.http_path, 346 | token=db.access_token, 347 | driver_path=db.odbc_driver_path) 348 | 349 | 350 | 351 | @contextlib.contextmanager 352 | def cursor_context(db: Union[str, DB]) -> object: 353 | """ 354 | A single iteration with a cursor context. When the iteration is 355 | closed, a commit is executed on the cursor. 356 | 357 | Example usage: 358 | with db.cursor_context() as c: 359 | c.execute('UPDATE table SET table.c1 = 1 WHERE table.id = 5') 360 | """ 361 | connection = connect(db) 362 | try: 363 | cursor = connection.cursor() 364 | yield cursor 365 | connection.commit() 366 | except Exception: 367 | connection.rollback() 368 | raise 369 | finally: 370 | cursor.close() 371 | connection.close() 372 | -------------------------------------------------------------------------------- /mara_db/views.py: -------------------------------------------------------------------------------- 1 | """DB schema visualization""" 2 | 3 | import datetime 4 | import re 5 | import typing 6 | from functools import singledispatch 7 | from html import escape 8 | 9 | import flask 10 | from mara_db import config, dbs 11 | from mara_page import acl, navigation, response, bootstrap, html, _, xml 12 | 13 | blueprint = flask.Blueprint('mara_db', __name__, static_folder='static', template_folder='templates', url_prefix='/db') 14 | 15 | acl_resource = acl.AclResource(name='DB Schema') 16 | 17 | 18 | def navigation_entry(): 19 | return navigation.NavigationEntry( 20 | label='DB Schema', icon='star', description='Schemas of all databases connections', 21 | children=[navigation.NavigationEntry( 22 | label='Overview', icon='list', 23 | uri_fn=lambda: flask.url_for('mara_db.index_page'))] + 24 | [ 25 | navigation.NavigationEntry( 26 | label=alias, icon='database', 27 | description=f'The schema of the {alias} db', 28 | uri_fn=lambda current_db=alias: flask.url_for('mara_db.schema_page', db_alias=current_db)) 29 | for alias, db in config.databases().items() 30 | if supports_extract_schema(db) 31 | ]) 32 | 33 | 34 | @blueprint.route('/') 35 | def index_page(): 36 | """Overview page of mara_db""" 37 | return response.Response( 38 | title=f'Database schemas', 39 | html=bootstrap.card( 40 | body=[_.div(style='display:inline-block; margin-top:15px; margin-bottom:15px; margin-right:50px;')[ 41 | _.a(href=flask.url_for('mara_db.schema_page', db_alias=db_alias))[ 42 | _.span(class_='fa fa-database')[''], ' ', db_alias], 43 | _.br, 44 | _.span(style='color:#888')[escape(str(type(db).__name__))] 45 | ] 46 | for db_alias, db in config.databases().items()]), 47 | 48 | js_files=[flask.url_for('mara_db.static', filename='schema-page.js')]) 49 | 50 | 51 | @singledispatch 52 | def supports_extract_schema(db: object) -> [bool]: 53 | """ 54 | Returns true when the db supports schema extraction 55 | 56 | Args: 57 | db: The database which shall be tested for schema extraction 58 | """ 59 | return False 60 | 61 | 62 | @supports_extract_schema.register(str) 63 | def __(alias: str): 64 | return supports_extract_schema(dbs.db(alias)) 65 | 66 | 67 | @supports_extract_schema.register(dbs.PostgreSQLDB) 68 | def __(db: dbs.PostgreSQLDB): 69 | return True 70 | 71 | 72 | @supports_extract_schema.register(dbs.RedshiftDB) 73 | def __(db: dbs.RedshiftDB): 74 | return False 75 | 
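Note that `connect` in dbs.py above is registered for PostgreSQL, BigQuery, MySQL, SQL Server, SQLite and Databricks, but not for every `DB` subclass; a `SnowflakeDB`, for example, falls through to the generic implementation and raises `NotImplementedError`. Because `connect` is a plain `functools.singledispatch` function, a downstream project can register its own handler. A minimal sketch, assuming the separately installed `snowflake-connector-python` package; this registration is not part of mara-db:

```python
import snowflake.connector  # pip install snowflake-connector-python

from mara_db import dbs


@dbs.connect.register(dbs.SnowflakeDB)
def __(db, **kargs) -> 'snowflake.connector.SnowflakeConnection':
    # snowflake.connector.connect returns a PEP 249 compatible connection,
    # so dbs.cursor_context() works with it unchanged
    return snowflake.connector.connect(
        account=db.account, user=db.user,
        password=db.password, database=db.database)
```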
76 | 
77 | @supports_extract_schema.register(dbs.BigQueryDB)
78 | def __(db: dbs.BigQueryDB):
79 |     # BigQuery does not support primary and foreign key relations
80 |     return False
81 | 
82 | 
83 | @supports_extract_schema.register(dbs.MysqlDB)
84 | def __(db: dbs.MysqlDB):
85 |     return True
86 | 
87 | 
88 | @supports_extract_schema.register(dbs.SQLServerDB)
89 | def __(db: dbs.SQLServerDB):
90 |     # check if module pyodbc can be imported
91 |     import importlib.util
92 |     pyodbc_spec = importlib.util.find_spec("pyodbc")
93 |     return pyodbc_spec is not None
94 | 
95 | 
96 | @blueprint.route('/<string:db_alias>')
97 | def schema_page(db_alias: str):
98 |     """A page that visualizes the schemas of a database"""
99 |     if db_alias not in config.databases():
100 |         flask.abort(404, f'unknown database {db_alias}')
101 | 
102 |     return response.Response(
103 |         title=f'Schema of database {db_alias}',
104 |         html=[bootstrap.card(sections=[
105 |             html.asynchronous_content(flask.url_for('mara_db.schema_selection', db_alias=db_alias)),
106 |             [_.div(id='schema-container')]]),
107 |             html.spinner_js_function()],
108 |         js_files=[flask.url_for('mara_db.static', filename='schema-page.js')],
109 |         action_buttons=[response.ActionButton(
110 |             action='javascript:schemaPage.downloadSvg()', label='SVG',
111 |             title='Save current chart as SVG file', icon='download')]
112 |     )
113 | 
114 | 
115 | @singledispatch
116 | def schemas_with_foreign_key_constraints(db: object) -> [str]:
117 |     """
118 |     Returns all schemas that are affected by foreign key constraints
119 | 
120 |     Args:
121 |         db: The database in which to run the query (either an alias or a `dbs.DB` object)
122 |     """
123 |     raise NotImplementedError(
124 |         f'Please implement schemas_with_foreign_key_constraints for type "{db.__class__.__name__}"')
125 | 
126 | 
127 | @schemas_with_foreign_key_constraints.register(str)
128 | def __(alias: str):
129 |     return schemas_with_foreign_key_constraints(dbs.db(alias))
130 | 
131 | 
132 | @schemas_with_foreign_key_constraints.register(dbs.PostgreSQLDB)
133 | def __(db: dbs.PostgreSQLDB):
134 |     import mara_db.postgresql
135 |     with mara_db.postgresql.postgres_cursor_context(db) as cursor:
136 |         cursor.execute('''
137 |             SELECT
138 |               array_cat(array_agg(DISTINCT constrained_table_schema.nspname), array_agg(DISTINCT referenced_table_schema.nspname))
139 |             FROM pg_constraint
140 |             JOIN pg_class constrained_table ON constrained_table.oid = pg_constraint.conrelid
141 |             JOIN pg_namespace constrained_table_schema ON constrained_table.relnamespace = constrained_table_schema.oid
142 |             JOIN pg_class referenced_table ON referenced_table.oid = pg_constraint.confrelid
143 |             JOIN pg_namespace referenced_table_schema ON referenced_table.relnamespace = referenced_table_schema.oid''')
144 |         result = cursor.fetchone()
145 |         if result != (None,):
146 |             return list(set(result[0]))
147 | 
148 | 
149 | @schemas_with_foreign_key_constraints.register(dbs.MysqlDB)
150 | def __(db: dbs.MysqlDB):
151 |     import mara_db.mysql
152 |     with mara_db.mysql.mysql_cursor_context(db) as cursor:
153 |         cursor.execute("""
154 |             SELECT DISTINCT table_schema
155 |             FROM information_schema.table_constraints
156 |             WHERE CONSTRAINT_TYPE = 'FOREIGN KEY'
157 |             UNION
158 |             SELECT DISTINCT REFERENCED_TABLE_SCHEMA
159 |             FROM information_schema.key_column_usage
160 |             WHERE REFERENCED_TABLE_SCHEMA IS NOT NULL;
161 |             """)
162 |         return [row[0] for row in cursor.fetchall()]
163 | 
164 | 
165 | @schemas_with_foreign_key_constraints.register(dbs.SQLServerDB)
166 | def __(db: dbs.SQLServerDB):
167 |     import mara_db.sqlserver
168 |     with mara_db.sqlserver.sqlserver_cursor_context(db) as cursor:
169 |         cursor.execute("""
170 |             SELECT DISTINCT fpos.name AS schema_name
171 |             FROM sys.foreign_keys AS f
172 |             INNER JOIN sys.foreign_key_columns AS fc ON f.object_id = fc.constraint_object_id
173 |             INNER JOIN sys.objects AS fpo ON fpo.object_id = f.parent_object_id
174 |             INNER JOIN sys.schemas AS fpos ON fpos.schema_id = fpo.schema_id
175 |             """)
176 |         return [row[0] for row in cursor.fetchall()]
177 | 
178 | 
179 | @blueprint.route('/<string:db_alias>/.schemas')
180 | def schema_selection(db_alias: str):
181 |     """Asynchronously computes the list of schemas with foreign key constraints"""
182 |     schemas_with_fk_constraints = schemas_with_foreign_key_constraints(db_alias)
183 | 
184 |     if not schemas_with_fk_constraints:
185 |         return str(_.i['No schemas with foreign key constraints found'])
186 | 
187 |     return ''.join(xml.render([
188 |         [_.div(class_='form-check form-check-inline')[
189 |              _.label(class_='form-check-label')[
190 |                  _.input(class_="form-check-input schema-checkbox", type="checkbox", value=schema_name)[
191 |                      ''], ' ', schema_name]]
192 |          for schema_name in sorted(schemas_with_fk_constraints)],
193 |         '   ',
194 |         _.div(class_='form-check form-check-inline')[
195 |             _.label(class_='form-check-label')[
196 |                 _.input(class_="form-check-input", id='hide-columns-checkbox', type="checkbox")[
197 |                     ''], ' ', 'hide columns']],
198 |         '   ',
199 |         _.div(class_='form-check form-check-inline')[
200 |             _.label(class_='form-check-label')[
201 |                 'graphviz engine ',
202 |                 _.select(id='engine', style='border:none;background-color:white;')[
203 |                     [_.option(value=engine)[engine] for engine in ['neato', 'dot', 'twopi', 'fdp']]
204 |                 ]]],
205 |         _.script['''
206 | var schemaPage = SchemaPage("''' + flask.url_for('mara_db.schema_page', db_alias=db_alias) + '''", "''' + db_alias + '''");
207 | ''']]))
208 | 
209 | 
210 | @singledispatch
211 | def extract_schema(db: object, schema_names: [str]) -> (typing.Dict, typing.Set):
212 |     """
213 |     Extracts foreign key constraints and the involved tables from a db
214 | 
215 |     Args:
216 |         db: The database in which to run the query (either an alias or a `dbs.DB` object)
217 |         schema_names: the schemas to visualize
218 | 
219 |     Returns:
220 |         A dictionary of tables:
221 |         {(table_schema, table_name): {'columns': [columns], 'constrained-columns': {constrained-columns}}}
222 | 
223 |         All foreign key constraints as a set of tuples:
224 |         {((table_schema, table_name), (referenced_schema_name, referenced_table_name))}
225 |     """
226 |     raise NotImplementedError(f'Please implement extract_schema for type "{db.__class__.__name__}"')
227 | 
228 | 
229 | @extract_schema.register(str)
230 | def __(alias: str, schema_names: [str]):
231 |     return extract_schema(dbs.db(alias), schema_names)
232 | 
233 | 
234 | @extract_schema.register(dbs.PostgreSQLDB)
235 | def __(db: dbs.PostgreSQLDB, schema_names: [str]):
236 |     import mara_db.postgresql
237 | 
238 |     # get all table inheritance relations as a dictionary: {(child_schema, child_table): (parent_schema, parent_table)}
239 |     inherited_tables = {}
240 |     with mara_db.postgresql.postgres_cursor_context(db) as cursor:
241 |         cursor.execute("""
242 |             SELECT
243 |               rel_namespace.nspname, rel.relname,
244 |               parent_namespace.nspname, parent.relname
245 |             FROM pg_inherits
246 |             JOIN pg_class parent ON parent.oid = pg_inherits.inhparent
247 |             JOIN pg_class rel ON rel.oid = pg_inherits.inhrelid
248 |             JOIN pg_namespace parent_namespace ON parent_namespace.oid = parent.relnamespace
249 |             JOIN pg_namespace rel_namespace ON rel_namespace.oid = rel.relnamespace""")
250 |         for schema_name, table_name, parent_schema_name, parent_table_name in cursor.fetchall():
251 |             inherited_tables[(schema_name, table_name)] = (parent_schema_name, parent_table_name)
252 | 
253 |     # get all tables that have foreign key constraints on them or are referenced by foreign key constraints
254 |     tables = {}  # {(table_schema, table_name): {'columns': [columns], 'constrained-columns': {constrained-columns}}}
255 |     foreign_key_constraints = set()  # {((table_schema, table_name), (referenced_schema_name, referenced_table_name))}
256 | 
257 |     def empty_table():
258 |         return {'columns': [], 'constrained-columns': set()}
259 | 
260 |     with mara_db.postgresql.postgres_cursor_context(db) as cursor:
261 |         cursor.execute(f'''
262 |             SELECT
263 |               constrained_table_schema.nspname,
264 |               constrained_table.relname,
265 |               array_agg(constrained_column.attname),
266 |               referenced_table_schema.nspname,
267 |               referenced_table.relname
268 |             FROM pg_constraint
269 |             JOIN pg_class constrained_table ON constrained_table.oid = pg_constraint.conrelid
270 |             JOIN pg_namespace constrained_table_schema ON constrained_table.relnamespace = constrained_table_schema.oid
271 |             JOIN pg_class referenced_table ON referenced_table.oid = pg_constraint.confrelid
272 |             JOIN pg_namespace referenced_table_schema ON referenced_table.relnamespace = referenced_table_schema.oid
273 |             JOIN pg_attribute constrained_column ON constrained_column.attrelid = constrained_table.oid AND attnum = ANY (conkey)
274 |             WHERE constrained_table_schema.nspname = ANY ({'%s'})
275 |             GROUP BY constrained_table_schema.nspname, constrained_table.relname, referenced_table_schema.nspname, referenced_table.relname;
276 |             ''', (schema_names,))
277 |         for schema_name, table_name, table_columns, referenced_schema_name, referenced_table_name in cursor.fetchall():
278 |             referring_table = (schema_name, table_name)
279 |             if referring_table in inherited_tables:
280 |                 referring_table = inherited_tables[referring_table]
281 |             if referring_table not in tables:
282 |                 tables[referring_table] = empty_table()
283 |             tables[referring_table]['constrained-columns'].update(table_columns)
284 | 
285 |             referenced_table = (referenced_schema_name, referenced_table_name)
286 |             if referenced_table in inherited_tables:
287 |                 referenced_table = inherited_tables[referenced_table]
288 | 
289 |             if referenced_table not in tables:
290 |                 tables[referenced_table] = empty_table()
291 | 
292 |             foreign_key_constraints.add((referring_table, referenced_table))
293 | 
294 |     # get enum usages
295 |     with mara_db.postgresql.postgres_cursor_context(db) as cursor:
296 |         cursor.execute(f'''
297 |             SELECT DISTINCT
298 |               pg_namespace_table.nspname AS table_schema,
299 |               pg_class_table.relname AS table_name,
300 | 
301 |               pg_namespace_enum.nspname AS enum_schema,
302 |               pg_type.typname AS enum_type
303 |             FROM pg_attribute
304 |             JOIN pg_class pg_class_table ON pg_class_table.oid = attrelid
305 |             JOIN pg_namespace pg_namespace_table ON pg_namespace_table.oid = pg_class_table.relnamespace
306 |             JOIN pg_type ON atttypid = pg_type.oid
307 |             JOIN pg_namespace pg_namespace_enum ON typnamespace = pg_namespace_enum.oid
308 |             JOIN pg_enum ON pg_enum.enumtypid = pg_type.oid
309 |             WHERE pg_namespace_table.nspname = ANY ({'%s'})''', (schema_names,))
310 |         for table_schema, table_name, enum_schema, enum_name in cursor.fetchall():
311 |             if (table_schema, table_name) in tables:
312 |                 if (enum_schema, enum_name) not in tables:
313 |                     tables[(enum_schema, enum_name)] = empty_table()
314 | 
315 |                 foreign_key_constraints.add(((table_schema, table_name), (enum_schema, enum_name)))
316 | 
317 |     # get all columns of all tables
318 |     with mara_db.postgresql.postgres_cursor_context(db) as cursor:
319 |         cursor.execute('''
320 |             SELECT
321 |               table_schema, table_name,
322 |               array_agg(column_name :: TEXT ORDER BY ordinal_position)
323 |             FROM information_schema.columns
324 |             GROUP BY table_schema, table_name''')
325 |         for schema_name, table_name, columns in cursor.fetchall():
326 |             if (schema_name, table_name) in tables:
327 |                 tables[(schema_name, table_name)]['columns'] = columns
328 | 
329 |     return tables, foreign_key_constraints
330 | 
331 | 
332 | @extract_schema.register(dbs.MysqlDB)
333 | def __(db: dbs.MysqlDB, schema_names: [str]):
334 |     import mara_db.mysql
335 | 
336 |     # get all tables that have foreign key constraints on them or are referenced by foreign key constraints
337 |     tables = {}  # {(table_schema, table_name): {'columns': [columns], 'constrained-columns': {constrained-columns}}}
338 |     foreign_key_constraints = set()  # {((table_schema, table_name), (referenced_schema_name, referenced_table_name))}
339 | 
340 |     def empty_table():
341 |         return {'columns': [], 'constrained-columns': set()}
342 | 
343 |     with mara_db.mysql.mysql_cursor_context(db) as cursor:
344 |         cursor.execute(f'''
345 |             SELECT i.table_schema,
346 |                    i.table_name,
347 |                    k.column_name,
348 |                    k.referenced_table_schema,
349 |                    k.referenced_table_name
350 |             FROM information_schema.table_constraints i
351 |             LEFT JOIN information_schema.KEY_COLUMN_USAGE k
352 |               ON i.constraint_name = k.constraint_name
353 |             WHERE i.constraint_type = 'FOREIGN KEY'
354 |               AND k.referenced_table_name IS NOT NULL
355 |               AND i.table_schema IN {'%s'};''', (schema_names,))
356 |         for table_schema, table_name, column_name, referenced_table_schema, referenced_table_name in cursor.fetchall():
357 |             referring_table = (table_schema, table_name)
358 |             referenced_table = (referenced_table_schema, referenced_table_name)
359 |             if referring_table not in tables:
360 |                 tables[referring_table] = empty_table()
361 |             tables[referring_table]['constrained-columns'].add(column_name)
362 | 
363 |             if referenced_table not in tables:
364 |                 tables[referenced_table] = empty_table()
365 | 
366 |             foreign_key_constraints.add((referring_table, referenced_table))
367 | 
368 |     with mara_db.mysql.mysql_cursor_context(db) as cursor:
369 |         cursor.execute(f'''
370 |             SELECT table_schema, table_name, column_name
371 |             FROM information_schema.COLUMNS
372 |             WHERE table_schema IN {'%s'}
373 |             ''', (schema_names,))
374 |         for table_schema, table_name, column_name in cursor.fetchall():
375 |             if (table_schema, table_name) in tables:
376 |                 tables[(table_schema, table_name)]['columns'].append(column_name)
377 | 
378 |     return tables, foreign_key_constraints
379 | 
380 | 
381 | @extract_schema.register(dbs.SQLServerDB)
382 | def __(db: dbs.SQLServerDB, schema_names: [str]):
383 |     import mara_db.sqlserver
384 | 
385 |     # get all tables that have foreign key constraints on them or are referenced by foreign key constraints
386 |     tables = {}  # {(table_schema, table_name): {'columns': [columns], 'constrained-columns': {constrained-columns}}}
387 |     foreign_key_constraints = set()  # {((table_schema, table_name), (referenced_schema_name, referenced_table_name))}
388 | 
389 |     def empty_table():
390 |         return {'columns': [], 'constrained-columns': set()}
391 | 
392 |     with mara_db.sqlserver.sqlserver_cursor_context(db) as cursor:
393 |         cursor.execute('''
394 |             SELECT
395 |               s.name AS table_schema,
396 |               t.name AS table_name,
397 |               COL_NAME(fkc.parent_object_id, fkc.parent_column_id) AS column_name,
398 |               fkts.name AS referenced_table_schema,
399 |               OBJECT_NAME(fk.referenced_object_id) AS referenced_table_name
400 |             FROM sys.tables t
401 |             INNER JOIN sys.schemas s ON
402 |                 s.schema_id = t.schema_id
403 |             LEFT JOIN sys.foreign_keys fk ON
404 |                 fk.parent_object_id = t.object_id
405 |             LEFT JOIN sys.foreign_key_columns fkc ON
406 |                 fkc.constraint_object_id = fk.object_id
407 |             LEFT JOIN sys.tables fkt ON
408 |                 fkt.object_id = fk.referenced_object_id
409 |             LEFT JOIN sys.schemas fkts ON
410 |                 fkts.schema_id = fkt.schema_id
411 |             WHERE s.name IN ('%s');''' % '\',\''.join(schema_names))
412 |         for table_schema, table_name, column_name, referenced_table_schema, referenced_table_name in cursor.fetchall():
413 |             referring_table = (table_schema, table_name)
414 |             referenced_table = (referenced_table_schema, referenced_table_name)
415 |             if referring_table not in tables:
416 |                 tables[referring_table] = empty_table()
417 |             if column_name is not None:
418 |                 tables[referring_table]['constrained-columns'].add(column_name)
419 | 
420 |             # this logic is necessary so that tables with no foreign key are not added to the schema
421 |             if referenced_table_schema is not None and referenced_table_name is not None:
422 |                 if referenced_table not in tables:
423 |                     tables[referenced_table] = empty_table()
424 |                 foreign_key_constraints.add((referring_table, referenced_table))
425 | 
426 |     with mara_db.sqlserver.sqlserver_cursor_context(db) as cursor:
427 |         cursor.execute('''
428 |             SELECT
429 |               s.name AS table_schema,
430 |               t.name AS table_name,
431 |               c.name AS column_name
432 |             FROM sys.columns c
433 |             INNER JOIN sys.tables t ON
434 |                 t.object_id = c.object_id
435 |             INNER JOIN sys.schemas s ON
436 |                 s.schema_id = t.schema_id
437 |             WHERE s.name IN ('%s')
438 |             ''' % '\',\''.join(schema_names))
439 |         for table_schema, table_name, column_name in cursor.fetchall():
440 |             if (table_schema, table_name) in tables:
441 |                 tables[(table_schema, table_name)]['columns'].append(column_name)
442 |     return tables, foreign_key_constraints
443 | 
444 | 
445 | @blueprint.route('/<string:db_alias>/<path:schemas>')
446 | @acl.require_permission(acl_resource, do_abort=False)
447 | def draw_schema(db_alias: str, schemas: str):
448 |     """Shows a chart of the tables and FK relationships in a given database and schema list"""
449 | 
450 |     if db_alias not in config.databases():
451 |         flask.abort(404, f'unknown database {db_alias}')
452 | 
453 |     if not supports_extract_schema(db_alias):
454 |         flask.abort(404, f'could not extract schema for database {db_alias}')
455 | 
456 |     schema_names = schemas.split('/')
457 |     hide_columns = flask.request.args.get('hide-columns')
458 |     engine = flask.request.args.get('engine', 'neato')
459 | 
460 |     tables, fk_constraints = extract_schema(db_alias, schema_names)
461 | 
462 |     import graphviz.backend
463 | 
464 |     graph = graphviz.Digraph(engine=engine,
465 |                              graph_attr={'splines': 'True', 'overlap': 'ortho'})
466 | 
467 |     schema_colors = {}
468 |     fk_pattern = re.compile(config.schema_ui_foreign_key_column_regex())
469 |     for schema_name, table_name in sorted(tables):
470 |         if schema_name not in schema_colors:
471 |             colors = ['#ffffcc', '#bbffcc', '#cceeff', '#eedd99', '#ddee99', '#99ddff', '#dddddd']
472 |             schema_colors[schema_name] = colors[len(schema_colors) % len(colors)]
473 | 
474 |         # build a graphviz HTML-like label: one header row, then one row per column
475 |         label = '< <table border="0" cellspacing="0" bgcolor="' \
476 |                 + schema_colors[schema_name] + '">'
477 | 
478 |         node_name = schema_name + '.' + table_name
479 |         if hide_columns:
480 |             label += '<tr><td align="left"><b> ' + table_name.replace('_', '<br/>') + ' </b></td></tr>'
481 |         else:
482 |             label += '<tr><td align="left"><b> ' + table_name + ' </b></td></tr>'
483 |             for column in tables[(schema_name, table_name)]['columns']:
484 |                 label += '<tr><td align="left">'
485 |                 if fk_pattern.match(column) \
486 |                         and column not in tables[(schema_name, table_name)]['constrained-columns']:
487 |                     label += '<b> ' + column + ' </b>'
488 |                 else:
489 |                     label += column
490 |                 label += '</td></tr>'
491 | 
492 |         label += '</table> >'
493 | 
494 |         graph.node(name=node_name, label=label,
495 |                    _attributes={'fontname': 'Helvetica, Arial, sans-serif', 'fontsize': '10',
496 |                                 'fontcolor': '#555555', 'shape': 'none'})
497 | 
498 |     for (schema_name, table_name), (referenced_schema_name, referenced_table_name) in fk_constraints:
499 |         graph.edge(schema_name + '.' + table_name, referenced_schema_name + '.' + referenced_table_name,
500 |                    _attributes={'color': '#888888'})
501 | 
502 |     try:
503 |         svg = graph.pipe('svg').decode('utf-8')
504 |     except graphviz.backend.ExecutableNotFound as e:
505 |         import uuid
506 |         # This exception occurs when the graphviz tools are not found.
507 |         # We fall back to client-side rendering with the javascript library d3-graphviz.
508 |         graph_id = f'dependency_graph_{uuid.uuid4().hex}'
509 |         escaped_graph_source = graph.source.replace("`", "\\`")
510 |         return str(_.div(id=graph_id)[
511 |             _.tt(style="color:red")[str(e)],
512 |         ]) + str(_.script[
513 |             f'div=d3.select("#{graph_id}");',
514 |             'graph=div.graphviz();',
515 |             'div.text("");',
516 |             f'graph.renderDot(`{escaped_graph_source}`);',
517 |         ])
518 | 
519 |     response = flask.Response(svg)
520 |     response.headers[
521 |         'Content-Disposition'] = f'attachment; filename="{datetime.date.today().isoformat()}-{db_alias}.svg"'
522 |     return response
523 | 
--------------------------------------------------------------------------------
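Taken together, `dbs.connect` and `dbs.cursor_context` give a uniform DB-API 2.0 entry point over all configured databases: an alias is resolved to a `DB` object, a driver-specific connection is opened, and commit/rollback are handled around the yielded cursor. A minimal usage sketch; the alias `'dwh'` is hypothetical and assumes `mara_db.config.databases()` has been set up to return it:

```python
import mara_db.dbs

# 'dwh' is a hypothetical alias; it must resolve to a DB instance via
# mara_db.config.databases() for connect()/cursor_context() to find it
with mara_db.dbs.cursor_context('dwh') as cursor:
    cursor.execute('SELECT 1')
    print(cursor.fetchone())
# on normal exit the transaction was committed; an exception would have
# triggered a rollback before the connection was closed
```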
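The extraction layer of views.py can likewise be driven outside of Flask, which is handy when debugging a schema: `schemas_with_foreign_key_constraints` lists candidate schemas and `extract_schema` returns the `(tables, fk_constraints)` structures described in its docstring. A sketch under the same hypothetical `'dwh'` alias (and assuming the matching database driver is installed):

```python
from mara_db import views

# schemas that participate in foreign key constraints (may be None or empty)
schemas = views.schemas_with_foreign_key_constraints('dwh') or []

# tables: {(schema, table): {'columns': [...], 'constrained-columns': {...}}}
# fk_constraints: {((schema, table), (referenced_schema, referenced_table))}
tables, fk_constraints = views.extract_schema('dwh', schemas)

for (schema, table), info in sorted(tables.items()):
    print(f'{schema}.{table}: {len(info["columns"])} columns')
for referring, referenced in sorted(fk_constraints):
    print(f'{referring} -> {referenced}')
```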