├── .env.example ├── .github ├── issue_template.md ├── pull_request_template.md ├── stale.yml └── workflows │ └── general.yml ├── .gitignore ├── LEAD.md ├── LICENSE.md ├── MANIFEST.in ├── Makefile ├── README.md ├── data ├── articles.csv ├── articles.json ├── comments.csv └── comments.json ├── examples ├── __init__.py └── storage.py ├── pylama.ini ├── pytest.ini ├── setup.cfg ├── setup.py ├── tableschema_sql ├── VERSION ├── __init__.py ├── mapper.py ├── storage.py └── writer.py └── tests ├── __init__.py ├── test_mapper.py └── test_storage.py /.env.example: -------------------------------------------------------------------------------- 1 | POSTGRES_URL='postgresql://:@localhost:5432/postgres' 2 | SQLITE_URL='sqlite:///:memory:' 3 | MYSQL_URL='mysql+pymysql://:@localhost:3306/test?charset=utf8' 4 | -------------------------------------------------------------------------------- /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Please replace this line with full information about your idea or problem. If it's a bug, share as much information as possible to reproduce it. 4 | 5 | --- 6 | 7 | Please preserve this line to notify @roll (lead of this repository) 8 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | Please replace this line with full information about your pull request. Make sure that tests pass before publishing it. 4 | 5 | --- 6 | 7 | Please preserve this line to notify @roll (lead of this repository) 8 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 90 3 | 4 | # Number of days of inactivity before a stale issue is closed 5 | daysUntilClose: 30 6 | 7 | # Issues with these labels will never be considered stale 8 | exemptLabels: 9 | - feature 10 | - enhancement 11 | - bug 12 | 13 | # Label to use when marking an issue as stale 14 | staleLabel: wontfix 15 | 16 | # Comment to post when marking an issue as stale. Set to `false` to disable 17 | markComment: > 18 | This issue has been automatically marked as stale because it has not had 19 | recent activity. It will be closed if no further activity occurs. Thank you 20 | for your contributions. 21 | 22 | # Comment to post when closing a stale issue.
Set to `false` to disable 23 | closeComment: false 24 | -------------------------------------------------------------------------------- /.github/workflows/general.yml: -------------------------------------------------------------------------------- 1 | name: general 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - v*.*.* 9 | pull_request: 10 | branches: 11 | - main 12 | 13 | jobs: 14 | 15 | test-linux: 16 | runs-on: ubuntu-latest 17 | strategy: 18 | matrix: 19 | python-version: [3.8, 3.9, "3.10"] 20 | sqlalchemy-version: [1.4, 2.0] 21 | services: 22 | postgres: 23 | image: postgres 24 | env: 25 | POSTGRES_PASSWORD: postgres 26 | options: --health-cmd=pg_isready --health-interval=10s --health-timeout=5s --health-retries=5 27 | ports: 28 | - 5432:5432 29 | mysql: 30 | image: mysql:8 31 | env: 32 | MYSQL_USER: test_user 33 | MYSQL_PASSWORD: test_password 34 | MYSQL_ROOT_PASSWORD: root 35 | MYSQL_DATABASE: test_db 36 | options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=10 37 | ports: 38 | - 3306:3306 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v2 42 | - name: Install Python 43 | uses: actions/setup-python@v2 44 | with: 45 | python-version: ${{ matrix.python-version }} 46 | - name: Install dependencies 47 | run: | 48 | make install 49 | pip install "sqlalchemy>=${{ matrix.sqlalchemy-version }}" 50 | - name: Test software 51 | env: 52 | POSTGRES_URL: postgresql://postgres:postgres@localhost:5432/postgres 53 | SQLITE_URL: 'sqlite:///:memory:' 54 | MYSQL_URL: mysql+pymysql://test_user:test_password@localhost:3306/test_db?charset=utf8 55 | run: make test 56 | - name: Report coverage 57 | uses: codecov/codecov-action@v1 58 | 59 | release: 60 | if: github.event_name == 'push' && contains(github.ref, 'refs/tags/') 61 | runs-on: ubuntu-latest 62 | needs: [test-linux] 63 | steps: 64 | - name: Checkout repository 65 | uses: actions/checkout@v2 66 | - name: Install Python 67 | uses: actions/setup-python@v2 68 | with: 69 | python-version: 3.9 70 | - name: Install dependencies 71 | run: | 72 | python -m pip install --upgrade pip 73 | pip install setuptools wheel 74 | - name: Build distribution 75 | run: | 76 | python setup.py sdist bdist_wheel 77 | - name: Publish to PYPI 78 | uses: pypa/gh-action-pypi-publish@master 79 | with: 80 | password: ${{ secrets.PYPI_API_KEY }} 81 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # pyenv 60 | .python-version 61 | 62 | # dotenv 63 | .env 64 | 65 | # Extras 66 | tabulator 67 | tableschema 68 | !/tableschema_sql/mappers.py 69 | -------------------------------------------------------------------------------- /LEAD.md: -------------------------------------------------------------------------------- 1 | roll 2 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Open Knowledge 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-include VERSION 2 | include LICENSE.md 3 | include Makefile 4 | include pylama.ini 5 | include pytest.ini 6 | include README.md 7 | include tox.ini 8 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all install list readme release templates test version 2 | 3 | 4 | PACKAGE := $(shell grep '^PACKAGE =' setup.py | cut -d "'" -f2) 5 | VERSION := $(shell head -n 1 $(PACKAGE)/VERSION) 6 | LEAD := $(shell head -n 1 LEAD.md) 7 | 8 | 9 | all: list 10 | 11 | install: 12 | pip install --upgrade -e .[develop] 13 | 14 | list: 15 | @grep '^\.PHONY' Makefile | cut -d' ' -f2- | tr ' ' '\n' 16 | 17 | readme: 18 | pip install md-toc 19 | pip install referencer 20 | referencer $(PACKAGE) README.md --in-place 21 | md_toc -p README.md github --header-levels 3 22 | sed -i '/(#tableschema-sql-py)/,+2d' README.md 23 | 24 | release: 25 | git checkout master && git pull origin && git fetch -p && git diff 26 | @echo "\nContinuing in 10 seconds. Press to abort\n" && sleep 10 27 | @git log --pretty=format:"%C(yellow)%h%Creset %s%Cgreen%d" --reverse -20 28 | @echo "\nReleasing v$(VERSION) in 10 seconds. 
Press to abort\n" && sleep 10 29 | git commit -a -m 'v$(VERSION)' && git tag -a v$(VERSION) -m 'v$(VERSION)' 30 | git push --follow-tags 31 | 32 | templates: 33 | sed -i -E "s/@(\w*)/@$(LEAD)/" .github/issue_template.md 34 | sed -i -E "s/@(\w*)/@$(LEAD)/" .github/pull_request_template.md 35 | 36 | test: 37 | pylama $(PACKAGE) 38 | py.test -vvv --cov tableschema_sql --cov-report term-missing 39 | 40 | version: 41 | @echo $(VERSION) 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tableschema-sql-py 2 | 3 | [![Travis](https://img.shields.io/travis/frictionlessdata/tableschema-sql-py/master.svg)](https://travis-ci.org/frictionlessdata/tableschema-sql-py) 4 | [![Coveralls](http://img.shields.io/coveralls/frictionlessdata/tableschema-sql-py/master.svg)](https://coveralls.io/r/frictionlessdata/tableschema-sql-py?branch=master) 5 | [![PyPi](https://img.shields.io/pypi/v/tableschema-sql.svg)](https://pypi.python.org/pypi/tableschema-sql) 6 | [![Github](https://img.shields.io/badge/github-master-brightgreen)](https://github.com/frictionlessdata/tableschema-sql-py) 7 | [![Gitter](https://img.shields.io/gitter/room/frictionlessdata/chat.svg)](https://gitter.im/frictionlessdata/chat) 8 | 9 | Generate and load SQL tables based on [Table Schema](http://specs.frictionlessdata.io/table-schema/) descriptors. 10 | 11 | ## Features 12 | 13 | - implements `tableschema.Storage` interface 14 | - provides additional features like indexes and updating 15 | 16 | ## Contents 17 | 18 | 19 | 20 | - [Getting Started](#getting-started) 21 | - [Installation](#installation) 22 | - [Documentation](#documentation) 23 | - [API Reference](#api-reference) 24 | - [`Storage`](#storage) 25 | - [Contributing](#contributing) 26 | - [Changelog](#changelog) 27 | 28 | 29 | 30 | ## Getting Started 31 | 32 | ### Installation 33 | 34 | The package uses semantic versioning, which means that major versions could include breaking changes. It's highly recommended to specify a `package` version range in your `setup/requirements` file, e.g. `package>=1.0,<2.0`. 35 | 36 | ```bash 37 | pip install tableschema-sql 38 | ``` 39 | 40 | ## Documentation 41 | 42 | ```python 43 | from datapackage import Package 44 | from tableschema import Table 45 | from sqlalchemy import create_engine 46 | 47 | # Create sqlalchemy engine 48 | engine = create_engine('sqlite://') 49 | 50 | # Save package to SQL 51 | package = Package('datapackage.json') 52 | package.save(storage='sql', engine=engine) 53 | 54 | # Load package from SQL 55 | package = Package(storage='sql', engine=engine) 56 | package.resources 57 | ``` 58 | 59 | ## API Reference 60 | 61 | ### `Storage` 62 | ```python 63 | Storage(self, engine, dbschema=None, prefix='', reflect_only=None, autoincrement=None) 64 | ``` 65 | SQL storage 66 | 67 | Package implements 68 | [Tabular Storage](https://github.com/frictionlessdata/tableschema-py#storage) 69 | interface (see full documentation on the link): 70 | 71 | ![Storage](https://i.imgur.com/RQgrxqp.png) 72 | 73 | > Only additional API is documented 74 | 75 | __Arguments__ 76 | - __engine (object)__: `sqlalchemy` engine 77 | - __dbschema (str)__: name of database schema 78 | - __prefix (str)__: prefix for all buckets 79 | - __reflect_only (callable)__: 80 | a boolean predicate to filter the list of table names when reflecting 81 | - __autoincrement (str/dict)__: 82 | add autoincrement column at the beginning.
83 | - if a string it's an autoincrement column name 84 | - if a dict it's an autoincrements mapping with column 85 | names indexed by bucket names, for example, 86 | `{'bucket1': 'id', 'bucket2': 'other_id'}` 87 | 88 | 89 | #### `storage.create` 90 | ```python 91 | storage.create(self, bucket, descriptor, force=False, indexes_fields=None) 92 | ``` 93 | Create bucket 94 | 95 | __Arguments__ 96 | - __indexes_fields (str[])__: 97 | list of tuples containing field names, or list of such lists 98 | 99 | 100 | #### `storage.write` 101 | ```python 102 | storage.write(self, bucket, rows, keyed=False, as_generator=False, update_keys=None, buffer_size=1000, use_bloom_filter=True) 103 | ``` 104 | Write to bucket 105 | 106 | __Arguments__ 107 | - __keyed (bool)__: 108 | accept keyed rows 109 | - __as_generator (bool)__: 110 | returns a generator to provide writing control to the client 111 | - __update_keys (str[])__: 112 | update instead of inserting if key values match existing rows 113 | - __buffer_size (int=1000)__: 114 | maximum number of rows to try and write to the db in one batch 115 | - __use_bloom_filter (bool=True)__: 116 | should we use a bloom filter to optimize DB update performance 117 | (in exchange for some setup time) 118 | 119 | 120 | ## Contributing 121 | 122 | > The project follows the [Open Knowledge International coding standards](https://github.com/okfn/coding-standards). 123 | 124 | The recommended way to get started is to create and activate a project virtual environment. 125 | To install the package and development dependencies into the active environment: 126 | 127 | ```bash 128 | $ make install 129 | ``` 130 | 131 | To run tests with linting and coverage: 132 | 133 | ```bash 134 | $ make test 135 | ``` 136 | 137 | ## Changelog 138 | 139 | Only breaking and the most important changes are described here. The full changelog and documentation for all released versions can be found in the nicely formatted [commit history](https://github.com/frictionlessdata/tableschema-sql-py/commits/master). 140 | 141 | #### v1.3 142 | 143 | - Implemented constraints loading to a database 144 | 145 | #### v1.2 146 | 147 | - Add option to configure buffer size, bloom filter use (#77) 148 | 149 | #### v1.1 150 | 151 | - Added support for the `autoincrement` parameter to be a mapping 152 | - Fixed autoincrement support for SQLite and MySQL 153 | 154 | #### v1.0 155 | 156 | - Initial driver implementation.
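To complement the API Reference above, here is a minimal sketch of driving `Storage` directly instead of going through `datapackage`. It is illustrative only: the in-memory SQLite engine, the `prefix_` prefix, the `articles` bucket and its descriptor are made-up examples, while the `Storage`, `create`, `write`, `describe` and `read` calls follow the signatures documented above.

```python
from sqlalchemy import create_engine
from tableschema_sql import Storage

# Illustrative engine and prefix; any SQLAlchemy engine supported by the package should work
engine = create_engine('sqlite://')
storage = Storage(engine=engine, prefix='prefix_')

# A made-up Table Schema descriptor for a single bucket
descriptor = {
    'fields': [
        {'name': 'id', 'type': 'integer', 'constraints': {'required': True}},
        {'name': 'name', 'type': 'string'},
        {'name': 'rating', 'type': 'number'},
    ],
    'primaryKey': 'id',
}

# Create the bucket (backed by the "prefix_articles" table) with an index on "rating"
storage.create('articles', descriptor, force=True, indexes_fields=[['rating']])

# Insert rows, then update a matching row instead of inserting by passing update_keys
storage.write('articles', [[1, 'Taxes', 9.5], [2, '中国人', 7]])
storage.write('articles', [[1, 'Taxes', 9.9]], update_keys=['id'])

print(storage.buckets)                  # ['articles']
print(storage.describe('articles'))     # the bucket's descriptor
print(list(storage.read('articles')))   # rows read back from the table
```

Compared to the `datapackage`-based example in the Documentation section, this form gives direct control over `indexes_fields`, `update_keys`, `buffer_size` and the other parameters listed above.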
157 | -------------------------------------------------------------------------------- /data/articles.csv: -------------------------------------------------------------------------------- 1 | id,parent,name,current,rating,created_year,created_date,created_time,created_datetime,stats,persons,location 2 | 1,,Taxes,True,9.5,2015,2015-01-01,03:00:00,2015-01-01T03:00:00Z,{"chars":560},["mike"],"{""type"": ""Point"",""coordinates"":[50.00,50.00]}" 3 | 2,1,中国人,False,7,2015,2015-12-31,15:45:33,2015-12-31T15:45:33Z,{"chars":970},["chen"],"{""type"": ""Point"",""coordinates"":[33.33,33.33]}" 4 | -------------------------------------------------------------------------------- /data/articles.json: -------------------------------------------------------------------------------- 1 | { 2 | "primaryKey": "id", 3 | "foreignKeys": [ 4 | { 5 | "fields": "parent", 6 | "reference": { 7 | "resource": "", 8 | "fields": "id" 9 | } 10 | } 11 | ], 12 | "fields": [ 13 | { 14 | "name": "id", 15 | "type": "integer", 16 | "constraints": { 17 | "required": true 18 | } 19 | }, 20 | { 21 | "name": "parent", 22 | "type": "integer" 23 | }, 24 | { 25 | "name": "name", 26 | "type": "string" 27 | }, 28 | { 29 | "name": "current", 30 | "type": "boolean" 31 | }, 32 | { 33 | "name": "rating", 34 | "type": "number" 35 | }, 36 | { 37 | "name": "created_year", 38 | "type": "date", 39 | "format": "%Y" 40 | }, 41 | { 42 | "name": "created_date", 43 | "type": "date" 44 | }, 45 | { 46 | "name": "created_time", 47 | "type": "time" 48 | }, 49 | { 50 | "name": "created_datetime", 51 | "type": "datetime" 52 | }, 53 | { 54 | "name": "stats", 55 | "type": "object" 56 | }, 57 | { 58 | "name": "persons", 59 | "type": "array" 60 | }, 61 | { 62 | "name": "location", 63 | "type": "geojson" 64 | } 65 | 66 | ] 67 | } 68 | -------------------------------------------------------------------------------- /data/comments.csv: -------------------------------------------------------------------------------- 1 | entry_id,comment 2 | 1,good 3 | -------------------------------------------------------------------------------- /data/comments.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "primaryKey": "entry_id", 4 | "foreignKeys": [ 5 | { 6 | "fields": "entry_id", 7 | "reference": { 8 | "resource": "articles", 9 | "fields": "id" 10 | } 11 | } 12 | ], 13 | "fields": [ 14 | { 15 | "name": "entry_id", 16 | "type": "integer", 17 | "constraints": { 18 | "required": true 19 | } 20 | }, 21 | { 22 | "name": "comment", 23 | "type": "string" 24 | } 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/tableschema-sql-py/eee264d1f90c30a8dcf1871b1a55238bb4c855f1/examples/__init__.py -------------------------------------------------------------------------------- /examples/storage.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import absolute_import 5 | from __future__ import unicode_literals 6 | 7 | import os 8 | import io 9 | import json 10 | from tabulator import topen 11 | from sqlalchemy import create_engine 12 | from dotenv import load_dotenv; load_dotenv('.env') 13 | 14 | from tableschema_sql import Storage 15 | 16 | 17 | # Get resources 18 | 
articles_schema = json.load(io.open('data/articles.json', encoding='utf-8')) 19 | comments_schema = json.load(io.open('data/comments.json', encoding='utf-8')) 20 | articles_data = topen('data/articles.csv', with_headers=True).read() 21 | comments_data = topen('data/comments.csv', with_headers=True).read() 22 | 23 | # Engine 24 | engine = create_engine(os.environ['POSTGRES_URL']) 25 | 26 | # Storage 27 | storage = Storage(engine=engine, prefix='prefix_') 28 | 29 | # Delete tables 30 | for table in reversed(storage.tables): 31 | storage.delete(table) 32 | 33 | # Create tables 34 | storage.create(['articles', 'comments'], [articles_schema, comments_schema]) 35 | 36 | # Write data to tables 37 | storage.write('articles', articles_data) 38 | storage.write('comments', comments_data) 39 | 40 | # List tables 41 | print(storage.tables) 42 | 43 | # Describe tables 44 | print(storage.describe('articles')) 45 | print(storage.describe('comments')) 46 | 47 | # Read data from tables 48 | print(list(storage.read('articles'))) 49 | print(list(storage.read('comments'))) 50 | -------------------------------------------------------------------------------- /pylama.ini: -------------------------------------------------------------------------------- 1 | [pylama] 2 | linters = pyflakes,mccabe,pep8 3 | ignore = E731,E128,C901 4 | 5 | [pylama:pep8] 6 | max_line_length = 100 7 | 8 | [pylama:mccabe] 9 | complexity = 24 10 | 11 | [pylama:*/__init__.py] 12 | ignore = W0611 13 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths = tests 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import absolute_import 5 | 6 | import os 7 | import io 8 | from setuptools import setup, find_packages 9 | 10 | 11 | # Helpers 12 | def read(*paths): 13 | """Read a text file.""" 14 | basedir = os.path.dirname(__file__) 15 | fullpath = os.path.join(basedir, *paths) 16 | contents = io.open(fullpath, encoding='utf-8').read().strip() 17 | return contents 18 | 19 | 20 | # Prepare 21 | PACKAGE = 'tableschema_sql' 22 | NAME = PACKAGE.replace('_', '-') 23 | INSTALL_REQUIRES = [ 24 | 'six>=1.9', 25 | 'sqlalchemy>=1.4,<3', 26 | 'pybloom_live>=2.2', 27 | 'tabulator>=1.1', 28 | 'tableschema>=1.0', 29 | 'cryptography' 30 | ] 31 | TESTS_REQUIRE = [ 32 | 'coverage', 33 | 'mock', 34 | 'pylama', 35 | 'pytest', 36 | 'pytest-cov', 37 | 'psycopg2', 38 | 'pymysql', 39 | 'python-dotenv', 40 | ] 41 | 42 | README = read('README.md') 43 | VERSION = read(PACKAGE, 'VERSION') 44 | PACKAGES = find_packages(exclude=['examples', 'tests']) 45 | 46 | 47 | # Run 48 | setup( 49 | name=NAME, 50 | version=VERSION, 51 | packages=PACKAGES, 52 | include_package_data=True, 53 | install_requires=INSTALL_REQUIRES, 54 | tests_require=TESTS_REQUIRE, 55 | extras_require={'develop': TESTS_REQUIRE}, 56 | zip_safe=False, 57 | long_description=README, 58 | long_description_content_type='text/markdown', 59 | description='Generate SQL tables, load and extract data, based on JSON 
Table Schema descriptors.', 60 | author='Open Knowledge Foundation', 61 | author_email='info@okfn.org', 62 | url='https://github.com/frictionlessdata/tableschema-sql-py', 63 | license='MIT', 64 | keywords=[ 65 | 'frictionless data', 66 | ], 67 | classifiers=[ 68 | 'Development Status :: 4 - Beta', 69 | 'Environment :: Web Environment', 70 | 'Intended Audience :: Developers', 71 | 'License :: OSI Approved :: MIT License', 72 | 'Operating System :: OS Independent', 73 | 'Programming Language :: Python :: 2', 74 | 'Programming Language :: Python :: 2.7', 75 | 'Programming Language :: Python :: 3', 76 | 'Programming Language :: Python :: 3.3', 77 | 'Programming Language :: Python :: 3.4', 78 | 'Programming Language :: Python :: 3.5', 79 | 'Topic :: Internet :: WWW/HTTP :: Dynamic Content', 80 | 'Topic :: Software Development :: Libraries :: Python Modules' 81 | ], 82 | ) 83 | -------------------------------------------------------------------------------- /tableschema_sql/VERSION: -------------------------------------------------------------------------------- 1 | 2.0.1 2 | -------------------------------------------------------------------------------- /tableschema_sql/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pylama:skip=1 3 | from __future__ import division 4 | from __future__ import print_function 5 | from __future__ import absolute_import 6 | from __future__ import unicode_literals 7 | 8 | 9 | # Module API 10 | 11 | from .storage import Storage 12 | 13 | 14 | # Version 15 | 16 | import io 17 | import os 18 | __version__ = io.open( 19 | os.path.join(os.path.dirname(__file__), 'VERSION'), 20 | encoding='utf-8').read().strip() 21 | -------------------------------------------------------------------------------- /tableschema_sql/mapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | from __future__ import unicode_literals 6 | 7 | import json 8 | 9 | import six 10 | import tableschema 11 | import sqlalchemy as sa 12 | from sqlalchemy import CheckConstraint as Check 13 | from sqlalchemy.dialects.postgresql import ARRAY, JSON, JSONB, UUID 14 | 15 | 16 | # Module API 17 | 18 | class Mapper(object): 19 | 20 | # Public 21 | 22 | def __init__(self, prefix, dialect='sqlite'): 23 | """Mapper to convert/restore FD entities to/from SQL entities 24 | """ 25 | self.__prefix = prefix 26 | self.__dialect = dialect 27 | 28 | def convert_bucket(self, bucket): 29 | """Convert bucket to SQL 30 | """ 31 | return self.__prefix + bucket 32 | 33 | def convert_descriptor(self, bucket, descriptor, index_fields=[], autoincrement=None): 34 | """Convert descriptor to SQL 35 | """ 36 | 37 | # Prepare 38 | columns = [] 39 | indexes = [] 40 | fallbacks = [] 41 | constraints = [] 42 | column_mapping = {} 43 | table_name = self.convert_bucket(bucket) 44 | comment = _get_comment(descriptor.get('title', ''), descriptor.get('description', '')) 45 | schema = tableschema.Schema(descriptor) 46 | 47 | # Autoincrement 48 | if autoincrement is not None: 49 | columns.append(sa.Column( 50 | autoincrement, sa.Integer, autoincrement=True, nullable=False)) 51 | 52 | # Fields 53 | for field in schema.fields: 54 | column_type = self.convert_type(field.type) 55 | if not column_type: 56 | column_type = sa.Text 57 | fallbacks.append(field.name) 58 | nullable = not 
field.required 59 | comment = _get_field_comment(field) 60 | unique = field.constraints.get('unique', False) 61 | checks = [] 62 | for name, value in field.constraints.items(): 63 | if name == 'minLength': 64 | checks.append(Check('LENGTH("%s") >= %s' % (field.name, value))) 65 | elif name == 'maxLength': 66 | checks.append(Check('LENGTH("%s") <= %s' % (field.name, value))) 67 | elif name == 'minimum': 68 | checks.append(Check('"%s" >= %s' % (field.name, value))) 69 | elif name == 'maximum': 70 | checks.append(Check('"%s" <= %s' % (field.name, value))) 71 | elif name == 'pattern': 72 | if self.__dialect in ['postgresql']: 73 | checks.append(Check('"%s" ~ \'%s\'' % (field.name, value))) 74 | else: 75 | checks.append(Check('"%s" REGEXP \'%s\'' % (field.name, value))) 76 | elif name == 'enum': 77 | if self.__dialect in ['sqlite']: 78 | checks.append(Check(sa.text('"%s" in (:values)' % field.name) 79 | .bindparams(sa.bindparam(key="values", value=value, expanding=True)))) 80 | else: 81 | column_type = sa.Enum(*value, name='%s_%s_enum' % (table_name, field.name)) 82 | column = sa.Column(*([field.name, column_type] + checks), 83 | nullable=nullable, comment=comment, unique=unique) 84 | columns.append(column) 85 | column_mapping[field.name] = column 86 | 87 | # Primary key 88 | pk = descriptor.get('primaryKey', None) 89 | if pk is not None: 90 | if isinstance(pk, six.string_types): 91 | pk = [pk] 92 | if autoincrement is not None: 93 | if pk is not None: 94 | pk = [autoincrement] + pk 95 | else: 96 | pk = [autoincrement] 97 | if pk is not None: 98 | constraint = sa.PrimaryKeyConstraint(*pk) 99 | constraints.append(constraint) 100 | 101 | # Foreign keys 102 | if self.__dialect in ['postgresql', 'sqlite']: 103 | fks = descriptor.get('foreignKeys', []) 104 | for fk in fks: 105 | fields = fk['fields'] 106 | resource = fk['reference']['resource'] 107 | foreign_fields = fk['reference']['fields'] 108 | if isinstance(fields, six.string_types): 109 | fields = [fields] 110 | if resource != '': 111 | table_name = self.convert_bucket(resource) 112 | if isinstance(foreign_fields, six.string_types): 113 | foreign_fields = [foreign_fields] 114 | composer = lambda field: '.'.join([table_name, field]) 115 | foreign_fields = list(map(composer, foreign_fields)) 116 | constraint = sa.ForeignKeyConstraint(fields, foreign_fields) 117 | constraints.append(constraint) 118 | 119 | # Indexes 120 | if self.__dialect in ('postgresql', 'sqlite'): 121 | for index, index_definition in enumerate(index_fields): 122 | name = table_name + '_ix%03d' % index 123 | index_columns = [column_mapping[field] for field in index_definition] 124 | indexes.append(sa.Index(name, *index_columns)) 125 | 126 | return columns, constraints, indexes, fallbacks, comment 127 | 128 | def convert_row(self, keyed_row, schema, fallbacks): 129 | """Convert row to SQL 130 | """ 131 | for key, value in list(keyed_row.items()): 132 | field = schema.get_field(key) 133 | if not field: 134 | del keyed_row[key] 135 | if key in fallbacks: 136 | value = _uncast_value(value, field=field) 137 | else: 138 | value = field.cast_value(value) 139 | keyed_row[key] = value 140 | return keyed_row 141 | 142 | def convert_type(self, type): 143 | """Convert type to SQL 144 | """ 145 | 146 | # Default dialect 147 | mapping = { 148 | 'any': sa.Text, 149 | 'array': None, 150 | 'boolean': sa.Boolean, 151 | 'date': sa.Date, 152 | 'datetime': sa.DateTime, 153 | 'duration': None, 154 | 'geojson': None, 155 | 'geopoint': None, 156 | 'integer': sa.Integer, 157 | 'number': sa.Float, 158 
| 'object': None, 159 | 'string': sa.Text, 160 | 'time': sa.Time, 161 | 'year': sa.Integer, 162 | 'yearmonth': None, 163 | } 164 | 165 | # Postgresql dialect 166 | if self.__dialect == 'postgresql': 167 | mapping.update({ 168 | 'array': JSONB, 169 | 'geojson': JSONB, 170 | 'number': sa.Numeric, 171 | 'object': JSONB, 172 | }) 173 | 174 | # Not supported type 175 | if type not in mapping: 176 | message = 'Field type "%s" is not supported' 177 | raise tableschema.exceptions.StorageError(message % type) 178 | 179 | return mapping[type] 180 | 181 | def restore_bucket(self, table_name): 182 | """Restore bucket from SQL 183 | """ 184 | if table_name.startswith(self.__prefix): 185 | return table_name.replace(self.__prefix, '', 1) 186 | return None 187 | 188 | def restore_descriptor(self, table_name, columns, constraints, autoincrement=None): 189 | """Restore descriptor from SQL 190 | """ 191 | 192 | # Fields 193 | fields = [] 194 | for column in columns: 195 | if column.name == autoincrement: 196 | continue 197 | field_type = self.restore_type(column.type) 198 | field = {'name': column.name, 'type': field_type} 199 | if not column.nullable: 200 | field['constraints'] = {'required': True} 201 | fields.append(field) 202 | 203 | # Primary key 204 | pk = [] 205 | for constraint in constraints: 206 | if isinstance(constraint, sa.PrimaryKeyConstraint): 207 | for column in constraint.columns: 208 | if column.name == autoincrement: 209 | continue 210 | pk.append(column.name) 211 | 212 | # Foreign keys 213 | fks = [] 214 | if self.__dialect in ['postgresql', 'sqlite']: 215 | for constraint in constraints: 216 | if isinstance(constraint, sa.ForeignKeyConstraint): 217 | resource = '' 218 | own_fields = [] 219 | foreign_fields = [] 220 | for element in constraint.elements: 221 | own_fields.append(element.parent.name) 222 | if element.column.table.name != table_name: 223 | resource = self.restore_bucket(element.column.table.name) 224 | foreign_fields.append(element.column.name) 225 | if len(own_fields) == len(foreign_fields) == 1: 226 | own_fields = own_fields.pop() 227 | foreign_fields = foreign_fields.pop() 228 | fks.append({ 229 | 'fields': own_fields, 230 | 'reference': {'resource': resource, 'fields': foreign_fields}, 231 | }) 232 | 233 | # Desscriptor 234 | descriptor = {} 235 | descriptor['fields'] = fields 236 | if len(pk) > 0: 237 | if len(pk) == 1: 238 | pk = pk.pop() 239 | descriptor['primaryKey'] = pk 240 | if len(fks) > 0: 241 | descriptor['foreignKeys'] = fks 242 | 243 | return descriptor 244 | 245 | def restore_row(self, row, schema, autoincrement): 246 | """Restore row from SQL 247 | """ 248 | row = list(row) 249 | for index, field in enumerate(schema.fields, start=1 if autoincrement else 0): 250 | if self.__dialect == 'postgresql': 251 | if field.type in ['array', 'object']: 252 | continue 253 | row[index] = field.cast_value(row[index]) 254 | return row 255 | 256 | def restore_type(self, type): 257 | """Restore type from SQL 258 | """ 259 | 260 | # All dialects 261 | mapping = { 262 | ARRAY: 'array', 263 | sa.Boolean: 'boolean', 264 | sa.Date: 'date', 265 | sa.DateTime: 'datetime', 266 | sa.Float: 'number', 267 | sa.Integer: 'integer', 268 | JSONB: 'object', 269 | JSON: 'object', 270 | sa.Numeric: 'number', 271 | sa.Text: 'string', 272 | sa.Time: 'time', 273 | sa.VARCHAR: 'string', 274 | UUID: 'string', 275 | } 276 | 277 | # Get field type 278 | field_type = None 279 | for key, value in mapping.items(): 280 | if isinstance(type, key): 281 | field_type = value 282 | 283 | # Not supported 284 | 
if field_type is None: 285 | message = 'Type "%s" is not supported' 286 | raise tableschema.exceptions.StorageError(message % type) 287 | 288 | return field_type 289 | 290 | 291 | # Internal 292 | 293 | def _uncast_value(value, field): 294 | # Eventially should be moved to: 295 | # https://github.com/frictionlessdata/tableschema-py/issues/161 296 | if isinstance(value, (list, dict)): 297 | value = json.dumps(value) 298 | else: 299 | value = str(value) 300 | return value 301 | 302 | 303 | def _get_field_comment(field, separator=' - '): 304 | """ 305 | Create SQL comment from field's title and description 306 | 307 | :param field: tableschema-py Field, with optional 'title' and 'description' values 308 | :param separator: 309 | :return: 310 | 311 | >>> _get_field_comment(tableschema.Field({'title': 'my_title', 'description': 'my_desc'})) 312 | 'my_title - my_desc' 313 | >>> _get_field_comment(tableschema.Field({'title': 'my_title', 'description': None})) 314 | 'my_title' 315 | >>> _get_field_comment(tableschema.Field({'title': '', 'description': 'my_description'})) 316 | 'my_description' 317 | >>> _get_field_comment(tableschema.Field({})) 318 | '' 319 | """ 320 | title = field.descriptor.get('title') or '' 321 | description = field.descriptor.get('description') or '' 322 | return _get_comment(description, title, separator) 323 | 324 | 325 | def _get_comment(description, title, separator=' - '): 326 | if title == '': 327 | return description 328 | if description == '': 329 | return title 330 | return title + separator + description 331 | -------------------------------------------------------------------------------- /tableschema_sql/storage.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | from __future__ import unicode_literals 6 | 7 | import collections 8 | from functools import partial 9 | 10 | import re 11 | import six 12 | import sqlalchemy 13 | import tableschema 14 | from sqlalchemy import Table, MetaData 15 | 16 | from .mapper import Mapper 17 | from .writer import Writer 18 | 19 | 20 | # Module API 21 | 22 | class Storage(tableschema.Storage): 23 | """SQL storage 24 | 25 | Package implements 26 | [Tabular Storage](https://github.com/frictionlessdata/tableschema-py#storage) 27 | interface (see full documentation on the link): 28 | 29 | ![Storage](https://i.imgur.com/RQgrxqp.png) 30 | 31 | > Only additional API is documented 32 | 33 | # Arguments 34 | engine (object): `sqlalchemy` engine 35 | dbschema (str): name of database schema 36 | prefix (str): prefix for all buckets 37 | reflect_only (callable): 38 | a boolean predicate to filter the list of table names when reflecting 39 | autoincrement (str/dict): 40 | add autoincrement column at the beginning. 
41 | - if a string it's an autoincrement column name 42 | - if a dict it's an autoincrements mapping with column 43 | names indexed by bucket names, for example, 44 | `{'bucket1'\\: 'id', 'bucket2'\\: 'other_id}` 45 | 46 | """ 47 | 48 | # Public 49 | 50 | def __init__(self, engine, dbschema=None, prefix='', reflect_only=None, autoincrement=None): 51 | 52 | # Set attributes 53 | self.__engine = engine 54 | self.__dbschema = dbschema 55 | self.__prefix = prefix 56 | self.__descriptors = {} 57 | self.__fallbacks = {} 58 | self.__autoincrement = autoincrement 59 | self.__only = reflect_only or (lambda _: True) 60 | self.__dialect = engine.dialect.name 61 | 62 | # Added regex support to sqlite 63 | if self.__dialect == 'sqlite': 64 | def regexp(expr, item): 65 | reg = re.compile(expr) 66 | return reg.search(item) is not None 67 | # It will fail silently if this function already exists 68 | with self.__engine.connect() as __connection: 69 | __connection.connection.create_function('REGEXP', 2, regexp) 70 | 71 | # Create mapper 72 | self.__mapper = Mapper(prefix=prefix, dialect=self.__dialect) 73 | 74 | # Create metadata and reflect 75 | self.__metadata = MetaData(schema=self.__dbschema) 76 | self.__reflect() 77 | 78 | def __repr__(self): 79 | 80 | # Template and format 81 | template = 'Storage <{engine}/{dbschema}>' 82 | text = template.format( 83 | engine=self.__engine, 84 | dbschema=self.__dbschema) 85 | 86 | return text 87 | 88 | @property 89 | def buckets(self): 90 | buckets = [] 91 | for table in self.__metadata.sorted_tables: 92 | bucket = self.__mapper.restore_bucket(table.name) 93 | if bucket is not None: 94 | buckets.append(bucket) 95 | return buckets 96 | 97 | def create(self, bucket, descriptor, force=False, indexes_fields=None): 98 | """Create bucket 99 | 100 | # Arguments 101 | indexes_fields (str[]): 102 | list of tuples containing field names, or list of such lists 103 | 104 | """ 105 | 106 | # Make lists 107 | buckets = bucket 108 | if isinstance(bucket, six.string_types): 109 | buckets = [bucket] 110 | descriptors = descriptor 111 | if isinstance(descriptor, dict): 112 | descriptors = [descriptor] 113 | if indexes_fields is None or len(indexes_fields) == 0: 114 | indexes_fields = [()] * len(descriptors) 115 | elif type(indexes_fields[0][0]) not in {list, tuple}: 116 | indexes_fields = [indexes_fields] 117 | 118 | # Check dimensions 119 | if not (len(buckets) == len(descriptors) == len(indexes_fields)): 120 | raise tableschema.exceptions.StorageError('Wrong argument dimensions') 121 | 122 | # Check buckets for existence 123 | for bucket in reversed(self.buckets): 124 | if bucket in buckets: 125 | if not force: 126 | message = 'Bucket "%s" already exists.' 
% bucket 127 | raise tableschema.exceptions.StorageError(message) 128 | self.delete(bucket) 129 | 130 | # Define buckets 131 | for bucket, descriptor, index_fields in zip(buckets, descriptors, indexes_fields): 132 | tableschema.validate(descriptor) 133 | table_name = self.__mapper.convert_bucket(bucket) 134 | autoincrement = self.__get_autoincrement_for_bucket(bucket) 135 | columns, constraints, indexes, fallbacks, table_comment = self.__mapper \ 136 | .convert_descriptor(bucket, descriptor, index_fields, autoincrement) 137 | Table(table_name, self.__metadata, *(columns + constraints + indexes), 138 | comment=table_comment) 139 | self.__descriptors[bucket] = descriptor 140 | self.__fallbacks[bucket] = fallbacks 141 | 142 | # Create tables, update metadata 143 | try: 144 | self.__metadata.create_all(bind=self.__engine) 145 | except sqlalchemy.exc.ProgrammingError as exception: 146 | if 'there is no unique constraint matching given keys' in str(exception): 147 | message = 'Foreign keys can only reference primary key or unique fields\n%s' 148 | six.raise_from( 149 | tableschema.exceptions.ValidationError(message % str(exception)), 150 | None) 151 | 152 | def delete(self, bucket=None, ignore=False): 153 | 154 | # Make lists 155 | buckets = bucket 156 | if isinstance(bucket, six.string_types): 157 | buckets = [bucket] 158 | elif bucket is None: 159 | buckets = reversed(self.buckets) 160 | 161 | # Iterate 162 | tables = [] 163 | for bucket in buckets: 164 | 165 | # Check existent 166 | if bucket not in self.buckets: 167 | if not ignore: 168 | message = 'Bucket "%s" doesn\'t exist.' % bucket 169 | raise tableschema.exceptions.StorageError(message) 170 | return 171 | 172 | # Remove from buckets 173 | if bucket in self.__descriptors: 174 | del self.__descriptors[bucket] 175 | 176 | # Add table to tables 177 | table = self.__get_table(bucket) 178 | tables.append(table) 179 | 180 | # Drop tables, update metadata 181 | self.__metadata.drop_all(tables=tables, bind=self.__engine) 182 | self.__metadata.clear() 183 | self.__reflect() 184 | 185 | def describe(self, bucket, descriptor=None): 186 | 187 | # Set descriptor 188 | if descriptor is not None: 189 | self.__descriptors[bucket] = descriptor 190 | 191 | # Get descriptor 192 | else: 193 | descriptor = self.__descriptors.get(bucket) 194 | if descriptor is None: 195 | table = self.__get_table(bucket) 196 | autoincrement = self.__get_autoincrement_for_bucket(bucket) 197 | descriptor = self.__mapper.restore_descriptor( 198 | table.name, table.columns, table.constraints, autoincrement) 199 | 200 | return descriptor 201 | 202 | def iter(self, bucket): 203 | 204 | # Get table and fallbacks 205 | table = self.__get_table(bucket) 206 | schema = tableschema.Schema(self.describe(bucket)) 207 | autoincrement = self.__get_autoincrement_for_bucket(bucket) 208 | 209 | # Streaming could be not working for some backends: 210 | # http://docs.sqlalchemy.org/en/latest/core/connections.html 211 | select = table.select().execution_options(stream_results=True) 212 | with self.__engine.connect() as connection: 213 | result = connection.execute(select) 214 | for row in result: 215 | row = self.__mapper.restore_row( 216 | row, schema=schema, autoincrement=autoincrement) 217 | yield row 218 | 219 | def read(self, bucket): 220 | rows = list(self.iter(bucket)) 221 | return rows 222 | 223 | def write(self, bucket, rows, keyed=False, as_generator=False, update_keys=None, 224 | buffer_size=1000, use_bloom_filter=True): 225 | """Write to bucket 226 | 227 | # Arguments 228 | keyed 
(bool): 229 | accept keyed rows 230 | as_generator (bool): 231 | returns generator to provide writing control to the client 232 | update_keys (str[]): 233 | update instead of inserting if key values match existent rows 234 | buffer_size (int=1000): 235 | maximum number of rows to try and write to the db in one batch 236 | use_bloom_filter (bool=True): 237 | should we use a bloom filter to optimize DB update performance 238 | (in exchange for some setup time) 239 | 240 | """ 241 | 242 | # Check update keys 243 | if update_keys is not None and len(update_keys) == 0: 244 | message = 'Argument "update_keys" cannot be an empty list' 245 | raise tableschema.exceptions.StorageError(message) 246 | 247 | # Get table and description 248 | table = self.__get_table(bucket) 249 | schema = tableschema.Schema(self.describe(bucket)) 250 | fallbacks = self.__fallbacks.get(bucket, []) 251 | 252 | # Write rows to table 253 | convert_row = partial(self.__mapper.convert_row, schema=schema, fallbacks=fallbacks) 254 | autoincrement = self.__get_autoincrement_for_bucket(bucket) 255 | writer = Writer(self.__engine, table, schema, 256 | # Only PostgreSQL supports "returning" so we don't use autoincrement for all 257 | autoincrement=autoincrement if self.__dialect in ['postgresql'] else None, 258 | update_keys=update_keys, 259 | convert_row=convert_row, 260 | buffer_size=buffer_size, 261 | use_bloom_filter=use_bloom_filter) 262 | gen = writer.write(rows, keyed=keyed) 263 | if as_generator: 264 | return gen 265 | collections.deque(gen, maxlen=0) 266 | 267 | # Private 268 | 269 | def __get_table(self, bucket): 270 | table_name = self.__mapper.convert_bucket(bucket) 271 | if self.__dbschema: 272 | table_name = '.'.join((self.__dbschema, table_name)) 273 | return self.__metadata.tables[table_name] 274 | 275 | def __reflect(self): 276 | def only(name, _): 277 | return self.__only(name) and self.__mapper.restore_bucket(name) is not None 278 | self.__metadata.reflect(only=only, bind=self.__engine) 279 | 280 | def __get_autoincrement_for_bucket(self, bucket): 281 | if isinstance(self.__autoincrement, dict): 282 | return self.__autoincrement.get(bucket) 283 | return self.__autoincrement 284 | -------------------------------------------------------------------------------- /tableschema_sql/writer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import absolute_import 5 | from __future__ import unicode_literals 6 | 7 | import pybloom_live 8 | from collections import namedtuple 9 | WrittenRow = namedtuple('WrittenRow', ['row', 'updated', 'updated_id']) 10 | 11 | 12 | # Module API 13 | 14 | class Writer(object): 15 | 16 | # Public 17 | 18 | def __init__(self, engine, table, schema, update_keys, 19 | autoincrement, convert_row, buffer_size, 20 | use_bloom_filter): 21 | """Writer to insert/update rows into table 22 | """ 23 | self.__engine = engine 24 | self.__table = table 25 | self.__schema = schema 26 | self.__update_keys = update_keys 27 | self.__autoincrement = autoincrement 28 | self.__convert_row = convert_row 29 | self.__buffer = [] 30 | self.__buffer_size = buffer_size 31 | self.__use_bloom_filter = use_bloom_filter 32 | if update_keys is not None and use_bloom_filter: 33 | with self.__engine.connect() as connection: 34 | self.__prepare_bloom(connection) 35 | 36 | def write(self, rows, keyed=False): 37 | """Write rows/keyed_rows to table 38 | """ 39 | with 
self.__engine.connect() as connection: 40 | with connection.begin(): 41 | for row in rows: 42 | keyed_row = row 43 | if not keyed: 44 | keyed_row = dict(zip(self.__schema.field_names, row)) 45 | keyed_row = self.__convert_row(keyed_row) 46 | if self.__check_existing(keyed_row): 47 | for wr in self.__insert(connection): 48 | yield wr 49 | ret = self.__update(connection, keyed_row) 50 | if ret is not None: 51 | yield WrittenRow(keyed_row, True, ret if self.__autoincrement else None) 52 | continue 53 | self.__buffer.append(keyed_row) 54 | if len(self.__buffer) > self.__buffer_size: 55 | for wr in self.__insert(connection): 56 | yield wr 57 | for wr in self.__insert(connection): 58 | yield wr 59 | 60 | # Private 61 | 62 | def __prepare_bloom(self, connection): 63 | """Prepare bloom for existing checks 64 | """ 65 | self.__bloom = pybloom_live.ScalableBloomFilter() 66 | columns = [getattr(self.__table.c, key) for key in self.__update_keys] 67 | keys = connection.execute(self.__table.select().with_only_columns(*columns).execution_options(stream_results=True)) 68 | for key in keys: 69 | self.__bloom.add(tuple(key)) 70 | 71 | def __insert(self, connection): 72 | """Insert rows to table 73 | """ 74 | if len(self.__buffer) > 0: 75 | # Insert data 76 | statement = self.__table.insert() 77 | if self.__autoincrement: 78 | statement = statement.returning( 79 | getattr(self.__table.c, self.__autoincrement)) 80 | statement = statement.values(self.__buffer) 81 | res = connection.execute(statement) 82 | for id, in res: 83 | row = self.__buffer.pop(0) 84 | yield WrittenRow(row, False, id) 85 | else: 86 | connection.execute(statement, self.__buffer) 87 | for row in self.__buffer: 88 | yield WrittenRow(row, False, None) 89 | # Clean memory 90 | self.__buffer = [] 91 | 92 | def __update(self, connection, row): 93 | """Update rows in table 94 | """ 95 | expr = self.__table.update().values(row) 96 | for key in self.__update_keys: 97 | expr = expr.where(getattr(self.__table.c, key) == row[key]) 98 | if self.__autoincrement: 99 | expr = expr.returning(getattr(self.__table.c, self.__autoincrement)) 100 | res = connection.execute(expr) 101 | if res.rowcount > 0: 102 | if self.__autoincrement: 103 | first = next(iter(res)) 104 | last_row_id = first[0] 105 | return last_row_id 106 | return 0 107 | return None 108 | 109 | def __check_existing(self, row): 110 | """Check if row exists in table 111 | """ 112 | if self.__update_keys is not None: 113 | if self.__use_bloom_filter: 114 | key = tuple(row[key] for key in self.__update_keys) 115 | if key in self.__bloom: 116 | return True 117 | self.__bloom.add(key) 118 | return False 119 | else: 120 | return True 121 | return False 122 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/frictionlessdata/tableschema-sql-py/eee264d1f90c30a8dcf1871b1a55238bb4c855f1/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_mapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import absolute_import 5 | from __future__ import unicode_literals 6 | 7 | import pytest 8 | import tableschema 9 | from mock import Mock 10 | from tableschema_sql.mapper import Mapper 11 | 12 | 13 | # Tests 14 | 15 | def 
test_mapper_convert_bucket(): 16 | mapper = Mapper('prefix_') 17 | assert mapper.convert_bucket('bucket') == 'prefix_bucket' 18 | 19 | 20 | def test_mapper_restore_bucket(): 21 | mapper = Mapper('prefix_') 22 | assert mapper.restore_bucket('prefix_bucket') == 'bucket' 23 | assert mapper.restore_bucket('xxxxxx_bucket') is None 24 | -------------------------------------------------------------------------------- /tests/test_storage.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import absolute_import 5 | from __future__ import unicode_literals 6 | 7 | import os 8 | import io 9 | import json 10 | import pytest 11 | import tableschema 12 | import sqlalchemy as sa 13 | from copy import deepcopy 14 | from tabulator import Stream 15 | from sqlalchemy import create_engine, text 16 | from sqlalchemy.engine import reflection 17 | from tableschema_sql import Storage 18 | from dotenv import load_dotenv; load_dotenv('.env') 19 | 20 | 21 | # Resources 22 | 23 | ARTICLES = { 24 | 'schema': { 25 | 'fields': [ 26 | {'name': 'id', 'type': 'integer', 'constraints': {'required': True}}, 27 | {'name': 'parent', 'type': 'integer'}, 28 | {'name': 'name', 'type': 'string'}, 29 | {'name': 'current', 'type': 'boolean'}, 30 | {'name': 'rating', 'type': 'number'}, 31 | ], 32 | 'primaryKey': 'id', 33 | 'foreignKeys': [ 34 | {'fields': 'parent', 'reference': {'resource': '', 'fields': 'id'}}, 35 | ], 36 | }, 37 | 'data': [ 38 | ['1', '', 'Taxes', 'True', '9.5'], 39 | ['2', '1', '中国人', 'False', '7'], 40 | ], 41 | } 42 | COMMENTS = { 43 | 'schema': { 44 | 'fields': [ 45 | {'name': 'entry_id', 'type': 'integer', 'constraints': {'required': True}}, 46 | {'name': 'comment', 'type': 'string'}, 47 | {'name': 'note', 'type': 'any'}, 48 | ], 49 | 'primaryKey': 'entry_id', 50 | 'foreignKeys': [ 51 | {'fields': 'entry_id', 'reference': {'resource': 'articles', 'fields': 'id'}}, 52 | ], 53 | }, 54 | 'data': [ 55 | ['1', 'good', 'note1'], 56 | ['2', 'bad', 'note2'], 57 | ], 58 | } 59 | TEMPORAL = { 60 | 'schema': { 61 | 'fields': [ 62 | {'name': 'date', 'type': 'date'}, 63 | {'name': 'date_year', 'type': 'date', 'format': '%Y'}, 64 | {'name': 'datetime', 'type': 'datetime'}, 65 | {'name': 'duration', 'type': 'duration'}, 66 | {'name': 'time', 'type': 'time'}, 67 | {'name': 'year', 'type': 'year'}, 68 | {'name': 'yearmonth', 'type': 'yearmonth'}, 69 | ], 70 | }, 71 | 'data': [ 72 | ['2015-01-01', '2015', '2015-01-01T03:00:00Z', 'P1Y1M', '03:00:00', '2015', '2015-01'], 73 | ['2015-12-31', '2015', '2015-12-31T15:45:33Z', 'P2Y2M', '15:45:33', '2015', '2015-01'], 74 | ], 75 | } 76 | LOCATION = { 77 | 'schema': { 78 | 'fields': [ 79 | {'name': 'location', 'type': 'geojson'}, 80 | {'name': 'geopoint', 'type': 'geopoint'}, 81 | ], 82 | }, 83 | 'data': [ 84 | ['{"type": "Point","coordinates":[33.33,33.33]}', '30,75'], 85 | ['{"type": "Point","coordinates":[50.00,50.00]}', '90,45'], 86 | ], 87 | } 88 | COMPOUND = { 89 | 'schema': { 90 | 'fields': [ 91 | {'name': 'stats', 'type': 'object'}, 92 | {'name': 'persons', 'type': 'array'}, 93 | ], 94 | }, 95 | 'data': [ 96 | ['{"chars":560}', '["Mike", "John"]'], 97 | ['{"chars":970}', '["Paul", "Alex"]'], 98 | ], 99 | } 100 | 101 | 102 | # Tests 103 | 104 | @pytest.mark.parametrize('dialect, database_url', [ 105 | ('postgresql', os.environ['POSTGRES_URL']), 106 | ('sqlite', os.environ['SQLITE_URL']), 107 | ]) 108 | def 
test_storage_flow(dialect, database_url): 109 | 110 | # Create storage 111 | engine = create_engine(database_url) 112 | storage = Storage(engine=engine, prefix='test_storage_') 113 | 114 | # Delete buckets 115 | storage.delete() 116 | 117 | # Create buckets 118 | storage.create( 119 | ['articles', 'comments'], 120 | [ARTICLES['schema'], COMMENTS['schema']], 121 | indexes_fields=[[['rating'], ['name']], []]) 122 | storage.create('comments', COMMENTS['schema'], force=True) 123 | storage.create('temporal', TEMPORAL['schema']) 124 | storage.create('location', LOCATION['schema']) 125 | storage.create('compound', COMPOUND['schema']) 126 | 127 | # Write data 128 | storage.write('articles', ARTICLES['data']) 129 | storage.write('comments', COMMENTS['data']) 130 | storage.write('temporal', TEMPORAL['data']) 131 | storage.write('location', LOCATION['data']) 132 | storage.write('compound', COMPOUND['data']) 133 | 134 | # Create new storage to use reflection only 135 | storage = Storage(engine=engine, prefix='test_storage_') 136 | 137 | # Create existent bucket 138 | with pytest.raises(tableschema.exceptions.StorageError): 139 | storage.create('articles', ARTICLES['schema']) 140 | 141 | # Assert buckets 142 | assert storage.buckets == ['articles', 'compound', 'location', 'temporal', 'comments'] 143 | 144 | # Assert schemas 145 | assert storage.describe('articles') == ARTICLES['schema'] 146 | assert storage.describe('comments') == { 147 | 'fields': [ 148 | {'name': 'entry_id', 'type': 'integer', 'constraints': {'required': True}}, 149 | {'name': 'comment', 'type': 'string'}, 150 | {'name': 'note', 'type': 'string'}, # type downgrade 151 | ], 152 | 'primaryKey': 'entry_id', 153 | 'foreignKeys': [ 154 | {'fields': 'entry_id', 'reference': {'resource': 'articles', 'fields': 'id'}}, 155 | ], 156 | } 157 | assert storage.describe('temporal') == { 158 | 'fields': [ 159 | {'name': 'date', 'type': 'date'}, 160 | {'name': 'date_year', 'type': 'date'}, # format removal 161 | {'name': 'datetime', 'type': 'datetime'}, 162 | {'name': 'duration', 'type': 'string'}, # type fallback 163 | {'name': 'time', 'type': 'time'}, 164 | {'name': 'year', 'type': 'integer'}, # type downgrade 165 | {'name': 'yearmonth', 'type': 'string'}, # type fallback 166 | ], 167 | } 168 | if dialect != 'sqlite': 169 | assert storage.describe('location') == { 170 | 'fields': [ 171 | {'name': 'location', 'type': 'object'}, # type downgrade 172 | {'name': 'geopoint', 'type': 'string'}, # type fallback 173 | ], 174 | } 175 | assert storage.describe('compound') == { 176 | 'fields': [ 177 | {'name': 'stats', 'type': 'object'}, 178 | {'name': 'persons', 'type': 'object'}, # type downgrade 179 | ], 180 | } 181 | 182 | # Assert data 183 | assert storage.read('articles') == cast(ARTICLES)['data'] 184 | assert storage.read('comments') == cast(COMMENTS)['data'] 185 | assert storage.read('temporal') == cast(TEMPORAL, skip=['duration', 'yearmonth'])['data'] 186 | if dialect != 'sqlite': 187 | assert storage.read('location') == cast(LOCATION, skip=['geopoint'])['data'] 188 | assert storage.read('compound') == cast(COMPOUND)['data'] 189 | 190 | # Assert data with forced schema 191 | storage.describe('compound', COMPOUND['schema']) 192 | assert storage.read('compound') == cast(COMPOUND)['data'] 193 | 194 | # Delete non existent bucket 195 | with pytest.raises(tableschema.exceptions.StorageError): 196 | storage.delete('non_existent') 197 | 198 | # Delete buckets 199 | storage.delete() 200 | 201 | 202 | @pytest.mark.parametrize('dialect, database_url', [ 203 | 
('mysql', os.environ['MYSQL_URL']), 204 | ('sqlite', os.environ['SQLITE_URL']), 205 | ]) 206 | def test_storage_limited_databases(dialect, database_url): 207 | 208 | # Create storage 209 | engine = create_engine(database_url) 210 | storage = Storage(engine=engine, prefix='test_storage_') 211 | 212 | # Delete buckets 213 | storage.delete() 214 | 215 | # Create buckets 216 | storage.create( 217 | ['articles', 'comments'], 218 | [remove_fk(ARTICLES['schema']), remove_fk(COMMENTS['schema'])], 219 | indexes_fields=[[['rating'], ['name']], []]) 220 | storage.create('comments', remove_fk(COMMENTS['schema']), force=True) 221 | storage.create('temporal', TEMPORAL['schema']) 222 | storage.create('location', LOCATION['schema']) 223 | storage.create('compound', COMPOUND['schema']) 224 | 225 | # Write data 226 | storage.write('articles', ARTICLES['data']) 227 | storage.write('comments', COMMENTS['data']) 228 | storage.write('temporal', TEMPORAL['data']) 229 | storage.write('location', LOCATION['data']) 230 | storage.write('compound', COMPOUND['data']) 231 | 232 | # Create new storage to use reflection only 233 | storage = Storage(engine=engine, prefix='test_storage_') 234 | 235 | # Create existent bucket 236 | with pytest.raises(tableschema.exceptions.StorageError): 237 | storage.create('articles', ARTICLES['schema']) 238 | 239 | # Assert buckets 240 | assert storage.buckets == ['articles', 'comments', 'compound', 'location', 'temporal'] 241 | 242 | # Assert schemas 243 | assert storage.describe('articles') == { 244 | 'fields': [ 245 | {'name': 'id', 'type': 'integer', 'constraints': {'required': True}}, 246 | {'name': 'parent', 'type': 'integer'}, 247 | {'name': 'name', 'type': 'string'}, 248 | {'name': 'current', 'type': 'boolean' if dialect == 'sqlite' else 'integer'}, 249 | {'name': 'rating', 'type': 'number'}, 250 | ], 251 | 'primaryKey': 'id', 252 | # foreignKeys not supported 253 | } 254 | assert storage.describe('comments') == { 255 | 'fields': [ 256 | {'name': 'entry_id', 'type': 'integer', 'constraints': {'required': True}}, 257 | {'name': 'comment', 'type': 'string'}, 258 | {'name': 'note', 'type': 'string'}, # type downgrade 259 | ], 260 | 'primaryKey': 'entry_id', 261 | # foreignKeys not supported 262 | } 263 | assert storage.describe('temporal') == { 264 | 'fields': [ 265 | {'name': 'date', 'type': 'date'}, 266 | {'name': 'date_year', 'type': 'date'}, # format removal 267 | {'name': 'datetime', 'type': 'datetime'}, 268 | {'name': 'duration', 'type': 'string'}, # type fallback 269 | {'name': 'time', 'type': 'time'}, 270 | {'name': 'year', 'type': 'integer'}, # type downgrade 271 | {'name': 'yearmonth', 'type': 'string'}, # type fallback 272 | ], 273 | } 274 | assert storage.describe('location') == { 275 | 'fields': [ 276 | {'name': 'location', 'type': 'string'}, # type fallback 277 | {'name': 'geopoint', 'type': 'string'}, # type fallback 278 | ], 279 | } 280 | assert storage.describe('compound') == { 281 | 'fields': [ 282 | {'name': 'stats', 'type': 'string'}, # type fallback 283 | {'name': 'persons', 'type': 'string'}, # type fallback 284 | ], 285 | } 286 | 287 | # Assert data 288 | assert storage.read('articles') == cast(ARTICLES)['data'] 289 | assert storage.read('comments') == cast(COMMENTS)['data'] 290 | assert storage.read('temporal') == cast(TEMPORAL, skip=['duration', 'yearmonth'])['data'] 291 | assert storage.read('location') == cast(LOCATION, skip=['geojson', 'geopoint'])['data'] 292 | assert storage.read('compound') == cast(COMPOUND, skip=['array', 'object'])['data'] 293 | 294 
def test_storage_write_generator():

    # Create storage
    engine = create_engine(os.environ['SQLITE_URL'])
    storage = Storage(engine=engine, prefix='test_storage_')

    # Create bucket
    storage.create('comments', remove_fk(COMMENTS['schema']), force=True)

    # Write data using generator
    gen = storage.write('comments', COMMENTS['data'], as_generator=True)
    res = list(gen)

    # Assert
    assert len(res) == 2
    assert storage.read('comments') == cast(COMMENTS)['data']


@pytest.mark.parametrize('use_bloom_filter, buffer_size', [
    (True, 1000),
    (False, 1000),
    (True, 2),
    (False, 1),
])
def test_storage_update(use_bloom_filter, buffer_size):
    RESOURCE = {
        'schema': {
            'fields': [
                {'name': 'person_id', 'type': 'integer', 'constraints': {'required': True}},
                {'name': 'name', 'type': 'string', 'constraints': {'required': True}},
                {'name': 'favorite_color', 'type': 'string'},
            ],
            'primaryKey': 'person_id',
        },
        'data': [
            ['1', 'ulysses', 'blue'],
            ['2', 'theseus', 'green'],
            ['3', 'perseus', 'red'],
            ['4', 'dedalus', 'yellow'],
        ],
        'updateData': [
            ['5', 'apollo', 'orange'],
            ['3', 'perseus', 'magenta'],
            ['6', 'zeus', 'grey'],
            ['4', 'dedalus', 'sunshine'],
            ['5', 'apollo', 'peach'],
        ],
    }

    # Create storage
    update_keys = ['person_id', 'name']
    engine = create_engine(os.environ['POSTGRES_URL'])
    storage = Storage(engine=engine, prefix='test_update_', autoincrement='__id')

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create('colors', RESOURCE['schema'])

    # Write data to buckets
    storage.write('colors', RESOURCE['data'], update_keys=update_keys)
    gen = storage.write('colors', RESOURCE['updateData'],
                        update_keys=update_keys, as_generator=True,
                        use_bloom_filter=use_bloom_filter, buffer_size=buffer_size)
    gen = list(gen)
    assert len(gen) == 5
    assert len(list(filter(lambda i: i.updated, gen))) == 3
    assert list(map(lambda i: i.updated_id, gen)) == [5, 3, 6, 4, 5]

    # Reflect storage
    storage = Storage(engine=engine, prefix='test_update_', autoincrement='__id')
    gen = storage.write('colors', RESOURCE['updateData'],
                        update_keys=update_keys, as_generator=True,
                        use_bloom_filter=use_bloom_filter, buffer_size=buffer_size)
    gen = list(gen)
    assert len(gen) == 5
    assert len(list(filter(lambda i: i.updated, gen))) == 5
    assert list(map(lambda i: i.updated_id, gen)) == [5, 3, 6, 4, 5]

    # Create new storage to use reflection only
    storage = Storage(engine=engine, prefix='test_update_')

    # Assert data
    rows = list(storage.iter('colors'))
    assert len(rows) == 6
    color_by_person = dict((row[1], row[3]) for row in rows)
    assert color_by_person == {
        1: 'blue',
        2: 'green',
        3: 'magenta',
        4: 'sunshine',
        5: 'peach',
        6: 'grey'
    }

    # Storage without autoincrement
    storage = Storage(engine=engine, prefix='test_update_')
    storage.delete()
    storage.create('colors', RESOURCE['schema'])
    storage.write('colors', RESOURCE['data'], update_keys=update_keys,
                  use_bloom_filter=use_bloom_filter, buffer_size=buffer_size)
    gen = storage.write('colors', RESOURCE['updateData'],
                        update_keys=update_keys, as_generator=True,
                        use_bloom_filter=use_bloom_filter, buffer_size=buffer_size)
    gen = list(gen)
    assert len(gen) == 5
    assert len(list(filter(lambda i: i.updated, gen))) == 3
    assert list(map(lambda i: i.updated_id, gen)) == [None, None, None, None, None]

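# A rough sketch of the upsert pattern exercised by test_storage_update above
# (assuming a bucket whose schema declares `person_id` and `name`): rows whose
# `update_keys` values match an existing row are updated in place, while all
# other rows are inserted.
#
#     storage.write('colors', RESOURCE['updateData'],
#                   update_keys=['person_id', 'name'],
#                   use_bloom_filter=True, buffer_size=1000)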
def test_storage_bad_type():
    RESOURCE = {
        'schema': {
            'fields': [
                {'name': 'bad_field', 'type': 'bad_type'}
            ],
        },
        'data': []
    }

    # Create bucket
    engine = create_engine(os.environ['POSTGRES_URL'])
    storage = Storage(engine=engine, prefix='test_bad_type_')
    with pytest.raises(tableschema.exceptions.ValidationError):
        storage.create('bad_type', RESOURCE['schema'], force=True)


def test_storage_only_parameter():
    RESOURCE = {
        'schema': {
            'fields': [
                {'name': 'person_id', 'type': 'integer', 'constraints': {'required': True}},
                {'name': 'name', 'type': 'string'},
            ],
            'primaryKey': 'person_id',
        },
        'data': []
    }

    # Create storage
    engine = create_engine(os.environ['POSTGRES_URL'], echo=True)
    storage = Storage(engine=engine, prefix='test_only_')

    # Delete buckets
    storage.delete()

    # Create buckets
    storage.create('names', RESOURCE['schema'], indexes_fields=[['person_id']])

    # Recreate storage limiting reflection
    only = lambda table: 'name' not in table
    engine = create_engine(os.environ['POSTGRES_URL'], echo=True)
    storage = Storage(engine=engine, prefix='test_only_', reflect_only=only)

    # Delete a non-existent bucket
    with pytest.raises(tableschema.exceptions.StorageError):
        storage.delete('names')


def test_storage_bigdata():
    RESOURCE = {
        'schema': {
            'fields': [
                {'name': 'id', 'type': 'integer'}
            ]
        },
        'data': [{'id': value} for value in range(0, 2500)]
    }

    # Write data
    engine = create_engine(os.environ['POSTGRES_URL'])
    storage = Storage(engine=engine, prefix='test_storage_bigdata_')
    storage.create('bucket', RESOURCE['schema'], force=True)
    storage.write('bucket', RESOURCE['data'], keyed=True)

    # Read data
    assert list(storage.read('bucket')) == list(map(lambda x: [x['id']], RESOURCE['data']))


def test_storage_bigdata_rollback():
    RESOURCE = {
        'schema': {
            'fields': [
                {'name': 'id', 'type': 'integer'}
            ]
        },
        'data': [(value,) for value in range(0, 2500)] + [('bad-value',)]
    }

    # Write data
    engine = create_engine(os.environ['POSTGRES_URL'])
    storage = Storage(engine=engine, prefix='test_storage_bigdata_rollback_')
    storage.create('bucket', RESOURCE['schema'], force=True)
    try:
        storage.write('bucket', RESOURCE['data'])
    except Exception:
        pass

    # Read data
    assert list(storage.read('bucket')) == []

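# The two tests below cover the `autoincrement` option. As a brief sketch,
# grounded in the assertions that follow: passing `autoincrement='id'` prepends
# a generated integer `id` column to every bucket, while a mapping such as
# `{'bucket1': 'id'}` enables it only for the named buckets.
#
#     storage = Storage(engine, autoincrement='id', prefix='test_storage_autoincrement_string_')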
@pytest.mark.parametrize('dialect, database_url', [
    ('postgresql', os.environ['POSTGRES_URL']),
    ('sqlite', os.environ['SQLITE_URL']),
    ('mysql', os.environ['MYSQL_URL']),
])
def test_storage_autoincrement_string(dialect, database_url):
    RESOURCE = {
        'schema': {'fields': [{'name': 'name', 'type': 'string'}]},
        'data': [['london'], ['paris'], ['rome']],
    }

    # Write data
    engine = create_engine(database_url)
    storage = Storage(engine, autoincrement='id', prefix='test_storage_autoincrement_string_')
    storage.create(['bucket1', 'bucket2'], [RESOURCE['schema'], RESOURCE['schema']], force=True)
    storage.write('bucket1', RESOURCE['data'])
    storage.write('bucket2', RESOURCE['data'])

    # Read data
    assert list(storage.read('bucket1')) == [
        [1, 'london'],
        [2, 'paris'],
        [3, 'rome'],
    ]
    assert list(storage.read('bucket2')) == [
        [1, 'london'],
        [2, 'paris'],
        [3, 'rome'],
    ]


@pytest.mark.parametrize('dialect, database_url', [
    ('postgresql', os.environ['POSTGRES_URL']),
    ('sqlite', os.environ['SQLITE_URL']),
    ('mysql', os.environ['MYSQL_URL']),
])
def test_storage_autoincrement_mapping(dialect, database_url):
    RESOURCE = {
        'schema': {'fields': [{'name': 'name', 'type': 'string'}]},
        'data': [['london'], ['paris'], ['rome']],
    }

    # Write data
    engine = create_engine(database_url)
    storage = Storage(engine, autoincrement={'bucket1': 'id'}, prefix='test_storage_autoincrement_mapping_')
    storage.create(['bucket1', 'bucket2'], [RESOURCE['schema'], RESOURCE['schema']], force=True)
    storage.write('bucket1', RESOURCE['data'])
    storage.write('bucket2', RESOURCE['data'])

    # Read data
    assert list(storage.read('bucket1')) == [
        [1, 'london'],
        [2, 'paris'],
        [3, 'rome'],
    ]
    assert list(storage.read('bucket2')) == [
        ['london'],
        ['paris'],
        ['rome'],
    ]

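# The constraints test below writes one valid row and then issues raw INSERT
# statements that violate each declared constraint; the expectation that every
# bad INSERT raises IntegrityError (or DataError for the enum case) suggests
# the Table Schema constraints are enforced by the database itself rather than
# in Python, e.g. for a field such as:
#
#     {'name': 'stringMinLength', 'type': 'string', 'constraints': {'minLength': 5}}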
@pytest.mark.parametrize('dialect, database_url', [
    ('postgresql', os.environ['POSTGRES_URL']),
    ('sqlite', os.environ['SQLITE_URL']),
    # ('mysql', os.environ['MYSQL_URL']),
])
def test_storage_constraints(dialect, database_url):
    schema = {
        'fields': [
            {'name': 'stringMinLength', 'type': 'string', 'constraints': {'minLength': 5}},
            {'name': 'stringMaxLength', 'type': 'string', 'constraints': {'maxLength': 5}},
            {'name': 'numberMinimum', 'type': 'number', 'constraints': {'minimum': 5}},
            {'name': 'numberMaximum', 'type': 'number', 'constraints': {'maximum': 5}},
            {'name': 'stringPattern', 'type': 'string', 'constraints': {'pattern': '^[a-z]+$'}},
            {'name': 'stringEnum', 'type': 'string', 'constraints': {'enum': ['test']}},
        ]
    }

    # Create table
    engine = create_engine(database_url)
    storage = Storage(engine, prefix='test_storage_constraints_')
    storage.create('bucket', schema, force=True)
    table_name = 'test_storage_constraints_bucket'

    # Write valid data
    storage.write('bucket', [['aaaaa', 'aaaaa', 5, 5, 'test', 'test']])

    # Write invalid data (stringMinLength)
    with pytest.raises(sa.exc.IntegrityError) as excinfo:
        pattern = text("INSERT INTO %s VALUES('a', 'aaaaa', 5, 5, 'test', 'test')" % table_name)
        engine.connect().execute(pattern)

    # Write invalid data (stringMaxLength)
    with pytest.raises(sa.exc.IntegrityError) as excinfo:
        pattern = text("INSERT INTO %s VALUES('aaaaa', 'aaaaaaaaa', 5, 5, 'test', 'test')" % table_name)
        engine.connect().execute(pattern)

    # Write invalid data (numberMinimum)
    with pytest.raises(sa.exc.IntegrityError) as excinfo:
        pattern = text("INSERT INTO %s VALUES('aaaaa', 'aaaaa', 1, 5, 'test', 'test')" % table_name)
        engine.connect().execute(pattern)

    # Write invalid data (numberMaximum)
    with pytest.raises(sa.exc.IntegrityError) as excinfo:
        pattern = text("INSERT INTO %s VALUES('aaaaa', 'aaaaa', 5, 9, 'test', 'test')" % table_name)
        engine.connect().execute(pattern)

    # Write invalid data (stringPattern)
    with pytest.raises(sa.exc.IntegrityError) as excinfo:
        pattern = text("INSERT INTO %s VALUES('aaaaa', 'aaaaa', 5, 5, 'bad1', 'test')" % table_name)
        engine.connect().execute(pattern)

    # Write invalid data (stringEnum)
    with pytest.raises((sa.exc.DataError, sa.exc.IntegrityError)) as excinfo:
        pattern = text("INSERT INTO %s VALUES('aaaaa', 'aaaaa', 5, 5, 'test', 'bad')" % table_name)
        engine.connect().execute(pattern)


@pytest.mark.parametrize('dialect, database_url', [
    ('postgresql', os.environ['POSTGRES_URL']),
    ('sqlite', os.environ['SQLITE_URL']),
])
def test_indexes_fields(dialect, database_url):
    engine = create_engine(database_url)
    storage = Storage(engine=engine, prefix='test_indexes_fields_')
    storage.delete()
    storage.create(
        ['articles'], [ARTICLES['schema']],
        indexes_fields=[[['rating'], ['name']]]
    )
    storage.write('articles', ARTICLES['data'])
    inspector = reflection.Inspector.from_engine(engine)
    indexes = [inspector.get_indexes(table_name) for table_name in inspector.get_table_names()][0]
    assert indexes


# Helpers

def cast(resource, skip=[]):
    resource = deepcopy(resource)
    schema = tableschema.Schema(resource['schema'])
    for row in resource['data']:
        for index, field in enumerate(schema.fields):
            if field.type not in skip:
                row[index] = field.cast_value(row[index])
    return resource

def remove_fk(schema):
    schema = deepcopy(schema)
    del schema['foreignKeys']
    return schema

--------------------------------------------------------------------------------