├── test ├── __init__.py ├── sample_data.py └── test_dataset.py ├── docs ├── .gitignore ├── requirements.txt ├── _static │ ├── dataset-logo.png │ └── knight_mozilla_on.jpg ├── _themes │ ├── kr │ │ ├── theme.conf │ │ ├── relations.html │ │ ├── sidebarlogo.html │ │ ├── autotoc.html │ │ ├── layout.html │ │ └── static │ │ │ └── flasky.css_t │ ├── README.md │ ├── LICENSE │ └── flask_theme_support.py ├── install.rst ├── api.rst ├── index.rst ├── quickstart.rst ├── Makefile └── conf.py ├── setup.cfg ├── .bumpversion.cfg ├── .gitignore ├── Makefile ├── README.md ├── dataset ├── chunked.py ├── types.py ├── __init__.py ├── util.py ├── database.py └── table.py ├── .travis.yml ├── LICENSE.txt ├── setup.py └── CHANGELOG.md /test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | datafreeze 2 | -------------------------------------------------------------------------------- /docs/_static/dataset-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patarapolw/dataset/master/docs/_static/dataset-logo.png -------------------------------------------------------------------------------- /docs/_static/knight_mozilla_on.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/patarapolw/dataset/master/docs/_static/knight_mozilla_on.jpg -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [flake8] 5 | ignore = E501,E123,E124,E126,E127,E128,E722,E741 6 | 7 | [bdist_wheel] 8 | universal = 1 9 | -------------------------------------------------------------------------------- /docs/_themes/kr/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | stylesheet = flasky.css 4 | pygments_style = flask_theme_support.FlaskyStyle 5 | 6 | [options] 7 | touch_icon = 8 | -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 1.1.0 3 | tag_name = {new_version} 4 | commit = True 5 | tag = True 6 | 7 | [bumpversion:file:setup.py] 8 | 9 | [bumpversion:file:dataset/__init__.py] 10 | 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | *.egg 4 | dist/* 5 | .tox/* 6 | .vscode/* 7 | build/* 8 | .DS_Store 9 | .watchr 10 | .coverage 11 | htmlcov/ 12 | 13 | *.pyo 14 | env3/* 15 | env/* 16 | Test.yaml 17 | Freezefile.yaml 18 | :memory: 19 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | all: clean test dists 3 | 4 | .PHONY: test 5 | test: 6 | nosetests -v 7 | 8 | dists: 9 | python 
setup.py sdist 10 | python setup.py bdist_wheel 11 | 12 | release: dists 13 | pip install -q twine 14 | twine upload dist/* 15 | 16 | .PHONY: clean 17 | clean: 18 | rm -rf dist build .eggs 19 | find . -name '*.egg-info' -exec rm -fr {} + 20 | find . -name '*.egg' -exec rm -f {} + 21 | find . -name '*.pyc' -exec rm -f {} + 22 | find . -name '*.pyo' -exec rm -f {} + 23 | -------------------------------------------------------------------------------- /docs/_themes/kr/relations.html: -------------------------------------------------------------------------------- 1 |

Related Topics

2 | 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | dataset: databases for lazy people 2 | ================================== 3 | 4 | [![Build Status](https://api.travis-ci.org/pudo/dataset.png)](https://travis-ci.org/pudo/dataset) 5 | 6 | In short, **dataset** makes reading and writing data in databases as simple as reading and writing JSON files. 7 | 8 | [Read the docs](https://dataset.readthedocs.io/) 9 | 10 | To install dataset, fetch it with ``pip``: 11 | 12 | ```bash 13 | $ pip install dataset 14 | ``` 15 | 16 | **Note:** as of version 1.0, **dataset** is split into two packages, with the 17 | data export features now extracted into a stand-alone package, **datafreeze**. 18 | See the relevant repository [here](https://github.com/pudo/datafreeze). 19 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | 2 | Installation Guide 3 | ================== 4 | 5 | The easiest way is to install ``dataset`` from the `Python Package Index 6 | `_ using ``pip`` or ``easy_install``: 7 | 8 | .. code-block:: bash 9 | 10 | $ pip install dataset 11 | 12 | To install it manually simply download the repository from Github: 13 | 14 | .. code-block:: bash 15 | 16 | $ git clone git://github.com/pudo/dataset.git 17 | $ cd dataset/ 18 | $ python setup.py install 19 | 20 | Depending on the type of database backend, you may also need to install a 21 | database specific driver package. For MySQL, this is ``MySQLdb``, for Postgres 22 | its ``psycopg2``. SQLite support is integrated into Python. 23 | -------------------------------------------------------------------------------- /docs/_themes/kr/sidebarlogo.html: -------------------------------------------------------------------------------- 1 | dataset 2 | 3 |

Because managing databases in Python should be as simple as reading and writing JSON files.

4 | 5 | 6 | 7 |

Overview

8 | 9 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /docs/_themes/kr/autotoc.html: -------------------------------------------------------------------------------- 1 |

{{ _('Table Of Contents') }}

2 | 3 | 4 | 5 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /docs/_themes/README.md: -------------------------------------------------------------------------------- 1 | krTheme Sphinx Style 2 | ==================== 3 | 4 | This repository contains sphinx styles Kenneth Reitz uses in most of 5 | his projects. It is a derivative of Mitsuhiko's themes for Flask and Flask related 6 | projects. To use this style in your Sphinx documentation, follow 7 | this guide: 8 | 9 | 1. put this folder as _themes into your docs folder. Alternatively 10 | you can also use git submodules to check out the contents there. 11 | 12 | 2. add this to your conf.py: 13 | 14 | sys.path.append(os.path.abspath('_themes')) 15 | html_theme_path = ['_themes'] 16 | html_theme = 'kr' 17 | 18 | The following themes exist: 19 | 20 | **kr** 21 | the standard flask documentation theme for large projects 22 | 23 | **kr_small** 24 | small one-page theme. Intended to be used by very small addon libraries. 25 | 26 | -------------------------------------------------------------------------------- /test/sample_data.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | from datetime import datetime 5 | 6 | 7 | TEST_CITY_1 = 'B€rkeley' 8 | TEST_CITY_2 = 'G€lway' 9 | 10 | TEST_DATA = [ 11 | { 12 | 'date': datetime(2011, 1, 1), 13 | 'temperature': 1, 14 | 'place': TEST_CITY_2 15 | }, 16 | { 17 | 'date': datetime(2011, 1, 2), 18 | 'temperature': -1, 19 | 'place': TEST_CITY_2 20 | }, 21 | { 22 | 'date': datetime(2011, 1, 3), 23 | 'temperature': 0, 24 | 'place': TEST_CITY_2 25 | }, 26 | { 27 | 'date': datetime(2011, 1, 1), 28 | 'temperature': 6, 29 | 'place': TEST_CITY_1 30 | }, 31 | { 32 | 'date': datetime(2011, 1, 2), 33 | 'temperature': 8, 34 | 'place': TEST_CITY_1 35 | }, 36 | { 37 | 'date': datetime(2011, 1, 3), 38 | 'temperature': 5, 39 | 'place': TEST_CITY_1 40 | } 41 | ] 42 | -------------------------------------------------------------------------------- /dataset/chunked.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class ChunkedInsert(object): 4 | """Batch up insert operations 5 | with ChunkedStorer(my_table) as storer: 6 | table.insert(row) 7 | 8 | Rows will be inserted in groups of 1000 9 | """ 10 | 11 | def __init__(self, table, chunksize=1000): 12 | self.queue = [] 13 | self.fields = set() 14 | self.table = table 15 | self.chunksize = chunksize 16 | 17 | def flush(self): 18 | for item in self.queue: 19 | for field in self.fields: 20 | item[field] = item.get(field) 21 | self.table.insert_many(self.queue) 22 | self.queue = [] 23 | 24 | def insert(self, item): 25 | self.fields.update(item.keys()) 26 | self.queue.append(item) 27 | if len(self.queue) >= self.chunksize: 28 | self.flush() 29 | 30 | def __enter__(self): 31 | return self 32 | 33 | def __exit__(self, exc_type, exc_val, exc_tb): 34 | self.flush() 35 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - '3.6' 4 | - '3.5' 5 | - '3.4' 6 | - '2.7' 7 | services: 8 | - mysql 9 | - postgresql 10 | env: 11 | - 'DATABASE_URL=sqlite:///:memory:' 12 | - DATABASE_URL=postgresql+psycopg2://postgres@127.0.0.1/dataset 13 | - DATABASE_URL=mysql+pymysql://root@127.0.0.1/dataset?charset=utf8 14 | install: 15 | - pip 
install -U pip wheel 16 | - pip install flake8 psycopg2 PyMySQL nose 17 | - pip install -e . 18 | before_script: 19 | - sh -c "if [ '$DATABASE_URL' = 'postgresql+psycopg2://postgres@127.0.0.1/dataset' ]; then psql -c 'DROP DATABASE IF EXISTS dataset;' -U postgres; fi" 20 | - sh -c "if [ '$DATABASE_URL' = 'postgresql+psycopg2://postgres@127.0.0.1/dataset' ]; then psql -c 'create database dataset;' -U postgres; fi" 21 | - sh -c "if [ '$DATABASE_URL' = 'mysql+pymysql://root@127.0.0.1/dataset?charset=utf8' ]; then mysql -e 'create database IF NOT EXISTS dataset DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_general_ci;'; fi" 22 | script: 23 | - flake8 --ignore=E501,E123,E124,E126,E127,E128 dataset test 24 | - nosetests -v 25 | cache: 26 | directories: 27 | - $HOME/.cache/pip 28 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | 2 | API documentation 3 | ================= 4 | 5 | Connecting 6 | ---------- 7 | 8 | .. autofunction:: dataset.connect 9 | 10 | Notes 11 | ----- 12 | 13 | * **dataset** uses SQLAlchemy connection pooling when connecting to the 14 | database. There is no way of explicitly clearing or shutting down the 15 | connections, other than having the dataset instance garbage collected. 16 | 17 | Database 18 | -------- 19 | 20 | .. autoclass:: dataset.Database 21 | :members: tables, get_table, create_table, load_table, query, begin, commit, rollback 22 | :special-members: 23 | 24 | 25 | Table 26 | ----- 27 | 28 | .. autoclass:: dataset.Table 29 | :members: columns, find, find_one, all, count, distinct, insert, insert_ignore, insert_many, update, upsert, delete, create_column, drop_column, create_index, drop 30 | :special-members: __len__, __iter__ 31 | 32 | 33 | Data Export 34 | ----------- 35 | 36 | 37 | **Note:** Data exporting has been extracted into a stand-alone package, datafreeze. See the relevant repository here_. 38 | 39 | .. _here: https://github.com/pudo/datafreeze 40 | 41 | | 42 | 43 | .. autofunction:: datafreeze.freeze 44 | -------------------------------------------------------------------------------- /docs/_themes/kr/layout.html: -------------------------------------------------------------------------------- 1 | {%- extends "basic/layout.html" %} 2 | {%- block extrahead %} 3 | {{ super() }} 4 | 5 | {% if theme_touch_icon %} 6 | 7 | {% endif %} 8 | 9 | {% endblock %} 10 | 11 | 12 | {% block sidebar2 %} 13 | {{ sidebar() }} 14 | {% endblock %} 15 | 16 | {%- block footer %} 17 | 22 |
23 | 24 |
25 | 26 | Fork me on GitHub 27 | 28 | 29 | {%- endblock %} 30 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, Open Knowledge Foundation, Friedrich Lindenberg, 2 | Gregor Aisch 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a 5 | copy of this software and associated documentation files (the 6 | "Software"), to deal in the Software without restriction, including 7 | without limitation the rights to use, copy, modify, merge, publish, 8 | distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so, subject to 10 | the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included 13 | in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 18 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 19 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 20 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /dataset/types.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, date 2 | 3 | from sqlalchemy import Integer, UnicodeText, Float, BigInteger 4 | from sqlalchemy import Boolean, Date, DateTime, Unicode 5 | from sqlalchemy.types import TypeEngine 6 | 7 | 8 | class Types(object): 9 | """A holder class for easy access to SQLAlchemy type names.""" 10 | integer = Integer 11 | string = Unicode 12 | text = UnicodeText 13 | float = Float 14 | bigint = BigInteger 15 | boolean = Boolean 16 | date = Date 17 | datetime = DateTime 18 | 19 | def guess(cls, sample): 20 | """Given a single sample, guess the column type for the field. 21 | 22 | If the sample is an instance of an SQLAlchemy type, the type will be 23 | used instead. 
24 | """ 25 | if isinstance(sample, TypeEngine): 26 | return sample 27 | if isinstance(sample, bool): 28 | return cls.boolean 29 | elif isinstance(sample, int): 30 | return cls.integer 31 | elif isinstance(sample, float): 32 | return cls.float 33 | elif isinstance(sample, datetime): 34 | return cls.datetime 35 | elif isinstance(sample, date): 36 | return cls.date 37 | return cls.text 38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | setup( 5 | name='dataset', 6 | version='1.1.0', 7 | description="Toolkit for Python-based database access.", 8 | classifiers=[ 9 | "Development Status :: 3 - Alpha", 10 | "Intended Audience :: Developers", 11 | "License :: OSI Approved :: MIT License", 12 | "Operating System :: OS Independent", 13 | 'Programming Language :: Python :: 2.7', 14 | 'Programming Language :: Python :: 3.3', 15 | 'Programming Language :: Python :: 3.4', 16 | 'Programming Language :: Python :: 3.5', 17 | 'Programming Language :: Python :: 3.6' 18 | ], 19 | keywords='sql sqlalchemy etl loading utility', 20 | author='Friedrich Lindenberg, Gregor Aisch, Stefan Wehrmeyer', 21 | author_email='friedrich@pudo.org', 22 | url='http://github.com/pudo/dataset', 23 | license='MIT', 24 | packages=find_packages(exclude=['ez_setup', 'examples', 'test']), 25 | namespace_packages=[], 26 | include_package_data=False, 27 | zip_safe=False, 28 | install_requires=[ 29 | 'sqlalchemy >= 1.1.0', 30 | 'alembic >= 0.6.2', 31 | 'normality >= 0.5.1', 32 | "six >= 1.11.0" 33 | ], 34 | tests_require=[ 35 | 'nose' 36 | ], 37 | test_suite='test', 38 | entry_points={} 39 | ) 40 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # dataset ChangeLog 2 | 3 | *The changelog has only been started with version 0.3.12, previous 4 | changes must be reconstructed from revision history.* 5 | 6 | * 0.6.0: Remove sqlite_datetime_fix for automatic int-casting of dates, 7 | make table['foo', 'bar'] an alias for table.distinct('foo', 'bar'), 8 | check validity of column and table names more thoroughly, rename 9 | reflectMetadata constructor argument to reflect_metadata, fix 10 | ResultIter to not leave queries open (so you can update in a loop). 11 | * 0.5.7: dataset Databases can now have customized row types. This allows, 12 | for example, information to be retrieved in attribute-accessible dict 13 | subclasses, such as stuf. 14 | * 0.5.4: Context manager for transactions, thanks to @victorkashirin. 15 | * 0.5.1: Fix a regression where empty queries would raise an exception. 16 | * 0.5: Improve overall code quality and testing, including Travis CI. 17 | An advanced __getitem__ syntax which allowed for the specification 18 | of primary keys when getting a table was dropped. 19 | DDL is no longer run against a transaction, but the base connection. 20 | * 0.4: Python 3 support and switch to alembic for migrations. 21 | * 0.3.15: Fixes to update and insertion of data, thanks to @cli248 22 | and @abhinav-upadhyay. 23 | * 0.3.14: dataset went viral somehow. Thanks to @gtsafas for 24 | refactorings, @alasdairnicol for fixing the Freezfile example in 25 | the documentation. @diegoguimaraes fixed the behaviour of insert to 26 | return the newly-created primary key ID. 
table.find_one() now 27 | returns a dict, not an SQLAlchemy ResultProxy. Slugs are now generated 28 | using the Python-Slugify package, removing slug code from dataset. 29 | * 0.3.13: Fixed logging, added support for transformations on result 30 | rows to support slug generation in output (#28). 31 | * 0.3.12: Makes table primary key's types and names configurable, fixing 32 | #19. Contributed by @dnatag. 33 | -------------------------------------------------------------------------------- /docs/_themes/LICENSE: -------------------------------------------------------------------------------- 1 | Modifications: 2 | 3 | Copyright (c) 2011 Kenneth Reitz. 4 | 5 | 6 | Original Project: 7 | 8 | Copyright (c) 2010 by Armin Ronacher. 9 | 10 | 11 | Some rights reserved. 12 | 13 | Redistribution and use in source and binary forms of the theme, with or 14 | without modification, are permitted provided that the following conditions 15 | are met: 16 | 17 | * Redistributions of source code must retain the above copyright 18 | notice, this list of conditions and the following disclaimer. 19 | 20 | * Redistributions in binary form must reproduce the above 21 | copyright notice, this list of conditions and the following 22 | disclaimer in the documentation and/or other materials provided 23 | with the distribution. 24 | 25 | * The names of the contributors may not be used to endorse or 26 | promote products derived from this software without specific 27 | prior written permission. 28 | 29 | We kindly ask you to only use these themes in an unmodified manner just 30 | for Flask and Flask-related products, not for unrelated projects. If you 31 | like the visual style and want to use it for your own projects, please 32 | consider making some larger changes to the themes (such as changing 33 | font faces, sizes, colors or margins). 34 | 35 | THIS THEME IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 36 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 37 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 38 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 39 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 40 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 41 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 42 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 43 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 44 | ARISING IN ANY WAY OUT OF THE USE OF THIS THEME, EVEN IF ADVISED OF THE 45 | POSSIBILITY OF SUCH DAMAGE. 46 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import warnings 3 | from dataset.database import Database 4 | from dataset.table import Table 5 | from dataset.util import row_type 6 | 7 | # shut up useless SA warning: 8 | warnings.filterwarnings( 9 | 'ignore', 'Unicode type received non-unicode bind param value.') 10 | warnings.filterwarnings( 11 | 'ignore', 'Skipping unsupported ALTER for creation of implicit constraint') 12 | 13 | __all__ = ['Database', 'Table', 'freeze', 'connect'] 14 | __version__ = '1.1.0' 15 | 16 | 17 | def connect(url=None, schema=None, reflect_metadata=True, engine_kwargs=None, 18 | reflect_views=True, ensure_schema=True, row_type=row_type): 19 | """ Opens a new connection to a database. 
20 | 21 | *url* can be any valid `SQLAlchemy engine URL`_. If *url* is not defined 22 | it will try to use *DATABASE_URL* from environment variable. Returns an 23 | instance of :py:class:`Database `. Set *reflect_metadata* 24 | to False if you don't want the entire database schema to be pre-loaded. 25 | This significantly speeds up connecting to large databases with lots of 26 | tables. *reflect_views* can be set to False if you don't want views to be 27 | loaded. Additionally, *engine_kwargs* will be directly passed to 28 | SQLAlchemy, e.g. set *engine_kwargs={'pool_recycle': 3600}* will avoid `DB 29 | connection timeout`_. Set *row_type* to an alternate dict-like class to 30 | change the type of container rows are stored in.:: 31 | 32 | db = dataset.connect('sqlite:///factbook.db') 33 | 34 | .. _SQLAlchemy Engine URL: http://docs.sqlalchemy.org/en/latest/core/engines.html#sqlalchemy.create_engine 35 | .. _DB connection timeout: http://docs.sqlalchemy.org/en/latest/core/pooling.html#setting-pool-recycle 36 | """ 37 | if url is None: 38 | url = os.environ.get('DATABASE_URL', 'sqlite://') 39 | 40 | return Database(url, schema=schema, reflect_metadata=reflect_metadata, 41 | engine_kwargs=engine_kwargs, reflect_views=reflect_views, 42 | ensure_schema=ensure_schema, row_type=row_type) 43 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. dataset documentation master file, created by 2 | sphinx-quickstart on Mon Apr 1 18:41:21 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | dataset: databases for lazy people 7 | ================================== 8 | 9 | .. toctree:: 10 | :hidden: 11 | 12 | 13 | Although managing data in relational database has plenty of benefits, they're 14 | rarely used in day-to-day work with small to medium scale datasets. But why is 15 | that? Why do we see an awful lot of data stored in static files in CSV or JSON 16 | format, even though they are hard to query and update incrementally? 17 | 18 | The answer is that **programmers are lazy**, and thus they tend to prefer the 19 | easiest solution they find. And in **Python**, a database isn't the simplest 20 | solution for storing a bunch of structured data. This is what **dataset** is 21 | going to change! 22 | 23 | **dataset** provides a simple abstraction layer removes most direct SQL 24 | statements without the necessity for a full ORM model - essentially, databases 25 | can be used like a JSON file or NoSQL store. 26 | 27 | A simple data loading script using **dataset** might look like this: 28 | 29 | :: 30 | 31 | import dataset 32 | 33 | db = dataset.connect('sqlite:///:memory:') 34 | 35 | table = db['sometable'] 36 | table.insert(dict(name='John Doe', age=37)) 37 | table.insert(dict(name='Jane Doe', age=34, gender='female')) 38 | 39 | john = table.find_one(name='John Doe') 40 | 41 | 42 | Here is `similar code, without dataset `_. 43 | 44 | 45 | Features 46 | -------- 47 | 48 | * **Automatic schema**: If a table or column is written that does not 49 | exist in the database, it will be created automatically. 50 | * **Upserts**: Records are either created or updated, depending on 51 | whether an existing version can be found. 52 | * **Query helpers** for simple queries such as :py:meth:`all ` rows in a table or 53 | all :py:meth:`distinct ` values across a set of columns. 
54 | * **Compatibility**: Being built on top of `SQLAlchemy `_, ``dataset`` works with all major databases, such as SQLite, PostgreSQL and MySQL. 55 | 56 | Contents 57 | -------- 58 | 59 | .. toctree:: 60 | :maxdepth: 2 61 | 62 | install 63 | quickstart 64 | api 65 | 66 | Contributors 67 | ------------ 68 | 69 | ``dataset`` is written and maintained by `Friedrich Lindenberg `_, 70 | `Gregor Aisch `_ and `Stefan Wehrmeyer `_. 71 | Its code is largely based on the preceding libraries `sqlaload `_ 72 | and datafreeze. And of course, we're standing on the `shoulders of giants `_. 73 | 74 | Our cute little `naked mole rat `_ was drawn by `Johannes Koch `_. 75 | -------------------------------------------------------------------------------- /dataset/util.py: -------------------------------------------------------------------------------- 1 | import six 2 | from hashlib import sha1 3 | from collections import OrderedDict, Iterable 4 | from six.moves.urllib.parse import urlparse 5 | 6 | QUERY_STEP = 1000 7 | row_type = OrderedDict 8 | 9 | 10 | class DatasetException(Exception): 11 | pass 12 | 13 | 14 | def convert_row(row_type, row): 15 | if row is None: 16 | return None 17 | return row_type(row.items()) 18 | 19 | 20 | def iter_result_proxy(rp, step=None): 21 | """Iterate over the ResultProxy.""" 22 | while True: 23 | if step is None: 24 | chunk = rp.fetchall() 25 | else: 26 | chunk = rp.fetchmany(step) 27 | if not chunk: 28 | break 29 | for row in chunk: 30 | yield row 31 | 32 | 33 | class ResultIter(object): 34 | """ SQLAlchemy ResultProxies are not iterable to get a 35 | list of dictionaries. This is to wrap them. """ 36 | 37 | def __init__(self, result_proxy, row_type=row_type, step=None): 38 | self.row_type = row_type 39 | self.result_proxy = result_proxy 40 | self.keys = list(result_proxy.keys()) 41 | self._iter = iter_result_proxy(result_proxy, step=step) 42 | 43 | def __next__(self): 44 | return convert_row(self.row_type, next(self._iter)) 45 | 46 | next = __next__ 47 | 48 | def __iter__(self): 49 | return self 50 | 51 | def close(self): 52 | self.result_proxy.close() 53 | 54 | 55 | def normalize_column_name(name): 56 | """Check if a string is a reasonable thing to use as a column name.""" 57 | if not isinstance(name, six.string_types): 58 | raise ValueError('%r is not a valid column name.' % name) 59 | 60 | # limit to 63 characters 61 | name = name.strip()[:63] 62 | # column names can be 63 *bytes* max in postgresql 63 | if isinstance(name, six.text_type): 64 | while len(name.encode('utf-8')) >= 64: 65 | name = name[:len(name) - 1] 66 | 67 | if not len(name) or '.' in name or '-' in name: 68 | raise ValueError('%r is not a valid column name.' 
% name) 69 | return name 70 | 71 | 72 | def normalize_table_name(name): 73 | """Check if the table name is obviously invalid.""" 74 | if not isinstance(name, six.string_types): 75 | raise ValueError("Invalid table name: %r" % name) 76 | name = name.strip()[:63] 77 | if not len(name): 78 | raise ValueError("Invalid table name: %r" % name) 79 | return name 80 | 81 | 82 | def safe_url(url): 83 | """Remove password from printed connection URLs.""" 84 | parsed = urlparse(url) 85 | if parsed.password is not None: 86 | pwd = ':%s@' % parsed.password 87 | url = url.replace(pwd, ':*****@') 88 | return url 89 | 90 | 91 | def index_name(table, columns): 92 | """Generate an artificial index name.""" 93 | sig = '||'.join(columns) 94 | key = sha1(sig.encode('utf-8')).hexdigest()[:16] 95 | return 'ix_%s_%s' % (table, key) 96 | 97 | 98 | def ensure_tuple(obj): 99 | """Try and make the given argument into a tuple.""" 100 | if obj is None: 101 | return tuple() 102 | if isinstance(obj, Iterable) and not isinstance(obj, six.string_types): 103 | return tuple(obj) 104 | return obj, 105 | 106 | 107 | def pad_chunk_columns(chunk): 108 | """Given a set of items to be inserted, make sure they all have the 109 | same columns by padding columns with None if they are missing.""" 110 | columns = set() 111 | for record in chunk: 112 | columns.update(record.keys()) 113 | for record in chunk: 114 | for column in columns: 115 | record.setdefault(column, None) 116 | return chunk 117 | -------------------------------------------------------------------------------- /docs/_themes/flask_theme_support.py: -------------------------------------------------------------------------------- 1 | # flasky extensions. flasky pygments style based on tango style 2 | from pygments.style import Style 3 | from pygments.token import Keyword, Name, Comment, String, Error, \ 4 | Number, Operator, Generic, Whitespace, Punctuation, Other, Literal 5 | 6 | 7 | class FlaskyStyle(Style): 8 | background_color = "#f8f8f8" 9 | default_style = "" 10 | 11 | styles = { 12 | # No corresponding class for the following: 13 | #Text: "", # class: '' 14 | Whitespace: "underline #f8f8f8", # class: 'w' 15 | Error: "#a40000 border:#ef2929", # class: 'err' 16 | Other: "#000000", # class 'x' 17 | 18 | Comment: "italic #8f5902", # class: 'c' 19 | Comment.Preproc: "noitalic", # class: 'cp' 20 | 21 | Keyword: "bold #004461", # class: 'k' 22 | Keyword.Constant: "bold #004461", # class: 'kc' 23 | Keyword.Declaration: "bold #004461", # class: 'kd' 24 | Keyword.Namespace: "bold #004461", # class: 'kn' 25 | Keyword.Pseudo: "bold #004461", # class: 'kp' 26 | Keyword.Reserved: "bold #004461", # class: 'kr' 27 | Keyword.Type: "bold #004461", # class: 'kt' 28 | 29 | Operator: "#582800", # class: 'o' 30 | Operator.Word: "bold #004461", # class: 'ow' - like keywords 31 | 32 | Punctuation: "bold #000000", # class: 'p' 33 | 34 | # because special names such as Name.Class, Name.Function, etc. 35 | # are not recognized as such later in the parsing, we choose them 36 | # to look the same as ordinary variables. 
37 | Name: "#000000", # class: 'n' 38 | Name.Attribute: "#c4a000", # class: 'na' - to be revised 39 | Name.Builtin: "#004461", # class: 'nb' 40 | Name.Builtin.Pseudo: "#3465a4", # class: 'bp' 41 | Name.Class: "#000000", # class: 'nc' - to be revised 42 | Name.Constant: "#000000", # class: 'no' - to be revised 43 | Name.Decorator: "#888", # class: 'nd' - to be revised 44 | Name.Entity: "#ce5c00", # class: 'ni' 45 | Name.Exception: "bold #cc0000", # class: 'ne' 46 | Name.Function: "#000000", # class: 'nf' 47 | Name.Property: "#000000", # class: 'py' 48 | Name.Label: "#f57900", # class: 'nl' 49 | Name.Namespace: "#000000", # class: 'nn' - to be revised 50 | Name.Other: "#000000", # class: 'nx' 51 | Name.Tag: "bold #004461", # class: 'nt' - like a keyword 52 | Name.Variable: "#000000", # class: 'nv' - to be revised 53 | Name.Variable.Class: "#000000", # class: 'vc' - to be revised 54 | Name.Variable.Global: "#000000", # class: 'vg' - to be revised 55 | Name.Variable.Instance: "#000000", # class: 'vi' - to be revised 56 | 57 | Number: "#990000", # class: 'm' 58 | 59 | Literal: "#000000", # class: 'l' 60 | Literal.Date: "#000000", # class: 'ld' 61 | 62 | String: "#4e9a06", # class: 's' 63 | String.Backtick: "#4e9a06", # class: 'sb' 64 | String.Char: "#4e9a06", # class: 'sc' 65 | String.Doc: "italic #8f5902", # class: 'sd' - like a comment 66 | String.Double: "#4e9a06", # class: 's2' 67 | String.Escape: "#4e9a06", # class: 'se' 68 | String.Heredoc: "#4e9a06", # class: 'sh' 69 | String.Interpol: "#4e9a06", # class: 'si' 70 | String.Other: "#4e9a06", # class: 'sx' 71 | String.Regex: "#4e9a06", # class: 'sr' 72 | String.Single: "#4e9a06", # class: 's1' 73 | String.Symbol: "#4e9a06", # class: 'ss' 74 | 75 | Generic: "#000000", # class: 'g' 76 | Generic.Deleted: "#a40000", # class: 'gd' 77 | Generic.Emph: "italic #000000", # class: 'ge' 78 | Generic.Error: "#ef2929", # class: 'gr' 79 | Generic.Heading: "bold #000080", # class: 'gh' 80 | Generic.Inserted: "#00A000", # class: 'gi' 81 | Generic.Output: "#888", # class: 'go' 82 | Generic.Prompt: "#745334", # class: 'gp' 83 | Generic.Strong: "bold #000000", # class: 'gs' 84 | Generic.Subheading: "bold #800080", # class: 'gu' 85 | Generic.Traceback: "bold #a40000", # class: 'gt' 86 | } 87 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | 2 | Quickstart 3 | ========== 4 | 5 | 6 | Hi, welcome to the twelve-minute quick-start tutorial. 7 | 8 | Connecting to a database 9 | ------------------------ 10 | 11 | At first you need to import the dataset package :) :: 12 | 13 | import dataset 14 | 15 | To connect to a database you need to identify it by its `URL `_, which basically is a string of the form ``"dialect://user:password@host/dbname"``. 
Here are a few examples for different database backends:: 16 | 17 | # connecting to a SQLite database 18 | db = dataset.connect('sqlite:///mydatabase.db') 19 | 20 | # connecting to a MySQL database with user and password 21 | db = dataset.connect('mysql://user:password@localhost/mydatabase') 22 | 23 | # connecting to a PostgreSQL database 24 | db = dataset.connect('postgresql://scott:tiger@localhost:5432/mydatabase') 25 | 26 | It is also possible to define the `URL` as an environment variable called `DATABASE_URL` 27 | so you can initialize database connection without explicitly passing an `URL`:: 28 | 29 | db = dataset.connect() 30 | 31 | Depending on which database you're using, you may also have to install 32 | the database bindings to support that database. SQLite is included in 33 | the Python core, but PostgreSQL requires ``psycopg2`` to be installed. 34 | MySQL can be enabled by installing the ``mysql-db`` drivers. 35 | 36 | 37 | Storing data 38 | ------------ 39 | 40 | To store some data you need to get a reference to a table. You don't need 41 | to worry about whether the table already exists or not, since dataset 42 | will create it automatically:: 43 | 44 | # get a reference to the table 'user' 45 | table = db['user'] 46 | 47 | Now storing data in a table is a matter of a single function call. Just 48 | pass a `dict`_ to *insert*. Note that you don't need to create the columns 49 | *name* and *age* – dataset will do this automatically:: 50 | 51 | # Insert a new record. 52 | table.insert(dict(name='John Doe', age=46, country='China')) 53 | 54 | # dataset will create "missing" columns any time you insert a dict with an unknown key 55 | table.insert(dict(name='Jane Doe', age=37, country='France', gender='female')) 56 | 57 | .. _dict: http://docs.python.org/2/library/stdtypes.html#dict 58 | 59 | Updating existing entries is easy, too:: 60 | 61 | table.update(dict(name='John Doe', age=47), ['name']) 62 | 63 | The list of filter columns given as the second argument filter using the 64 | values in the first column. If you don't want to update over a 65 | particular value, just use the auto-generated ``id`` column. 66 | 67 | Using Transactions 68 | ------------------ 69 | 70 | You can group a set of database updates in a transaction. In that case, all updates 71 | are committed at once or, in case of exception, all of them are reverted. Transactions 72 | are supported through a context manager, so they can be used through a ``with`` 73 | statement:: 74 | 75 | with dataset.connect() as tx: 76 | tx['user'].insert(dict(name='John Doe', age=46, country='China')) 77 | 78 | You can get same functionality by invoking the methods :py:meth:`begin() `, 79 | :py:meth:`commit() ` and :py:meth:`rollback() ` 80 | explicitly:: 81 | 82 | db = dataset.connect() 83 | db.begin() 84 | try: 85 | db['user'].insert(dict(name='John Doe', age=46, country='China')) 86 | db.commit() 87 | except: 88 | db.rollback() 89 | 90 | Nested transactions are supported too:: 91 | 92 | db = dataset.connect() 93 | with db as tx1: 94 | tx1['user'].insert(dict(name='John Doe', age=46, country='China')) 95 | with db as tx2: 96 | tx2['user'].insert(dict(name='Jane Doe', age=37, country='France', gender='female')) 97 | 98 | 99 | 100 | Inspecting databases and tables 101 | ------------------------------- 102 | 103 | When dealing with unknown databases we might want to check their structure 104 | first. 
To start exploring, let's find out what tables are stored in the 105 | database: 106 | 107 | >>> print(db.tables) 108 | [u'user'] 109 | 110 | Now, let's list all columns available in the table ``user``: 111 | 112 | >>> print(db['user'].columns) 113 | [u'id', u'country', u'age', u'name', u'gender'] 114 | 115 | Using ``len()`` we can get the total number of rows in a table: 116 | 117 | >>> print(len(db['user'])) 118 | 2 119 | 120 | Reading data from tables 121 | ------------------------ 122 | 123 | Now let's get some real data out of the table:: 124 | 125 | users = db['user'].all() 126 | 127 | If we simply want to iterate over all rows in a table, we can omit :py:meth:`all() `:: 128 | 129 | for user in db['user']: 130 | print(user['age']) 131 | 132 | We can search for specific entries using :py:meth:`find() ` and 133 | :py:meth:`find_one() `:: 134 | 135 | # All users from China 136 | chinese_users = table.find(country='China') 137 | 138 | # Get a specific user 139 | john = table.find_one(name='John Doe') 140 | 141 | # Find multiple at once 142 | winners = table.find(id=[1, 3, 7]) 143 | 144 | # Find by comparison operator 145 | elderly_users = table.find(age={'>=': 70}) 146 | possible_customers = table.find(age={'between': [21, 80]}) 147 | 148 | # Use the underlying SQLAlchemy directly 149 | elderly_users = table.find(table.table.columns.age >= 70) 150 | 151 | Possible comparison operators:: 152 | 153 | gt, > 154 | lt, < 155 | gte, >= 156 | lte, <= 157 | !=, <>, not 158 | between, .. 159 | 160 | Using :py:meth:`distinct() ` we can grab a set of rows 161 | with unique values in one or more columns:: 162 | 163 | # Get one user per country 164 | db['user'].distinct('country') 165 | 166 | Finally, you can use the ``row_type`` parameter to choose the data type in which 167 | results will be returned:: 168 | 169 | import dataset 170 | from stuf import stuf 171 | 172 | db = dataset.connect('sqlite:///mydatabase.db', row_type=stuf) 173 | 174 | Now contents will be returned in ``stuf`` objects (basically, ``dict`` 175 | objects whose elements can be acessed as attributes (``item.name``) as well as 176 | by index (``item['name']``). 177 | 178 | Running custom SQL queries 179 | -------------------------- 180 | 181 | Of course the main reason you're using a database is that you want to 182 | use the full power of SQL queries. Here's how you run them with ``dataset``:: 183 | 184 | result = db.query('SELECT country, COUNT(*) c FROM user GROUP BY country') 185 | for row in result: 186 | print(row['country'], row['c']) 187 | 188 | The :py:meth:`query() ` method can also be used to 189 | access the underlying `SQLAlchemy core API `_, which allows for the 190 | programmatic construction of more complex queries:: 191 | 192 | table = db['user'].table 193 | statement = table.select(table.c.name.like('%John%')) 194 | result = db.query(statement) 195 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. 
Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/dataset.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/dataset.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/dataset" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/dataset" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. 
The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # dataset documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Apr 1 18:41:21 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | sys.path.insert(0, os.path.abspath('../')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | project = u'dataset' 44 | copyright = u'2013-2018, Friedrich Lindenberg, Gregor Aisch, Stefan Wehrmeyer' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = '1.0' 52 | # The full version, including alpha/beta/rc tags. 53 | release = '1.0.8' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 
81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | # If true, keep warnings as "system message" paragraphs in the built documents. 90 | #keep_warnings = False 91 | 92 | 93 | # -- Options for HTML output --------------------------------------------------- 94 | 95 | # The theme to use for HTML and HTML Help pages. See the documentation for 96 | # a list of builtin themes. 97 | html_theme = 'kr' 98 | 99 | # Theme options are theme-specific and customize the look and feel of a theme 100 | # further. For a list of options available for each theme, see the 101 | # documentation. 102 | # html_theme_options = { 103 | # 'stickysidebar': "true" 104 | # } 105 | 106 | # Add any paths that contain custom themes here, relative to this directory. 107 | html_theme_path = ['_themes'] 108 | 109 | # The name for this set of Sphinx documents. If None, it defaults to 110 | # " v documentation". 111 | #html_title = None 112 | 113 | # A shorter title for the navigation bar. Default is the same as html_title. 114 | #html_short_title = None 115 | 116 | # The name of an image file (relative to this directory) to place at the top 117 | # of the sidebar. 118 | #html_logo = None 119 | 120 | # The name of an image file (within the static path) to use as favicon of the 121 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 122 | # pixels large. 123 | #html_favicon = None 124 | 125 | # Add any paths that contain custom static files (such as style sheets) here, 126 | # relative to this directory. They are copied after the builtin static files, 127 | # so a file named "default.css" will overwrite the builtin "default.css". 128 | html_static_path = ['_static'] 129 | 130 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 131 | # using the given strftime format. 132 | #html_last_updated_fmt = '%b %d, %Y' 133 | 134 | # If true, SmartyPants will be used to convert quotes and dashes to 135 | # typographically correct entities. 136 | #html_use_smartypants = True 137 | 138 | # Custom sidebar templates, maps document names to template names. 139 | html_sidebars = { 140 | 'index': ['sidebarlogo.html', 'sourcelink.html', 'searchbox.html'], 141 | 'api': ['sidebarlogo.html', 'autotoc.html', 'sourcelink.html', 'searchbox.html'], 142 | '**': ['sidebarlogo.html', 'localtoc.html', 'sourcelink.html', 'searchbox.html'] 143 | } 144 | 145 | # Additional templates that should be rendered to pages, maps page names to 146 | # template names. 147 | #html_additional_pages = {} 148 | 149 | # If false, no module index is generated. 150 | #html_domain_indices = True 151 | 152 | # If false, no index is generated. 153 | #html_use_index = True 154 | 155 | # If true, the index is split into individual pages for each letter. 156 | #html_split_index = False 157 | 158 | # If true, links to the reST sources are added to the pages. 159 | html_show_sourcelink = False 160 | 161 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 162 | #html_show_sphinx = True 163 | 164 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 165 | #html_show_copyright = True 166 | 167 | # If true, an OpenSearch description file will be output, and all pages will 168 | # contain a tag referring to it. 
The value of this option must be the 169 | # base URL from which the finished HTML is served. 170 | #html_use_opensearch = '' 171 | 172 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 173 | #html_file_suffix = None 174 | 175 | # Output file base name for HTML help builder. 176 | htmlhelp_basename = 'datasetdoc' 177 | 178 | 179 | # -- Options for LaTeX output -------------------------------------------------- 180 | 181 | latex_elements = { 182 | # The paper size ('letterpaper' or 'a4paper'). 183 | #'papersize': 'letterpaper', 184 | 185 | # The font size ('10pt', '11pt' or '12pt'). 186 | #'pointsize': '10pt', 187 | 188 | # Additional stuff for the LaTeX preamble. 189 | #'preamble': '', 190 | } 191 | 192 | # Grouping the document tree into LaTeX files. List of tuples 193 | # (source start file, target name, title, author, documentclass [howto/manual]). 194 | latex_documents = [ 195 | ('index', 'dataset.tex', u'dataset Documentation', 196 | u'Friedrich Lindenberg, Gregor Aisch, Stefan Wehrmeyer', 'manual'), 197 | ] 198 | 199 | # The name of an image file (relative to this directory) to place at the top of 200 | # the title page. 201 | #latex_logo = None 202 | 203 | # For "manual" documents, if this is true, then toplevel headings are parts, 204 | # not chapters. 205 | #latex_use_parts = False 206 | 207 | # If true, show page references after internal links. 208 | #latex_show_pagerefs = False 209 | 210 | # If true, show URL addresses after external links. 211 | #latex_show_urls = False 212 | 213 | # Documents to append as an appendix to all manuals. 214 | #latex_appendices = [] 215 | 216 | # If false, no module index is generated. 217 | #latex_domain_indices = True 218 | 219 | 220 | # -- Options for manual page output -------------------------------------------- 221 | 222 | # One entry per manual page. List of tuples 223 | # (source start file, name, description, authors, manual section). 224 | man_pages = [ 225 | ('index', 'dataset', u'dataset Documentation', 226 | [u'Friedrich Lindenberg, Gregor Aisch, Stefan Wehrmeyer'], 1) 227 | ] 228 | 229 | # If true, show URL addresses after external links. 230 | #man_show_urls = False 231 | 232 | 233 | # -- Options for Texinfo output ------------------------------------------------ 234 | 235 | # Grouping the document tree into Texinfo files. List of tuples 236 | # (source start file, target name, title, author, 237 | # dir menu entry, description, category) 238 | texinfo_documents = [ 239 | ('index', 'dataset', u'dataset Documentation', 240 | u'Friedrich Lindenberg, Gregor Aisch, Stefan Wehrmeyer', 'dataset', 'Databases for lazy people.', 241 | 'Miscellaneous'), 242 | ] 243 | 244 | # Documents to append as an appendix to all manuals. 245 | #texinfo_appendices = [] 246 | 247 | # If false, no module index is generated. 248 | #texinfo_domain_indices = True 249 | 250 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 251 | #texinfo_show_urls = 'footnote' 252 | 253 | # If true, do not generate a @detailmenu in the "Top" node's menu. 
254 | #texinfo_no_detailmenu = False 255 | -------------------------------------------------------------------------------- /dataset/database.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import threading 3 | 4 | import six 5 | from six.moves.urllib.parse import parse_qs, urlparse 6 | 7 | from sqlalchemy import create_engine 8 | from sqlalchemy.sql import text 9 | from sqlalchemy.schema import MetaData 10 | from sqlalchemy.pool import StaticPool 11 | from sqlalchemy.util import safe_reraise 12 | from sqlalchemy.engine.reflection import Inspector 13 | 14 | from alembic.migration import MigrationContext 15 | from alembic.operations import Operations 16 | 17 | from dataset.table import Table 18 | from dataset.util import ResultIter, row_type, safe_url, QUERY_STEP 19 | from dataset.util import normalize_table_name 20 | from dataset.types import Types 21 | 22 | log = logging.getLogger(__name__) 23 | 24 | 25 | class Database(object): 26 | """A database object represents a SQL database with multiple tables.""" 27 | 28 | table_class = Table 29 | 30 | def __init__(self, url, schema=None, reflect_metadata=True, 31 | engine_kwargs=None, reflect_views=True, 32 | ensure_schema=True, row_type=row_type): 33 | """Configure and connect to the database.""" 34 | if engine_kwargs is None: 35 | engine_kwargs = {} 36 | 37 | parsed_url = urlparse(url) 38 | if parsed_url.scheme.lower() in 'sqlite': 39 | # ref: https://github.com/pudo/dataset/issues/163 40 | if 'poolclass' not in engine_kwargs: 41 | engine_kwargs['poolclass'] = StaticPool 42 | 43 | self.lock = threading.RLock() 44 | self.local = threading.local() 45 | 46 | if len(parsed_url.query): 47 | query = parse_qs(parsed_url.query) 48 | if schema is None: 49 | schema_qs = query.get('schema', query.get('searchpath', [])) 50 | if len(schema_qs): 51 | schema = schema_qs.pop() 52 | 53 | self.types = Types() 54 | self.schema = schema 55 | self.engine = create_engine(url, **engine_kwargs) 56 | self.url = url 57 | self.row_type = row_type 58 | self.ensure_schema = ensure_schema 59 | self._tables = {} 60 | 61 | @property 62 | def executable(self): 63 | """Connection against which statements will be executed.""" 64 | if not hasattr(self.local, 'conn'): 65 | self.local.conn = self.engine.connect() 66 | return self.local.conn 67 | 68 | @property 69 | def op(self): 70 | """Get an alembic operations context.""" 71 | ctx = MigrationContext.configure(self.executable) 72 | return Operations(ctx) 73 | 74 | @property 75 | def inspect(self): 76 | """Get a SQLAlchemy inspector.""" 77 | return Inspector.from_engine(self.executable) 78 | 79 | @property 80 | def metadata(self): 81 | """Return a SQLAlchemy schema cache object.""" 82 | return MetaData(schema=self.schema, bind=self.executable) 83 | 84 | @property 85 | def in_transaction(self): 86 | """Check if this database is in a transactional context.""" 87 | if not hasattr(self.local, 'tx'): 88 | return False 89 | return len(self.local.tx) > 0 90 | 91 | def _flush_tables(self): 92 | """Clear the table metadata after transaction rollbacks.""" 93 | for table in self._tables.values(): 94 | table._table = None 95 | 96 | def begin(self): 97 | """Enter a transaction explicitly. 98 | 99 | No data will be written until the transaction has been committed. 100 | """ 101 | if not hasattr(self.local, 'tx'): 102 | self.local.tx = [] 103 | self.local.tx.append(self.executable.begin()) 104 | 105 | def commit(self): 106 | """Commit the current transaction. 
107 | 
108 |         Make all statements executed since the transaction was begun permanent.
109 |         """
110 |         if hasattr(self.local, 'tx') and self.local.tx:
111 |             tx = self.local.tx.pop()
112 |             tx.commit()
113 |             self._flush_tables()
114 | 
115 |     def rollback(self):
116 |         """Roll back the current transaction.
117 | 
118 |         Discard all statements executed since the transaction was begun.
119 |         """
120 |         if hasattr(self.local, 'tx') and self.local.tx:
121 |             tx = self.local.tx.pop()
122 |             tx.rollback()
123 |             self._flush_tables()
124 | 
125 |     def __enter__(self):
126 |         """Start a transaction."""
127 |         self.begin()
128 |         return self
129 | 
130 |     def __exit__(self, error_type, error_value, traceback):
131 |         """End a transaction by committing or rolling back."""
132 |         if error_type is None:
133 |             try:
134 |                 self.commit()
135 |             except Exception:
136 |                 with safe_reraise():
137 |                     self.rollback()
138 |         else:
139 |             self.rollback()
140 | 
141 |     @property
142 |     def tables(self):
143 |         """Get a listing of all tables that exist in the database."""
144 |         return self.inspect.get_table_names(schema=self.schema)
145 | 
146 |     def __contains__(self, table_name):
147 |         """Check if the given table name exists in the database."""
148 |         try:
149 |             return normalize_table_name(table_name) in self.tables
150 |         except ValueError:
151 |             return False
152 | 
153 |     def create_table(self, table_name, primary_id=None, primary_type=None):
154 |         """Create a new table.
155 | 
156 |         Either loads a table or creates it if it doesn't exist yet. You can
157 |         define the name and type of the primary key field, if a new table is to
158 |         be created. The default is to create an auto-incrementing integer,
159 |         ``id``. You can also set the primary key to be a string or big integer.
160 |         The caller will be responsible for the uniqueness of ``primary_id`` if
161 |         it is defined as a text type.
162 | 
163 |         Returns a :py:class:`Table <dataset.Table>` instance.
164 |         ::
165 | 
166 |             table = db.create_table('population')
167 | 
168 |             # custom id and type
169 |             table2 = db.create_table('population2', 'age')
170 |             table3 = db.create_table('population3',
171 |                                      primary_id='city',
172 |                                      primary_type=db.types.text)
173 |             # custom length of String
174 |             table4 = db.create_table('population4',
175 |                                      primary_id='city',
176 |                                      primary_type=db.types.string(25))
177 |             # no primary key
178 |             table5 = db.create_table('population5',
179 |                                      primary_id=False)
180 |         """
181 |         assert not isinstance(primary_type, six.string_types), \
182 |             'Text-based primary_type support is dropped, use db.types.'
183 |         table_name = normalize_table_name(table_name)
184 |         with self.lock:
185 |             if table_name not in self._tables:
186 |                 self._tables[table_name] = self.table_class(self, table_name,
187 |                                                             primary_id=primary_id,
188 |                                                             primary_type=primary_type,
189 |                                                             auto_create=True)
190 |             return self._tables.get(table_name)
191 | 
192 |     def load_table(self, table_name):
193 |         """Load a table.
194 | 
195 |         This will fail if the table does not already exist in the database. If
196 |         the table exists, its columns will be reflected and are available on
197 |         the :py:class:`Table <dataset.Table>` object.
198 | 
199 |         Returns a :py:class:`Table <dataset.Table>` instance.
200 |         ::
201 | 
202 |             table = db.load_table('population')
203 |         """
204 |         table_name = normalize_table_name(table_name)
205 |         with self.lock:
206 |             if table_name not in self._tables:
207 |                 self._tables[table_name] = self.table_class(self, table_name)
208 |             return self._tables.get(table_name)
209 | 
210 |     def get_table(self, table_name, primary_id=None, primary_type=None):
211 |         """Load or create a table.
212 | 
213 |         This is now the same as ``create_table``.
214 |         ::
215 | 
216 |             table = db.get_table('population')
217 |             # you can also use the short-hand syntax:
218 |             table = db['population']
219 |         """
220 |         return self.create_table(table_name, primary_id, primary_type)
221 | 
222 |     def __getitem__(self, table_name):
223 |         """Get a given table."""
224 |         return self.get_table(table_name)
225 | 
226 |     def _ipython_key_completions_(self):
227 |         """Completion for table names with IPython."""
228 |         return self.tables
229 | 
230 |     def query(self, query, *args, **kwargs):
231 |         """Run a statement on the database directly.
232 | 
233 |         Allows for the execution of arbitrary read/write queries. A query can
234 |         either be a plain text string, or a SQLAlchemy expression. If a plain
235 |         string is passed in, it will be converted to an expression
236 |         automatically.
237 | 
238 |         Further positional and keyword arguments will be used for parameter
239 |         binding. To include a positional argument in your query, use question
240 |         marks in the query (i.e. ``SELECT * FROM tbl WHERE a = ?``). For
241 |         keyword arguments, use a bind parameter (i.e. ``SELECT * FROM tbl
242 |         WHERE a = :foo``).
243 |         ::
244 | 
245 |             statement = 'SELECT user, COUNT(*) c FROM photos GROUP BY user'
246 |             for row in db.query(statement):
247 |                 print(row['user'], row['c'])
248 | 
249 |         The returned iterator will yield each result sequentially.
250 |         """
251 |         if isinstance(query, six.string_types):
252 |             query = text(query)
253 |         _step = kwargs.pop('_step', QUERY_STEP)
254 |         rp = self.executable.execute(query, *args, **kwargs)
255 |         return ResultIter(rp, row_type=self.row_type, step=_step)
256 | 
257 |     def __repr__(self):
258 |         """Text representation contains the URL."""
259 |         return '<Database(%s)>' % safe_url(self.url)
260 | 
--------------------------------------------------------------------------------
/docs/_themes/kr/static/flasky.css_t:
--------------------------------------------------------------------------------
1 | /*
2 |  * flasky.css_t
3 |  * ~~~~~~~~~~~~
4 |  *
5 |  * :copyright: Copyright 2010 by Armin Ronacher. Modifications by Kenneth Reitz.
6 |  * :license: Flask Design License, see LICENSE for details.
7 | */ 8 | 9 | {% set page_width = '940px' %} 10 | {% set sidebar_width = '220px' %} 11 | 12 | @import url("basic.css"); 13 | 14 | /* -- page layout ----------------------------------------------------------- */ 15 | 16 | body { 17 | font-family: "Georgia", "Open Sans", OpenSansRegular, sans-serif; 18 | font-size: 16px; 19 | background: #fff; 20 | font-weight: 400; 21 | color: #000; 22 | margin: 0; 23 | padding: 0; 24 | } 25 | 26 | div.document { 27 | width: {{ page_width }}; 28 | margin: 30px auto 0 auto; 29 | } 30 | 31 | div.documentwrapper { 32 | float: left; 33 | width: 100%; 34 | } 35 | 36 | div.bodywrapper { 37 | margin: 0 0 0 {{ sidebar_width }}; 38 | } 39 | 40 | div.sphinxsidebar { 41 | width: {{ sidebar_width }}; 42 | } 43 | 44 | hr { 45 | border: 1px solid #B1B4B6; 46 | } 47 | 48 | div.body { 49 | background-color: white; 50 | color: #3E4349; 51 | padding: 0 30px 0 30px; 52 | } 53 | 54 | img.floatingflask { 55 | padding: 0 0 10px 10px; 56 | float: right; 57 | } 58 | 59 | div.footer { 60 | width: {{ page_width }}; 61 | margin: 20px auto 30px auto; 62 | font-size: 14px; 63 | color: #888; 64 | text-align: right; 65 | } 66 | 67 | div.footer a { 68 | color: #888; 69 | } 70 | 71 | div.related { 72 | display: none; 73 | } 74 | 75 | div.sphinxsidebar a { 76 | color: #444; 77 | text-decoration: none; 78 | border-bottom: 1px dotted #999; 79 | } 80 | 81 | div.sphinxsidebar a:hover { 82 | border-bottom: 1px solid #999; 83 | } 84 | 85 | div.sphinxsidebar { 86 | font-size: 14px; 87 | line-height: 1.5; 88 | } 89 | 90 | div.sphinxsidebarwrapper { 91 | padding: 18px 10px; 92 | } 93 | 94 | div.sphinxsidebarwrapper p.logo { 95 | padding: 0; 96 | margin: -10px 0 0 -20px; 97 | text-align: center; 98 | } 99 | 100 | div.sphinxsidebar h3, 101 | div.sphinxsidebar h4 { 102 | font-family: 'Antic Slab' ,'Garamond', 'Georgia', serif; 103 | color: #000; 104 | font-size: 24px; 105 | font-weight: normal; 106 | margin: 30px 0 5px 0; 107 | padding: 0; 108 | } 109 | 110 | div.sphinxsidebar h4 { 111 | font-size: 20px; 112 | } 113 | 114 | div.sphinxsidebar h3 a { 115 | color: #000; 116 | } 117 | 118 | div.sphinxsidebar p.logo a, 119 | div.sphinxsidebar h3 a, 120 | div.sphinxsidebar p.logo a:hover, 121 | div.sphinxsidebar h3 a:hover { 122 | border: none; 123 | } 124 | 125 | div.sphinxsidebar p { 126 | color: #555; 127 | margin: 10px 0; 128 | } 129 | 130 | div.sphinxsidebar ul { 131 | margin: 10px 0px; 132 | padding: 0; 133 | color: #000; 134 | } 135 | 136 | div.sphinxsidebar input { 137 | border: 1px solid #ccc; 138 | font-family: 'Georgia', serif; 139 | font-size: 1em; 140 | } 141 | 142 | /* -- body styles ----------------------------------------------------------- */ 143 | 144 | a { 145 | color: #004B6B; 146 | text-decoration: underline; 147 | } 148 | 149 | a:hover { 150 | color: #6D4100; 151 | text-decoration: underline; 152 | } 153 | 154 | div.body h1, 155 | div.body h2, 156 | div.body h3, 157 | div.body h4, 158 | div.body h5, 159 | div.body h6 { 160 | font-family: 'Antic Slab', serif; 161 | font-weight: normal; 162 | margin: 30px 0px 10px 0px; 163 | padding: 0; 164 | text-shadow: 1px 1px 3px #ddd; 165 | color: #000; 166 | } 167 | 168 | div.body h1 { margin-top: 0; padding-top: 0; font-size: 250%; } 169 | div.body h2 { font-size: 190%; } 170 | div.body h3 { font-size: 160%; } 171 | div.body h4 { font-size: 140%; } 172 | div.body h5 { font-size: 110%; } 173 | div.body h6 { font-size: 110%; } 174 | 175 | a.headerlink { 176 | color: #ddd; 177 | padding: 0 4px; 178 | text-decoration: none; 179 | } 180 | 181 | 
a.headerlink:hover { 182 | color: #444; 183 | background: #eaeaea; 184 | } 185 | 186 | div.body p, div.body dd, div.body li { 187 | line-height: 1.4em; 188 | } 189 | 190 | div.admonition { 191 | background: #fafafa; 192 | margin: 20px -30px; 193 | padding: 10px 30px; 194 | border-top: 1px solid #ccc; 195 | border-bottom: 1px solid #ccc; 196 | } 197 | 198 | div.admonition tt.xref, div.admonition a tt { 199 | border-bottom: 1px solid #fafafa; 200 | } 201 | 202 | dd div.admonition { 203 | margin-left: -60px; 204 | padding-left: 60px; 205 | } 206 | 207 | div.admonition p.admonition-title { 208 | font-family: 'Garamond', 'Georgia', serif; 209 | font-weight: normal; 210 | font-size: 24px; 211 | margin: 0 0 10px 0; 212 | padding: 0; 213 | line-height: 1; 214 | } 215 | 216 | div.admonition p.last { 217 | margin-bottom: 0; 218 | } 219 | 220 | div.highlight { 221 | background-color: white; 222 | } 223 | 224 | dt:target, .highlight { 225 | background: #FAF3E8; 226 | } 227 | 228 | div.note { 229 | background-color: #eee; 230 | border: 1px solid #ccc; 231 | } 232 | 233 | div.seealso { 234 | background-color: #ffc; 235 | border: 1px solid #ff6; 236 | } 237 | 238 | div.topic { 239 | background-color: #eee; 240 | } 241 | 242 | p.admonition-title { 243 | display: inline; 244 | } 245 | 246 | p.admonition-title:after { 247 | content: ":"; 248 | } 249 | 250 | pre { 251 | font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace; 252 | font-size: 0.88em; 253 | } 254 | 255 | tt { 256 | font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace; 257 | font-size: 0.95em; 258 | } 259 | 260 | img.screenshot { 261 | } 262 | 263 | tt.descname, tt.descclassname { 264 | font-size: 0.95em; 265 | } 266 | 267 | tt.descname { 268 | padding-right: 0.08em; 269 | } 270 | 271 | img.screenshot { 272 | -moz-box-shadow: 2px 2px 4px #eee; 273 | -webkit-box-shadow: 2px 2px 4px #eee; 274 | box-shadow: 2px 2px 4px #eee; 275 | } 276 | 277 | table.docutils { 278 | border: 1px solid #888; 279 | -moz-box-shadow: 2px 2px 4px #eee; 280 | -webkit-box-shadow: 2px 2px 4px #eee; 281 | box-shadow: 2px 2px 4px #eee; 282 | } 283 | 284 | table.docutils td, table.docutils th { 285 | border: 1px solid #888; 286 | padding: 0.25em 0.7em; 287 | } 288 | 289 | table.field-list, table.footnote { 290 | border: none; 291 | -moz-box-shadow: none; 292 | -webkit-box-shadow: none; 293 | box-shadow: none; 294 | } 295 | 296 | table.footnote { 297 | margin: 15px 0; 298 | width: 100%; 299 | border: 1px solid #eee; 300 | background: #fdfdfd; 301 | font-size: 0.9em; 302 | } 303 | 304 | table.footnote + table.footnote { 305 | margin-top: -15px; 306 | border-top: none; 307 | } 308 | 309 | table.field-list th { 310 | padding: 0 0.8em 0 0; 311 | } 312 | 313 | table.field-list td { 314 | padding: 0; 315 | } 316 | 317 | table.footnote td.label { 318 | width: 0px; 319 | padding: 0.3em 0 0.3em 0.5em; 320 | } 321 | 322 | table.footnote td { 323 | padding: 0.3em 0.5em; 324 | } 325 | 326 | dl { 327 | margin: 0; 328 | padding: 0; 329 | } 330 | 331 | dl dd { 332 | margin-left: 30px; 333 | } 334 | 335 | blockquote { 336 | margin: 0 0 0 30px; 337 | padding: 0; 338 | } 339 | 340 | ul, ol { 341 | margin: 10px 0 10px 30px; 342 | padding: 0; 343 | } 344 | 345 | pre { 346 | background: #eee; 347 | padding: 7px 30px; 348 | margin: 15px -30px; 349 | line-height: 1.3em; 350 | } 351 | 352 | dl pre, blockquote pre, li pre { 353 | margin-left: 0px; 354 | padding-left: 15px; 355 | } 356 | 357 | dl dl pre { 358 | margin-left: 
0px; 359 | padding-left: 15px; 360 | } 361 | 362 | tt { 363 | background-color: #ecf0f3; 364 | color: #222; 365 | /* padding: 1px 2px; */ 366 | } 367 | 368 | tt.xref, a tt { 369 | background-color: #FBFBFB; 370 | color: #2277bb; 371 | border-bottom: 1px solid white; 372 | } 373 | 374 | a.reference { 375 | text-decoration: none; 376 | border-bottom: 1px dotted #004B6B; 377 | } 378 | 379 | a.reference:hover { 380 | border-bottom: 1px solid #6D4100; 381 | } 382 | 383 | a.footnote-reference { 384 | text-decoration: none; 385 | font-size: 0.7em; 386 | vertical-align: top; 387 | border-bottom: 1px dotted #004B6B; 388 | } 389 | 390 | a.footnote-reference:hover { 391 | border-bottom: 1px solid #6D4100; 392 | } 393 | 394 | a:hover tt { 395 | background: #EEE; 396 | } 397 | 398 | li { 399 | margin-bottom: 0.3em; 400 | } 401 | 402 | 403 | @media screen and (max-width: 870px) { 404 | 405 | div.sphinxsidebar { 406 | display: none; 407 | } 408 | 409 | div.document { 410 | width: 100%; 411 | 412 | } 413 | 414 | div.documentwrapper { 415 | margin-left: 0; 416 | margin-top: 0; 417 | margin-right: 0; 418 | margin-bottom: 0; 419 | } 420 | 421 | div.bodywrapper { 422 | margin-top: 0; 423 | margin-right: 0; 424 | margin-bottom: 0; 425 | margin-left: 0; 426 | } 427 | 428 | ul { 429 | margin-left: 0; 430 | } 431 | 432 | .document { 433 | width: auto; 434 | } 435 | 436 | .footer { 437 | width: auto; 438 | } 439 | 440 | .bodywrapper { 441 | margin: 0; 442 | } 443 | 444 | .footer { 445 | width: auto; 446 | } 447 | 448 | .github { 449 | display: none; 450 | } 451 | 452 | 453 | 454 | } 455 | 456 | 457 | 458 | @media screen and (max-width: 875px) { 459 | 460 | body { 461 | margin: 0; 462 | padding: 20px 30px; 463 | } 464 | 465 | div.documentwrapper { 466 | float: none; 467 | background: white; 468 | } 469 | 470 | div.sphinxsidebar { 471 | display: block; 472 | float: none; 473 | width: 102.5%; 474 | margin: 50px -30px -20px -30px; 475 | padding: 10px 20px; 476 | background: #333; 477 | color: white; 478 | } 479 | 480 | div.sphinxsidebar h3, div.sphinxsidebar h4, div.sphinxsidebar p, 481 | div.sphinxsidebar h3 a { 482 | color: white; 483 | } 484 | 485 | div.sphinxsidebar a { 486 | color: #aaa; 487 | } 488 | 489 | div.sphinxsidebar p.logo { 490 | display: none; 491 | } 492 | 493 | div.document { 494 | width: 100%; 495 | margin: 0; 496 | } 497 | 498 | div.related { 499 | display: block; 500 | margin: 0; 501 | padding: 10px 0 20px 0; 502 | } 503 | 504 | div.related ul, 505 | div.related ul li { 506 | margin: 0; 507 | padding: 0; 508 | } 509 | 510 | div.footer { 511 | display: none; 512 | } 513 | 514 | div.bodywrapper { 515 | margin: 0; 516 | } 517 | 518 | div.body { 519 | min-height: 0; 520 | padding: 0; 521 | } 522 | 523 | 524 | .rtd_doc_footer { 525 | display: none; 526 | } 527 | 528 | .document { 529 | width: auto; 530 | } 531 | 532 | .footer { 533 | width: auto; 534 | } 535 | 536 | .footer { 537 | width: auto; 538 | } 539 | 540 | .github { 541 | display: none; 542 | } 543 | } 544 | 545 | 546 | /* misc. 
*/ 547 | 548 | .revsys-inline { 549 | display: none!important; 550 | } 551 | 552 | div.sphinxsidebar #searchbox input[type="text"] { 553 | width: 140px; 554 | padding: 4px 3px; 555 | } 556 | 557 | .highlight .nv { 558 | color: #C65D09!important; 559 | } 560 | -------------------------------------------------------------------------------- /test/test_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import os 4 | import unittest 5 | from datetime import datetime 6 | 7 | try: 8 | from collections import OrderedDict 9 | except ImportError: # pragma: no cover 10 | from ordereddict import OrderedDict # Python < 2.7 drop-in 11 | 12 | from sqlalchemy import FLOAT, INTEGER, TEXT 13 | from sqlalchemy.exc import IntegrityError, SQLAlchemyError, ArgumentError 14 | 15 | from dataset import connect 16 | 17 | from .sample_data import TEST_DATA, TEST_CITY_1 18 | 19 | 20 | class DatabaseTestCase(unittest.TestCase): 21 | 22 | def setUp(self): 23 | os.environ.setdefault('DATABASE_URL', 'sqlite:///:memory:') 24 | self.db = connect(os.environ['DATABASE_URL']) 25 | self.tbl = self.db['weather'] 26 | self.tbl.insert_many(TEST_DATA) 27 | 28 | def tearDown(self): 29 | for table in self.db.tables: 30 | self.db[table].drop() 31 | 32 | def test_valid_database_url(self): 33 | assert self.db.url, os.environ['DATABASE_URL'] 34 | 35 | def test_database_url_query_string(self): 36 | db = connect('sqlite:///:memory:/?cached_statements=1') 37 | assert 'cached_statements' in db.url, db.url 38 | 39 | def test_tables(self): 40 | assert self.db.tables == ['weather'], self.db.tables 41 | 42 | def test_contains(self): 43 | assert 'weather' in self.db, self.db.tables 44 | 45 | def test_create_table(self): 46 | table = self.db['foo'] 47 | assert table.table.exists() 48 | assert len(table.table.columns) == 1, table.table.columns 49 | assert 'id' in table.table.c, table.table.c 50 | 51 | def test_create_table_no_ids(self): 52 | if 'mysql' in self.db.engine.dialect.dbapi.__name__: 53 | return 54 | if 'sqlite' in self.db.engine.dialect.dbapi.__name__: 55 | return 56 | table = self.db.create_table("foo_no_id", primary_id=False) 57 | assert table.table.exists() 58 | assert len(table.table.columns) == 0, table.table.columns 59 | 60 | def test_create_table_custom_id1(self): 61 | pid = "string_id" 62 | table = self.db.create_table("foo2", pid, self.db.types.string(255)) 63 | assert table.table.exists() 64 | assert len(table.table.columns) == 1, table.table.columns 65 | assert pid in table.table.c, table.table.c 66 | table.insert({pid: 'foobar'}) 67 | assert table.find_one(string_id='foobar')[pid] == 'foobar' 68 | 69 | def test_create_table_custom_id2(self): 70 | pid = "string_id" 71 | table = self.db.create_table("foo3", pid, self.db.types.string(50)) 72 | assert table.table.exists() 73 | assert len(table.table.columns) == 1, table.table.columns 74 | assert pid in table.table.c, table.table.c 75 | 76 | table.insert({pid: 'foobar'}) 77 | assert table.find_one(string_id='foobar')[pid] == 'foobar' 78 | 79 | def test_create_table_custom_id3(self): 80 | pid = "int_id" 81 | table = self.db.create_table("foo4", primary_id=pid) 82 | assert table.table.exists() 83 | assert len(table.table.columns) == 1, table.table.columns 84 | assert pid in table.table.c, table.table.c 85 | 86 | table.insert({pid: 123}) 87 | table.insert({pid: 124}) 88 | assert table.find_one(int_id=123)[pid] == 123 89 | assert table.find_one(int_id=124)[pid] == 124 90 | 
self.assertRaises(IntegrityError, lambda: table.insert({pid: 123})) 91 | 92 | def test_create_table_shorthand1(self): 93 | pid = "int_id" 94 | table = self.db.get_table('foo5', pid) 95 | assert table.table.exists 96 | assert len(table.table.columns) == 1, table.table.columns 97 | assert pid in table.table.c, table.table.c 98 | 99 | table.insert({'int_id': 123}) 100 | table.insert({'int_id': 124}) 101 | assert table.find_one(int_id=123)['int_id'] == 123 102 | assert table.find_one(int_id=124)['int_id'] == 124 103 | self.assertRaises(IntegrityError, lambda: table.insert({'int_id': 123})) 104 | 105 | def test_create_table_shorthand2(self): 106 | pid = "string_id" 107 | table = self.db.get_table('foo6', primary_id=pid, 108 | primary_type=self.db.types.string(255)) 109 | assert table.table.exists 110 | assert len(table.table.columns) == 1, table.table.columns 111 | assert pid in table.table.c, table.table.c 112 | 113 | table.insert({ 114 | 'string_id': 'foobar'}) 115 | assert table.find_one(string_id='foobar')['string_id'] == 'foobar' 116 | 117 | def test_with(self): 118 | init_length = len(self.db['weather']) 119 | with self.assertRaises(ValueError): 120 | with self.db as tx: 121 | tx['weather'].insert({'date': datetime(2011, 1, 1), 122 | 'temperature': 1, 123 | 'place': 'tmp_place'}) 124 | raise ValueError() 125 | assert len(self.db['weather']) == init_length 126 | 127 | def test_invalid_values(self): 128 | if 'mysql' in self.db.engine.dialect.dbapi.__name__: 129 | # WARNING: mysql seems to be doing some weird type casting upon insert. 130 | # The mysql-python driver is not affected but it isn't compatible with Python 3 131 | # Conclusion: use postgresql. 132 | return 133 | with self.assertRaises(SQLAlchemyError): 134 | tbl = self.db['weather'] 135 | tbl.insert({'date': True, 'temperature': 'wrong_value', 'place': 'tmp_place'}) 136 | 137 | def test_load_table(self): 138 | tbl = self.db.load_table('weather') 139 | assert tbl.table.name == self.tbl.table.name 140 | 141 | def test_query(self): 142 | r = self.db.query('SELECT COUNT(*) AS num FROM weather').next() 143 | assert r['num'] == len(TEST_DATA), r 144 | 145 | def test_table_cache_updates(self): 146 | tbl1 = self.db.get_table('people') 147 | data = OrderedDict([('first_name', 'John'), ('last_name', 'Smith')]) 148 | tbl1.insert(data) 149 | data['id'] = 1 150 | tbl2 = self.db.get_table('people') 151 | assert dict(tbl2.all().next()) == dict(data), (tbl2.all().next(), data) 152 | 153 | 154 | class TableTestCase(unittest.TestCase): 155 | 156 | def setUp(self): 157 | self.db = connect('sqlite:///:memory:') 158 | self.tbl = self.db['weather'] 159 | for row in TEST_DATA: 160 | self.tbl.insert(row) 161 | 162 | def test_insert(self): 163 | assert len(self.tbl) == len(TEST_DATA), len(self.tbl) 164 | last_id = self.tbl.insert({ 165 | 'date': datetime(2011, 1, 2), 166 | 'temperature': -10, 167 | 'place': 'Berlin'} 168 | ) 169 | assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl) 170 | assert self.tbl.find_one(id=last_id)['place'] == 'Berlin' 171 | 172 | def test_insert_ignore(self): 173 | self.tbl.insert_ignore({ 174 | 'date': datetime(2011, 1, 2), 175 | 'temperature': -10, 176 | 'place': 'Berlin'}, 177 | ['place'] 178 | ) 179 | assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl) 180 | self.tbl.insert_ignore({ 181 | 'date': datetime(2011, 1, 2), 182 | 'temperature': -10, 183 | 'place': 'Berlin'}, 184 | ['place'] 185 | ) 186 | assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl) 187 | 188 | def test_insert_ignore_all_key(self): 189 | 
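        # Descriptive note: insert_ignore() with every column used as a key
        # should insert the first row only; the repeated calls below are no-ops.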
        for i in range(0, 4):
190 |             self.tbl.insert_ignore({
191 |                 'date': datetime(2011, 1, 2),
192 |                 'temperature': -10,
193 |                 'place': 'Berlin'},
194 |                 ['date', 'temperature', 'place']
195 |             )
196 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
197 | 
198 |     def test_upsert(self):
199 |         self.tbl.upsert({
200 |             'date': datetime(2011, 1, 2),
201 |             'temperature': -10,
202 |             'place': 'Berlin'},
203 |             ['place']
204 |         )
205 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
206 |         self.tbl.upsert({
207 |             'date': datetime(2011, 1, 2),
208 |             'temperature': -10,
209 |             'place': 'Berlin'},
210 |             ['place']
211 |         )
212 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
213 | 
214 |     def test_upsert_single_column(self):
215 |         table = self.db['banana_single_col']
216 |         table.upsert({
217 |             'color': 'Yellow'},
218 |             ['color']
219 |         )
220 |         assert len(table) == 1, len(table)
221 |         table.upsert({
222 |             'color': 'Yellow'},
223 |             ['color']
224 |         )
225 |         assert len(table) == 1, len(table)
226 | 
227 |     def test_upsert_all_key(self):
228 |         assert len(self.tbl) == len(TEST_DATA), len(self.tbl)
229 |         for i in range(0, 2):
230 |             self.tbl.upsert({
231 |                 'date': datetime(2011, 1, 2),
232 |                 'temperature': -10,
233 |                 'place': 'Berlin'},
234 |                 ['date', 'temperature', 'place']
235 |             )
236 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
237 | 
238 |     def test_update_while_iter(self):
239 |         for row in self.tbl:
240 |             row['foo'] = 'bar'
241 |             self.tbl.update(row, ['place', 'date'])
242 |         assert len(self.tbl) == len(TEST_DATA), len(self.tbl)
243 | 
244 |     def test_weird_column_names(self):
245 |         with self.assertRaises(ValueError):
246 |             self.tbl.insert({
247 |                 'date': datetime(2011, 1, 2),
248 |                 'temperature': -10,
249 |                 'foo.bar': 'Berlin',
250 |                 'qux.bar': 'Huhu'
251 |             })
252 | 
253 |     def test_invalid_column_names(self):
254 |         tbl = self.db['weather']
255 |         with self.assertRaises(ValueError):
256 |             tbl.insert({None: 'banana'})
257 | 
258 |         with self.assertRaises(ValueError):
259 |             tbl.insert({'': 'banana'})
260 | 
261 |         with self.assertRaises(ValueError):
262 |             tbl.insert({'-': 'banana'})
263 | 
264 |     def test_delete(self):
265 |         self.tbl.insert({
266 |             'date': datetime(2011, 1, 2),
267 |             'temperature': -10,
268 |             'place': 'Berlin'}
269 |         )
270 |         original_count = len(self.tbl)
271 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
272 |         # Test bad use of API
273 |         with self.assertRaises(ArgumentError):
274 |             self.tbl.delete({'place': 'Berlin'})
275 |         assert len(self.tbl) == original_count, len(self.tbl)
276 | 
277 |         assert self.tbl.delete(place='Berlin') is True, 'should return True'
278 |         assert len(self.tbl) == len(TEST_DATA), len(self.tbl)
279 |         assert self.tbl.delete() is True, 'should return True'
280 |         assert len(self.tbl) == 0, len(self.tbl)
281 | 
282 |     def test_repr(self):
283 |         assert repr(self.tbl) == '<Table(weather)>', \
284 |             'the representation should be <Table(weather)>'
285 | 
286 |     def test_delete_nonexist_entry(self):
287 |         assert self.tbl.delete(place='Berlin') is False, \
288 |             'entry does not exist, delete should return False'
289 | 
290 |     def test_find_one(self):
291 |         self.tbl.insert({
292 |             'date': datetime(2011, 1, 2),
293 |             'temperature': -10,
294 |             'place': 'Berlin'
295 |         })
296 |         d = self.tbl.find_one(place='Berlin')
297 |         assert d['temperature'] == -10, d
298 |         d = self.tbl.find_one(place='Atlantis')
299 |         assert d is None, d
300 | 
301 |     def test_count(self):
302 |         assert len(self.tbl) == 6, len(self.tbl)
303 |         length = self.tbl.count(place=TEST_CITY_1)
304 |         assert length == 3, length
305 | 
306 |     def 
test_find(self): 307 | ds = list(self.tbl.find(place=TEST_CITY_1)) 308 | assert len(ds) == 3, ds 309 | ds = list(self.tbl.find(place=TEST_CITY_1, _limit=2)) 310 | assert len(ds) == 2, ds 311 | ds = list(self.tbl.find(place=TEST_CITY_1, _limit=2, _step=1)) 312 | assert len(ds) == 2, ds 313 | ds = list(self.tbl.find(place=TEST_CITY_1, _limit=1, _step=2)) 314 | assert len(ds) == 1, ds 315 | ds = list(self.tbl.find(_step=2)) 316 | assert len(ds) == len(TEST_DATA), ds 317 | ds = list(self.tbl.find(order_by=['temperature'])) 318 | assert ds[0]['temperature'] == -1, ds 319 | ds = list(self.tbl.find(order_by=['-temperature'])) 320 | assert ds[0]['temperature'] == 8, ds 321 | ds = list(self.tbl.find(self.tbl.table.columns.temperature > 4)) 322 | assert len(ds) == 3, ds 323 | 324 | def test_find_dsl(self): 325 | ds = list(self.tbl.find(place={'like': '%lw%'})) 326 | assert len(ds) == 3, ds 327 | ds = list(self.tbl.find(temperature={'>': 5})) 328 | assert len(ds) == 2, ds 329 | ds = list(self.tbl.find(temperature={'>=': 5})) 330 | assert len(ds) == 3, ds 331 | ds = list(self.tbl.find(temperature={'<': 0})) 332 | assert len(ds) == 1, ds 333 | ds = list(self.tbl.find(temperature={'<=': 0})) 334 | assert len(ds) == 2, ds 335 | ds = list(self.tbl.find(temperature={'!=': -1})) 336 | assert len(ds) == 5, ds 337 | ds = list(self.tbl.find(temperature={'between': [5, 8]})) 338 | assert len(ds) == 3, ds 339 | 340 | def test_offset(self): 341 | ds = list(self.tbl.find(place=TEST_CITY_1, _offset=1)) 342 | assert len(ds) == 2, ds 343 | ds = list(self.tbl.find(place=TEST_CITY_1, _limit=2, _offset=2)) 344 | assert len(ds) == 1, ds 345 | 346 | def test_streamed(self): 347 | ds = list(self.tbl.find(place=TEST_CITY_1, _streamed=True, _step=1)) 348 | assert len(ds) == 3, len(ds) 349 | for row in self.tbl.find(place=TEST_CITY_1, _streamed=True, _step=1): 350 | row['temperature'] = -1 351 | self.tbl.update(row, ['id']) 352 | 353 | def test_distinct(self): 354 | x = list(self.tbl.distinct('place')) 355 | assert len(x) == 2, x 356 | x = list(self.tbl.distinct('place', 'date')) 357 | assert len(x) == 6, x 358 | x = list(self.tbl.distinct( 359 | 'place', 'date', 360 | self.tbl.table.columns.date >= datetime(2011, 1, 2, 0, 0))) 361 | assert len(x) == 4, x 362 | 363 | def test_insert_many(self): 364 | data = TEST_DATA * 100 365 | self.tbl.insert_many(data, chunk_size=13) 366 | assert len(self.tbl) == len(data) + 6 367 | 368 | def test_drop_operations(self): 369 | assert self.tbl._table is not None, \ 370 | 'table shouldn\'t be dropped yet' 371 | self.tbl.drop() 372 | assert self.tbl._table is None, \ 373 | 'table should be dropped now' 374 | assert list(self.tbl.all()) == [], self.tbl.all() 375 | assert self.tbl.count() == 0, self.tbl.count() 376 | 377 | def test_table_drop(self): 378 | assert 'weather' in self.db 379 | self.db['weather'].drop() 380 | assert 'weather' not in self.db 381 | 382 | def test_table_drop_then_create(self): 383 | assert 'weather' in self.db 384 | self.db['weather'].drop() 385 | assert 'weather' not in self.db 386 | self.db['weather'].insert({'foo': 'bar'}) 387 | 388 | def test_columns(self): 389 | cols = self.tbl.columns 390 | assert len(list(cols)) == 4, 'column count mismatch' 391 | assert 'date' in cols and 'temperature' in cols and 'place' in cols 392 | 393 | def test_drop_column(self): 394 | try: 395 | self.tbl.drop_column('date') 396 | assert 'date' not in self.tbl.columns 397 | except RuntimeError: 398 | pass 399 | 400 | def test_iter(self): 401 | c = 0 402 | for row in self.tbl: 403 | c += 1 
404 | assert c == len(self.tbl) 405 | 406 | def test_update(self): 407 | date = datetime(2011, 1, 2) 408 | res = self.tbl.update({ 409 | 'date': date, 410 | 'temperature': -10, 411 | 'place': TEST_CITY_1}, 412 | ['place', 'date'] 413 | ) 414 | assert res, 'update should return True' 415 | m = self.tbl.find_one(place=TEST_CITY_1, date=date) 416 | assert m['temperature'] == -10, \ 417 | 'new temp. should be -10 but is %d' % m['temperature'] 418 | 419 | def test_create_column(self): 420 | tbl = self.tbl 421 | tbl.create_column('foo', FLOAT) 422 | assert 'foo' in tbl.table.c, tbl.table.c 423 | assert isinstance(tbl.table.c['foo'].type, FLOAT), \ 424 | tbl.table.c['foo'].type 425 | assert 'foo' in tbl.columns, tbl.columns 426 | 427 | def test_ensure_column(self): 428 | tbl = self.tbl 429 | tbl.create_column_by_example('foo', 0.1) 430 | assert 'foo' in tbl.table.c, tbl.table.c 431 | assert isinstance(tbl.table.c['foo'].type, FLOAT), \ 432 | tbl.table.c['bar'].type 433 | tbl.create_column_by_example('bar', 1) 434 | assert 'bar' in tbl.table.c, tbl.table.c 435 | assert isinstance(tbl.table.c['bar'].type, INTEGER), \ 436 | tbl.table.c['bar'].type 437 | tbl.create_column_by_example('pippo', 'test') 438 | assert 'pippo' in tbl.table.c, tbl.table.c 439 | assert isinstance(tbl.table.c['pippo'].type, TEXT), \ 440 | tbl.table.c['pippo'].type 441 | 442 | def test_key_order(self): 443 | res = self.db.query('SELECT temperature, place FROM weather LIMIT 1') 444 | keys = list(res.next().keys()) 445 | assert keys[0] == 'temperature' 446 | assert keys[1] == 'place' 447 | 448 | def test_empty_query(self): 449 | empty = list(self.tbl.find(place='not in data')) 450 | assert len(empty) == 0, empty 451 | 452 | 453 | class Constructor(dict): 454 | """ Very simple low-functionality extension to ``dict`` to 455 | provide attribute access to dictionary contents""" 456 | def __getattr__(self, name): 457 | return self[name] 458 | 459 | 460 | class RowTypeTestCase(unittest.TestCase): 461 | 462 | def setUp(self): 463 | self.db = connect('sqlite:///:memory:', row_type=Constructor) 464 | self.tbl = self.db['weather'] 465 | for row in TEST_DATA: 466 | self.tbl.insert(row) 467 | 468 | def tearDown(self): 469 | for table in self.db.tables: 470 | self.db[table].drop() 471 | 472 | def test_find_one(self): 473 | self.tbl.insert({ 474 | 'date': datetime(2011, 1, 2), 475 | 'temperature': -10, 476 | 'place': 'Berlin'} 477 | ) 478 | d = self.tbl.find_one(place='Berlin') 479 | assert d['temperature'] == -10, d 480 | assert d.temperature == -10, d 481 | d = self.tbl.find_one(place='Atlantis') 482 | assert d is None, d 483 | 484 | def test_find(self): 485 | ds = list(self.tbl.find(place=TEST_CITY_1)) 486 | assert len(ds) == 3, ds 487 | for item in ds: 488 | assert isinstance(item, Constructor), item 489 | ds = list(self.tbl.find(place=TEST_CITY_1, _limit=2)) 490 | assert len(ds) == 2, ds 491 | for item in ds: 492 | assert isinstance(item, Constructor), item 493 | 494 | def test_distinct(self): 495 | x = list(self.tbl.distinct('place')) 496 | assert len(x) == 2, x 497 | for item in x: 498 | assert isinstance(item, Constructor), item 499 | x = list(self.tbl.distinct('place', 'date')) 500 | assert len(x) == 6, x 501 | for item in x: 502 | assert isinstance(item, Constructor), item 503 | 504 | def test_iter(self): 505 | c = 0 506 | for row in self.tbl: 507 | c += 1 508 | assert isinstance(row, Constructor), row 509 | assert c == len(self.tbl) 510 | 511 | 512 | if __name__ == '__main__': 513 | unittest.main() 514 | 
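The test suite above doubles as usage documentation for the public API. Below is a minimal, self-contained sketch of the same calls, assuming only that the ``dataset`` package is importable (the table and column names are illustrative, not part of the test fixtures):

import dataset

db = dataset.connect('sqlite:///:memory:')
table = db['weather']

# insert() returns the primary key of the new row
pk = table.insert({'place': 'Berlin', 'temperature': -10})

# upsert() updates the row matching the given keys, or inserts a new one
table.upsert({'place': 'Berlin', 'temperature': -5}, ['place'])

# the Database object is a context manager; an exception inside the
# block rolls the transaction back (compare test_with above)
with db as tx:
    tx['weather'].insert({'place': 'Oslo', 'temperature': 2})

print(table.find_one(place='Berlin')['temperature'])  # -5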
--------------------------------------------------------------------------------
/dataset/table.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import warnings
3 | import threading
4 | 
5 | from sqlalchemy.sql import and_, expression
6 | from sqlalchemy.sql.expression import ClauseElement
7 | from sqlalchemy.schema import Column, Index
8 | from sqlalchemy import func, select, false
9 | from sqlalchemy.schema import Table as SQLATable
10 | from sqlalchemy.exc import NoSuchTableError
11 | 
12 | from dataset.types import Types
13 | from dataset.util import normalize_column_name, index_name, ensure_tuple
14 | from dataset.util import DatasetException, ResultIter, QUERY_STEP
15 | from dataset.util import normalize_table_name, pad_chunk_columns
16 | 
17 | 
18 | log = logging.getLogger(__name__)
19 | 
20 | 
21 | class Table(object):
22 |     """Represents a table in a database and exposes common operations."""
23 |     PRIMARY_DEFAULT = 'id'
24 | 
25 |     def __init__(self, database, table_name, primary_id=None,
26 |                  primary_type=None, auto_create=False):
27 |         """Initialise the table from database schema."""
28 |         self.db = database
29 |         self.name = normalize_table_name(table_name)
30 |         self._table = None
31 |         self._indexes = []
32 |         self._primary_id = primary_id
33 |         self._primary_type = primary_type
34 |         self._auto_create = auto_create
35 | 
36 |     @property
37 |     def exists(self):
38 |         """Check to see if the table currently exists in the database."""
39 |         if self._table is not None:
40 |             return True
41 |         return self.name in self.db
42 | 
43 |     @property
44 |     def table(self):
45 |         """Get a reference to the table, which may be reflected or created."""
46 |         if self._table is None:
47 |             self._sync_table(())
48 |         return self._table
49 | 
50 |     @property
51 |     def columns(self):
52 |         """Get a listing of all columns that exist in the table."""
53 |         if not self.exists:
54 |             return []
55 |         return self.table.columns.keys()
56 | 
57 |     def has_column(self, column):
58 |         """Check if a column with the given name exists on this table."""
59 |         return normalize_column_name(column) in self.columns
60 | 
61 |     def insert(self, row, ensure=None, types=None):
62 |         """Add a ``row`` dict by inserting it into the table.
63 | 
64 |         If ``ensure`` is set and any of the keys of the row are not
65 |         table columns, they will be created automatically.
66 | 
67 |         During column creation, ``types`` will be checked for a key
68 |         matching the name of a column to be created, and the given
69 |         SQLAlchemy column type will be used. Otherwise, the type is
70 |         guessed from the row value, defaulting to a simple unicode
71 |         field.
72 |         ::
73 | 
74 |             data = dict(title='I am a banana!')
75 |             table.insert(data)
76 | 
77 |         Returns the inserted row's primary key.
78 |         """
79 |         row = self._sync_columns(row, ensure, types=types)
80 |         res = self.db.executable.execute(self.table.insert(row))
81 |         if len(res.inserted_primary_key) > 0:
82 |             return res.inserted_primary_key[0]
83 |         return True
84 | 
85 |     def insert_ignore(self, row, keys, ensure=None, types=None):
86 |         """Add a ``row`` dict into the table if the row does not exist.
87 | 
88 |         If rows with matching ``keys`` already exist, the row will not be added.
89 | 
90 |         Setting ``ensure`` results in automatically creating missing columns,
91 |         i.e., columns for any keys of the row that are not table columns.
92 | 
93 |         During column creation, ``types`` will be checked for a key
94 |         matching the name of a column to be created, and the given
95 |         SQLAlchemy column type will be used. Otherwise, the type is
96 |         guessed from the row value, defaulting to a simple unicode
97 |         field.
98 |         ::
99 | 
100 |             data = dict(id=10, title='I am a banana!')
101 |             table.insert_ignore(data, ['id'])
102 |         """
103 |         row = self._sync_columns(row, ensure, types=types)
104 |         if self._check_ensure(ensure):
105 |             self.create_index(keys)
106 |         args, _ = self._keys_to_args(row, keys)
107 |         if self.count(**args) == 0:
108 |             return self.insert(row, ensure=False)
109 |         return False
110 | 
111 |     def insert_many(self, rows, chunk_size=1000, ensure=None, types=None):
112 |         """Add many rows at a time.
113 | 
114 |         This is significantly faster than adding them one by one. By default
115 |         the rows are processed in chunks of 1000 per commit, unless you specify
116 |         a different ``chunk_size``.
117 | 
118 |         See :py:meth:`insert() <dataset.Table.insert>` for details on
119 |         the other parameters.
120 |         ::
121 | 
122 |             rows = [dict(name='Dolly')] * 10000
123 |             table.insert_many(rows)
124 |         """
125 |         chunk = []
126 |         for row in rows:
127 |             row = self._sync_columns(row, ensure, types=types)
128 |             chunk.append(row)
129 |             if len(chunk) == chunk_size:
130 |                 chunk = pad_chunk_columns(chunk)
131 |                 self.table.insert().execute(chunk)
132 |                 chunk = []
133 | 
134 |         if len(chunk):
135 |             chunk = pad_chunk_columns(chunk)
136 |             self.table.insert().execute(chunk)
137 | 
138 |     def update(self, row, keys, ensure=None, types=None, return_count=False):
139 |         """Update a row in the table.
140 | 
141 |         The update is managed via the set of column names stated in ``keys``:
142 |         they will be used as filters for the data to be updated, using the
143 |         values in ``row``.
144 |         ::
145 | 
146 |             # update all entries with id matching 10, setting their title columns
147 |             data = dict(id=10, title='I am a banana!')
148 |             table.update(data, ['id'])
149 | 
150 |         If keys in ``row`` update columns not present in the table, they will
151 |         be created based on the settings of ``ensure`` and ``types``, matching
152 |         the behavior of :py:meth:`insert() <dataset.Table.insert>`.
153 |         """
154 |         row = self._sync_columns(row, ensure, types=types)
155 |         args, row = self._keys_to_args(row, keys)
156 |         clause = self._args_to_clause(args)
157 |         if not len(row):
158 |             return self.count(clause)
159 |         stmt = self.table.update(whereclause=clause, values=row)
160 |         rp = self.db.executable.execute(stmt)
161 |         if rp.supports_sane_rowcount():
162 |             return rp.rowcount
163 |         if return_count:
164 |             return self.count(clause)
165 | 
166 |     def upsert(self, row, keys, ensure=None, types=None):
167 |         """An UPSERT is a smart combination of insert and update.
168 | 
169 |         If rows with matching ``keys`` exist they will be updated, otherwise a
170 |         new row is inserted in the table.
171 |         ::
172 | 
173 |             data = dict(id=10, title='I am a banana!')
174 |             table.upsert(data, ['id'])
175 |         """
176 |         row = self._sync_columns(row, ensure, types=types)
177 |         if self._check_ensure(ensure):
178 |             self.create_index(keys)
179 |         row_count = self.update(row, keys, ensure=False, return_count=True)
180 |         if row_count == 0:
181 |             return self.insert(row, ensure=False)
182 |         return True
183 | 
184 |     def delete(self, *clauses, **filters):
185 |         """Delete rows from the table.
186 | 
187 |         Keyword arguments can be used to add column-based filters. The filter
188 |         criterion will always be equality:
189 |         ::
190 | 
191 |             table.delete(place='Berlin')
192 | 
193 |         If no arguments are given, all records are deleted.
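        Advanced deletion criteria can also be passed as positional
        SQLAlchemy clauses, mirroring ``find()``; an illustrative sketch
        (assumes a numeric ``temperature`` column exists):
        ::

            table.delete(table.table.columns.temperature < -10)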
194 | """ 195 | if not self.exists: 196 | return False 197 | clause = self._args_to_clause(filters, clauses=clauses) 198 | stmt = self.table.delete(whereclause=clause) 199 | rp = self.db.executable.execute(stmt) 200 | return rp.rowcount > 0 201 | 202 | def _reflect_table(self): 203 | """Load the tables definition from the database.""" 204 | with self.db.lock: 205 | try: 206 | self._table = SQLATable(self.name, 207 | self.db.metadata, 208 | schema=self.db.schema, 209 | autoload=True) 210 | except NoSuchTableError: 211 | pass 212 | 213 | def _threading_warn(self): 214 | if self.db.in_transaction and threading.active_count() > 1: 215 | warnings.warn("Changing the database schema inside a transaction " 216 | "in a multi-threaded environment is likely to lead " 217 | "to race conditions and synchronization issues.", 218 | RuntimeWarning) 219 | 220 | def _sync_table(self, columns): 221 | """Lazy load, create or adapt the table structure in the database.""" 222 | if self._table is None: 223 | # Load an existing table from the database. 224 | self._reflect_table() 225 | if self._table is None: 226 | # Create the table with an initial set of columns. 227 | if not self._auto_create: 228 | raise DatasetException("Table does not exist: %s" % self.name) 229 | # Keep the lock scope small because this is run very often. 230 | with self.db.lock: 231 | self._threading_warn() 232 | self._table = SQLATable(self.name, 233 | self.db.metadata, 234 | schema=self.db.schema) 235 | if self._primary_id is not False: 236 | # This can go wrong on DBMS like MySQL and SQLite where 237 | # tables cannot have no columns. 238 | primary_id = self._primary_id or self.PRIMARY_DEFAULT 239 | primary_type = self._primary_type or Types.integer 240 | increment = primary_type in [Types.integer, Types.bigint] 241 | column = Column(primary_id, primary_type, 242 | primary_key=True, 243 | autoincrement=increment) 244 | self._table.append_column(column) 245 | for column in columns: 246 | if not column.name == self._primary_id: 247 | self._table.append_column(column) 248 | self._table.create(self.db.executable, checkfirst=True) 249 | elif len(columns): 250 | with self.db.lock: 251 | self._reflect_table() 252 | self._threading_warn() 253 | for column in columns: 254 | if not self.has_column(column.name): 255 | self.db.op.add_column(self.name, column, self.db.schema) 256 | self._reflect_table() 257 | 258 | def _sync_columns(self, row, ensure, types=None): 259 | """Create missing columns (or the table) prior to writes. 260 | 261 | If automatic schema generation is disabled (``ensure`` is ``False``), 262 | this will remove any keys from the ``row`` for which there is no 263 | matching column. 
264 | """ 265 | columns = self.columns 266 | ensure = self._check_ensure(ensure) 267 | types = types or {} 268 | types = {normalize_column_name(k): v for (k, v) in types.items()} 269 | out = {} 270 | sync_columns = [] 271 | for name, value in row.items(): 272 | name = normalize_column_name(name) 273 | if ensure and name not in columns: 274 | _type = types.get(name) 275 | if _type is None: 276 | _type = self.db.types.guess(value) 277 | sync_columns.append(Column(name, _type)) 278 | columns.append(name) 279 | if name in columns: 280 | out[name] = value 281 | self._sync_table(sync_columns) 282 | return out 283 | 284 | def _check_ensure(self, ensure): 285 | if ensure is None: 286 | return self.db.ensure_schema 287 | return ensure 288 | 289 | def _args_to_clause(self, args, clauses=()): 290 | clauses = list(clauses) 291 | for column, value in args.items(): 292 | if not self.has_column(column): 293 | clauses.append(false()) 294 | elif isinstance(value, (list, tuple)): 295 | clauses.append(self.table.c[column].in_(value)) 296 | elif isinstance(value, dict): 297 | key = list(value.keys())[0] 298 | if key in ('like',): 299 | clauses.append(self.table.c[column].like(value[key])) 300 | elif key in ('>', 'gt'): 301 | clauses.append(self.table.c[column] > value[key]) 302 | elif key in ('<', 'lt'): 303 | clauses.append(self.table.c[column] < value[key]) 304 | elif key in ('>=', 'gte'): 305 | clauses.append(self.table.c[column] >= value[key]) 306 | elif key in ('<=', 'lte'): 307 | clauses.append(self.table.c[column] <= value[key]) 308 | elif key in ('!=', '<>', 'not'): 309 | clauses.append(self.table.c[column] != value[key]) 310 | elif key in ('between', '..'): 311 | clauses.append(self.table.c[column].between(value[key][0], value[key][1])) 312 | else: 313 | clauses.append(false()) 314 | else: 315 | clauses.append(self.table.c[column] == value) 316 | return and_(*clauses) 317 | 318 | def _args_to_order_by(self, order_by): 319 | orderings = [] 320 | for ordering in ensure_tuple(order_by): 321 | if ordering is None: 322 | continue 323 | column = ordering.lstrip('-') 324 | if column not in self.table.columns: 325 | continue 326 | if ordering.startswith('-'): 327 | orderings.append(self.table.c[column].desc()) 328 | else: 329 | orderings.append(self.table.c[column].asc()) 330 | return orderings 331 | 332 | def _keys_to_args(self, row, keys): 333 | keys = ensure_tuple(keys) 334 | keys = [normalize_column_name(k) for k in keys] 335 | # keys = [self.has_column(k) for k in keys] 336 | row = row.copy() 337 | args = {k: row.pop(k) for k in keys if k in row} 338 | return args, row 339 | 340 | def create_column(self, name, type): 341 | """Create a new column ``name`` of a specified type. 342 | :: 343 | 344 | table.create_column('created_at', db.types.datetime) 345 | 346 | `type` corresponds to an SQLAlchemy type as described by `dataset.db.Types` 347 | """ 348 | name = normalize_column_name(name) 349 | if self.has_column(name): 350 | log.debug("Column exists: %s" % name) 351 | return 352 | self._sync_table((Column(name, type),)) 353 | 354 | def create_column_by_example(self, name, value): 355 | """ 356 | Explicitly create a new column ``name`` with a type that is appropriate 357 | to store the given example ``value``. The type is guessed in the same 358 | way as for the insert method with ``ensure=True``. 359 | :: 360 | 361 | table.create_column_by_example('length', 4.2) 362 | 363 | If a column of the same name already exists, no action is taken, even 364 | if it is not of the type we would have created. 
365 | """ 366 | type_ = self.db.types.guess(value) 367 | self.create_column(name, type_) 368 | 369 | def drop_column(self, name): 370 | """Drop the column ``name``. 371 | :: 372 | table.drop_column('created_at') 373 | """ 374 | if self.db.engine.dialect.name == 'sqlite': 375 | raise RuntimeError("SQLite does not support dropping columns.") 376 | name = normalize_column_name(name) 377 | with self.db.lock: 378 | if not self.exists or not self.has_column(name): 379 | log.debug("Column does not exist: %s", name) 380 | return 381 | 382 | self._threading_warn() 383 | self.db.op.drop_column( 384 | self.table.name, 385 | name, 386 | self.table.schema 387 | ) 388 | self._reflect_table() 389 | 390 | def drop(self): 391 | """Drop the table from the database. 392 | 393 | Deletes both the schema and all the contents within it. 394 | """ 395 | with self.db.lock: 396 | if self.exists: 397 | self._threading_warn() 398 | self.table.drop(self.db.executable, checkfirst=True) 399 | self._table = None 400 | 401 | def has_index(self, columns): 402 | """Check if an index exists to cover the given ``columns``.""" 403 | if not self.exists: 404 | return False 405 | columns = set([normalize_column_name(c) for c in columns]) 406 | if columns in self._indexes: 407 | return True 408 | for column in columns: 409 | if not self.has_column(column): 410 | return False 411 | indexes = self.db.inspect.get_indexes(self.name, schema=self.db.schema) 412 | for index in indexes: 413 | if columns == set(index.get('column_names', [])): 414 | self._indexes.append(columns) 415 | return True 416 | return False 417 | 418 | def create_index(self, columns, name=None, **kw): 419 | """Create an index to speed up queries on a table. 420 | 421 | If no ``name`` is given a random name is created. 422 | :: 423 | 424 | table.create_index(['name', 'country']) 425 | """ 426 | columns = [normalize_column_name(c) for c in ensure_tuple(columns)] 427 | with self.db.lock: 428 | if not self.exists: 429 | raise DatasetException("Table has not been created yet.") 430 | 431 | for column in columns: 432 | if not self.has_column(column): 433 | return 434 | 435 | if not self.has_index(columns): 436 | self._threading_warn() 437 | name = name or index_name(self.name, columns) 438 | columns = [self.table.c[c] for c in columns] 439 | idx = Index(name, *columns, **kw) 440 | idx.create(self.db.executable) 441 | 442 | def find(self, *_clauses, **kwargs): 443 | """Perform a simple search on the table. 444 | 445 | Simply pass keyword arguments as ``filter``. 446 | :: 447 | 448 | results = table.find(country='France') 449 | results = table.find(country='France', year=1980) 450 | 451 | Using ``_limit``:: 452 | 453 | # just return the first 10 rows 454 | results = table.find(country='France', _limit=10) 455 | 456 | You can sort the results by single or multiple columns. Append a minus 457 | sign to the column name for descending order:: 458 | 459 | # sort results by a column 'year' 460 | results = table.find(country='France', order_by='year') 461 | # return all rows sorted by multiple columns (descending by year) 462 | results = table.find(order_by=['country', '-year']) 463 | 464 | To perform complex queries with advanced filters or to perform 465 | aggregation, use :py:meth:`db.query() ` 466 | instead. 
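        Keyword filters additionally accept a small dict-based operator
        syntax, handled by ``_args_to_clause`` above (the column names and
        values shown are illustrative):
        ::

            results = table.find(temperature={'>': 5})
            results = table.find(year={'between': [1980, 1990]})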
467 |         """
468 |         if not self.exists:
469 |             return iter([])
470 | 
471 |         _limit = kwargs.pop('_limit', None)
472 |         _offset = kwargs.pop('_offset', 0)
473 |         order_by = kwargs.pop('order_by', None)
474 |         _streamed = kwargs.pop('_streamed', False)
475 |         _step = kwargs.pop('_step', QUERY_STEP)
476 |         if _step is False or _step == 0:
477 |             _step = None
478 | 
479 |         order_by = self._args_to_order_by(order_by)
480 |         args = self._args_to_clause(kwargs, clauses=_clauses)
481 |         query = self.table.select(whereclause=args,
482 |                                   limit=_limit,
483 |                                   offset=_offset)
484 |         if len(order_by):
485 |             query = query.order_by(*order_by)
486 | 
487 |         conn = self.db.executable
488 |         if _streamed:
489 |             conn = self.db.engine.connect()
490 |             conn = conn.execution_options(stream_results=True)
491 | 
492 |         return ResultIter(conn.execute(query),
493 |                           row_type=self.db.row_type,
494 |                           step=_step)
495 | 
496 |     def find_one(self, *args, **kwargs):
497 |         """Get a single result from the table.
498 | 
499 |         Works just like :py:meth:`find() <dataset.Table.find>` but returns one
500 |         result, or ``None``.
501 |         ::
502 | 
503 |             row = table.find_one(country='United States')
504 |         """
505 |         if not self.exists:
506 |             return None
507 | 
508 |         kwargs['_limit'] = 1
509 |         kwargs['_step'] = None
510 |         resiter = self.find(*args, **kwargs)
511 |         try:
512 |             for row in resiter:
513 |                 return row
514 |         finally:
515 |             resiter.close()
516 | 
517 |     def count(self, *_clauses, **kwargs):
518 |         """Return the count of results for the given filter set."""
519 |         # NOTE: this does not have support for limit and offset since I can't
520 |         # see how this is useful. Still, there might be compatibility issues
521 |         # with people using these flags. Let's see how it goes.
522 |         if not self.exists:
523 |             return 0
524 | 
525 |         args = self._args_to_clause(kwargs, clauses=_clauses)
526 |         query = select([func.count()], whereclause=args)
527 |         query = query.select_from(self.table)
528 |         rp = self.db.executable.execute(query)
529 |         return rp.fetchone()[0]
530 | 
531 |     def __len__(self):
532 |         """Return the number of rows in the table."""
533 |         return self.count()
534 | 
535 |     def distinct(self, *args, **_filter):
536 |         """Return all the unique (distinct) values for the given ``columns``.
537 |         ::
538 | 
539 |             # returns only one row per year, ignoring the rest
540 |             table.distinct('year')
541 |             # works with multiple columns, too
542 |             table.distinct('year', 'country')
543 |             # you can also combine this with a filter
544 |             table.distinct('year', country='China')
545 |         """
546 |         if not self.exists:
547 |             return iter([])
548 | 
549 |         filters = []
550 |         for column, value in _filter.items():
551 |             if not self.has_column(column):
552 |                 raise DatasetException("No such column: %s" % column)
553 |             filters.append(self.table.c[column] == value)
554 | 
555 |         columns = []
556 |         for column in args:
557 |             if isinstance(column, ClauseElement):
558 |                 filters.append(column)
559 |             else:
560 |                 if not self.has_column(column):
561 |                     raise DatasetException("No such column: %s" % column)
562 |                 columns.append(self.table.c[column])
563 | 
564 |         if not len(columns):
565 |             return iter([])
566 | 
567 |         q = expression.select(columns,
568 |                               distinct=True,
569 |                               whereclause=and_(*filters),
570 |                               order_by=[c.asc() for c in columns])
571 |         return self.db.query(q)
572 | 
573 |     # Legacy methods for running find queries.
574 |     all = find
575 | 
576 |     def __iter__(self):
577 |         """Return all rows of the table as simple dictionaries.
578 | 
579 |         Allows for iterating over all rows in the table without explicitly
580 |         calling :py:meth:`find() <dataset.Table.find>`.
581 |         ::
582 | 
583 |             for row in table:
584 |                 print(row)
585 |         """
586 |         return self.find()
587 | 
588 |     def __repr__(self):
589 |         """Get table representation."""
590 |         return '<Table(%s)>' % self.table.name
591 | 
--------------------------------------------------------------------------------
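Taken together, ``Table`` exposes explicit schema management alongside the ``ensure`` automation. A short sketch of those helpers, under the same assumptions as the earlier example (table, column names and values are illustrative):

import dataset

db = dataset.connect('sqlite:///:memory:')

# a text primary key; uniqueness is the caller's responsibility
table = db.create_table('cities', primary_id='name',
                        primary_type=db.types.text)

# explicit column creation, by type or by example value
table.create_column('population', db.types.integer)
table.create_column_by_example('area_km2', 891.7)

table.insert({'name': 'Berlin', 'population': 3645000, 'area_km2': 891.7})

# create_index() is skipped if a covering index already exists
table.create_index(['population'])

print(table.columns)  # ['name', 'population', 'area_km2']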