├── .bumpversion.cfg
├── .github
    ├── FUNDING.yml
    ├── dependabot.yml
    └── workflows
    │   └── build.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE.txt
├── Makefile
├── README.md
├── dataset
    ├── __init__.py
    ├── chunked.py
    ├── database.py
    ├── table.py
    ├── types.py
    └── util.py
├── docs
    ├── .gitignore
    ├── Makefile
    ├── _static
    │   ├── dataset-logo-dark.png
    │   ├── dataset-logo-light.png
    │   ├── dataset-logo.png
    │   └── knight_mozilla_on.jpg
    ├── api.rst
    ├── conf.py
    ├── index.rst
    ├── install.rst
    ├── queries.rst
    ├── quickstart.rst
    └── requirements.txt
├── setup.cfg
├── setup.py
└── test
    ├── __init__.py
    ├── sample_data.py
    └── test_dataset.py


/.bumpversion.cfg:
--------------------------------------------------------------------------------
 1 | [bumpversion]
 2 | current_version = 1.6.2
 3 | tag_name = {new_version}
 4 | commit = True
 5 | tag = True
 6 | 
 7 | [bumpversion:file:setup.py]
 8 | search = version="{current_version}"
 9 | replace = version="{new_version}"
10 | 
11 | [bumpversion:file:dataset/__init__.py]
12 | 
13 | [bumpversion:file:docs/conf.py]
14 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: pudo
4 | open_collective: pudo
5 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: pip
4 |   directory: "/"
5 |   schedule:
6 |     interval: daily
7 |     time: "04:00"
8 |   open-pull-requests-limit: 100
9 | 


--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
 1 | name: build
 2 | 
 3 | on: [push]
 4 | 
 5 | jobs:
 6 |   python:
 7 |     runs-on: ubuntu-latest
 8 | 
 9 |     services:
10 |       postgres:
11 |         image: postgres
12 |         env:
13 |           POSTGRES_USER: postgres
14 |           POSTGRES_PASSWORD: postgres
15 |           POSTGRES_DB: dataset
16 |         ports:
17 |           - 5432/tcp
18 |         options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
19 |       mysql:
20 |         image: mysql
21 |         env:
22 |           MYSQL_USER: mysql
23 |           MYSQL_PASSWORD: mysql
24 |           MYSQL_DATABASE: dataset
25 |           MYSQL_ROOT_PASSWORD: mysql
26 |         ports:
27 |           - 3306/tcp
28 |         options: --health-cmd="mysqladmin ping" --health-interval=5s --health-timeout=2s --health-retries=3
29 | 
30 |     steps:
31 |       - uses: actions/checkout@v1
32 |       - name: Show ref
33 |         run: |
34 |           echo "$GITHUB_REF"
35 |       - name: Set up Python
36 |         uses: actions/setup-python@v1
37 |         with:
38 |           python-version: "3.x"
39 |       - name: Install dependencies
40 |         env:
41 |           DEBIAN_FRONTEND: noninteractive
42 |         run: |
43 |           sudo apt-get -qq update
44 |           pip install -e ".[dev]"
45 |       - name: Run SQLite tests
46 |         env:
47 |           DATABASE_URL: "sqlite:///:memory:"
48 |         run: |
49 |           make test
50 |       - name: Run PostgreSQL tests
51 |         env:
52 |           DATABASE_URL: "postgresql://postgres:postgres@127.0.0.1:${{ job.services.postgres.ports[5432] }}/dataset"
53 |         run: |
54 |           make test
55 |       - name: Run mysql tests
56 |         env:
57 |           DATABASE_URL: "mysql+pymysql://mysql:mysql@127.0.0.1:${{ job.services.mysql.ports[3306] }}/dataset?charset=utf8"
58 |         run: |
59 |           make test
60 |       - name: Run flake8 to lint
61 |         run: |
62 |           flake8 --ignore=E501,E123,E124,E126,E127,E128 dataset
63 |       - name: Build a distribution
64 |         run: |
65 |           python setup.py sdist bdist_wheel
66 |       - name: Publish a Python distribution to PyPI
67 |         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
68 |         uses: pypa/gh-action-pypi-publish@master
69 |         with:
70 |           user: __token__
71 |           password: ${{ secrets.pypi_password }}
72 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | *.egg-info
 3 | *.egg
 4 | .eggs/
 5 | dist/*
 6 | .tox/*
 7 | .vscode/*
 8 | build/*
 9 | .DS_Store
10 | .watchr
11 | .coverage
12 | htmlcov/
13 | 
14 | *.pyo
15 | env3/*
16 | env/*
17 | Test.yaml
18 | Freezefile.yaml
19 | :memory:
20 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # dataset ChangeLog
 2 | 
 3 | *The changelog has only been started with version 0.3.12, previous
 4 | changes must be reconstructed from revision history.*
 5 | 
 6 | * 1.2.0: Add support for views, multiple comparison operators.
 7 |   Remove support for Python 2.
 8 | * 1.1.0: Introduce `types` system to shortcut for SQLA types.
 9 | * 1.0.0: Massive re-factor and code cleanup.
10 | * 0.6.0: Remove sqlite_datetime_fix for automatic int-casting of dates,
11 |   make table['foo', 'bar'] an alias for table.distinct('foo', 'bar'),
12 |   check validity of column and table names more thoroughly, rename
13 |   reflectMetadata constructor argument to reflect_metadata, fix
14 |   ResultIter to not leave queries open (so you can update in a loop).
15 | * 0.5.7: dataset Databases can now have customized row types. This allows,
16 |   for example, information to be retrieved in attribute-accessible dict
17 |   subclasses, such as stuf.
18 | * 0.5.4: Context manager for transactions, thanks to @victorkashirin.
19 | * 0.5.1: Fix a regression where empty queries would raise an exception.
20 | * 0.5: Improve overall code quality and testing, including Travis CI.
21 |   An advanced __getitem__ syntax which allowed for the specification 
22 |   of primary keys when getting a table was dropped. 
23 |   DDL is no longer run against a transaction, but the base connection. 
24 | * 0.4: Python 3 support and switch to alembic for migrations.
25 | * 0.3.15: Fixes to update and insertion of data, thanks to @cli248
26 |   and @abhinav-upadhyay.
27 | * 0.3.14: dataset went viral somehow. Thanks to @gtsafas for
28 |   refactorings, @alasdairnicol for fixing the Freezfile example in 
29 |   the documentation. @diegoguimaraes fixed the behaviour of insert to
30 |   return the newly-created primary key ID. table.find_one() now
31 |   returns a dict, not an SQLAlchemy ResultProxy. Slugs are now generated
32 |   using the Python-Slugify package, removing slug code from dataset. 
33 | * 0.3.13: Fixed logging, added support for transformations on result
34 |   rows to support slug generation in output (#28).
35 | * 0.3.12: Makes table primary key's types and names configurable, fixing
36 |   #19. Contributed by @dnatag.
37 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2013, Open Knowledge Foundation, Friedrich Lindenberg,
 2 |   Gregor Aisch
 3 | 
 4 | Permission is hereby granted, free of charge, to any person obtaining a
 5 | copy of this software and associated documentation files (the
 6 | "Software"), to deal in the Software without restriction, including
 7 | without limitation the rights to use, copy, modify, merge, publish,
 8 | distribute, sublicense, and/or sell copies of the Software, and to
 9 | permit persons to whom the Software is furnished to do so, subject to
10 | the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included
13 | in all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | all: clean test dists
 3 | 
 4 | .PHONY: test
 5 | test:
 6 | 	pytest
 7 | 
 8 | dists:
 9 | 	python setup.py sdist
10 | 	python setup.py bdist_wheel
11 | 
12 | release: dists
13 | 	pip install -q twine
14 | 	twine upload dist/*
15 | 
16 | .PHONY: clean
17 | clean:
18 | 	rm -rf dist build .eggs
19 | 	find . -name '*.egg-info' -exec rm -fr {} +
20 | 	find . -name '*.egg' -exec rm -f {} +
21 | 	find . -name '*.pyc' -exec rm -f {} +
22 | 	find . -name '*.pyo' -exec rm -f {} +
23 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | dataset: databases for lazy people
 2 | ==================================
 3 | 
 4 | ![build](https://github.com/pudo/dataset/workflows/build/badge.svg)
 5 | 
 6 | In short, **dataset** makes reading and writing data in databases as simple as reading and writing JSON files.
 7 | 
 8 | [Read the docs](https://dataset.readthedocs.io/)
 9 | 
10 | To install dataset, fetch it with ``pip``:
11 | 
12 | ```bash
13 | $ pip install dataset
14 | ```
15 | 
16 | **Note:** as of version 1.0, **dataset** is split into two packages, with the
17 | data export features now extracted into a stand-alone package, **datafreeze**.
18 | See the relevant repository [here](https://github.com/pudo/datafreeze).
19 | 


--------------------------------------------------------------------------------
/dataset/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import warnings
 3 | from dataset.database import Database
 4 | from dataset.table import Table
 5 | from dataset.util import row_type
 6 | 
 7 | # shut up useless SA warning:
 8 | warnings.filterwarnings("ignore", "Unicode type received non-unicode bind param value.")
 9 | warnings.filterwarnings(
10 |     "ignore", "Skipping unsupported ALTER for creation of implicit constraint"
11 | )
12 | 
13 | __all__ = ["Database", "Table", "connect"]
14 | __version__ = "1.6.2"
15 | 
16 | 
17 | def connect(
18 |     url=None,
19 |     schema=None,
20 |     engine_kwargs=None,
21 |     ensure_schema=True,
22 |     row_type=row_type,
23 |     sqlite_wal_mode=True,
24 |     on_connect_statements=None,
25 | ):
26 |     """Opens a new connection to a database.
27 | 
28 |     *url* can be any valid `SQLAlchemy engine URL`_.  If *url* is not defined
29 |     it will try to use *DATABASE_URL* from environment variable.  Returns an
30 |     instance of :py:class:`Database <dataset.Database>`. Additionally,
31 |     *engine_kwargs* will be directly passed to SQLAlchemy, e.g. set
32 |     *engine_kwargs={'pool_recycle': 3600}* will avoid `DB connection timeout`_.
33 |     Set *row_type* to an alternate dict-like class to change the type of
34 |     container rows are stored in.::
35 | 
36 |         db = dataset.connect('sqlite:///factbook.db')
37 | 
38 |     One of the main features of `dataset` is to automatically create tables and
39 |     columns as data is inserted. This behaviour can optionally be disabled via
40 |     the `ensure_schema` argument. It can also be overridden in a lot of the
41 |     data manipulation methods using the `ensure` flag.
42 | 
43 |     If you want to run custom SQLite pragmas on database connect, you can add them
44 |     to on_connect_statements as a set of strings. You can view a full
45 |     `list of PRAGMAs here`_.
46 | 
47 |     .. _SQLAlchemy Engine URL: http://docs.sqlalchemy.org/en/latest/core/engines.html#sqlalchemy.create_engine
48 |     .. _DB connection timeout: http://docs.sqlalchemy.org/en/latest/core/pooling.html#setting-pool-recycle
49 |     .. _list of PRAGMAs here: https://www.sqlite.org/pragma.html
50 |     """
51 |     if url is None:
52 |         url = os.environ.get("DATABASE_URL", "sqlite://")
53 | 
54 |     return Database(
55 |         url,
56 |         schema=schema,
57 |         engine_kwargs=engine_kwargs,
58 |         ensure_schema=ensure_schema,
59 |         row_type=row_type,
60 |         sqlite_wal_mode=sqlite_wal_mode,
61 |         on_connect_statements=on_connect_statements,
62 |     )
63 | 


--------------------------------------------------------------------------------
/dataset/chunked.py:
--------------------------------------------------------------------------------
 1 | import itertools
 2 | 
 3 | 
 4 | class InvalidCallback(ValueError):
 5 |     pass
 6 | 
 7 | 
 8 | class _Chunker(object):
 9 |     def __init__(self, table, chunksize, callback):
10 |         self.queue = []
11 |         self.table = table
12 |         self.chunksize = chunksize
13 |         if callback and not callable(callback):
14 |             raise InvalidCallback
15 |         self.callback = callback
16 | 
17 |     def flush(self):
18 |         self.queue.clear()
19 | 
20 |     def _queue_add(self, item):
21 |         self.queue.append(item)
22 |         if len(self.queue) >= self.chunksize:
23 |             self.flush()
24 | 
25 |     def __enter__(self):
26 |         return self
27 | 
28 |     def __exit__(self, exc_type, exc_val, exc_tb):
29 |         self.flush()
30 | 
31 | 
32 | class ChunkedInsert(_Chunker):
33 |     """Batch up insert operations
34 |     with ChunkedInsert(my_table) as inserter:
35 |         inserter(row)
36 | 
37 |     Rows will be inserted in groups of `chunksize` (defaulting to 1000). An
38 |     optional callback can be provided that will be called before the insert.
39 |     This callback takes one parameter which is the queue which is about to be
40 |     inserted into the database
41 |     """
42 | 
43 |     def __init__(self, table, chunksize=1000, callback=None):
44 |         self.fields = set()
45 |         super().__init__(table, chunksize, callback)
46 | 
47 |     def insert(self, item):
48 |         self.fields.update(item.keys())
49 |         super()._queue_add(item)
50 | 
51 |     def flush(self):
52 |         for item in self.queue:
53 |             for field in self.fields:
54 |                 item[field] = item.get(field)
55 |         if self.callback is not None:
56 |             self.callback(self.queue)
57 |         self.table.insert_many(self.queue)
58 |         super().flush()
59 | 
60 | 
61 | class ChunkedUpdate(_Chunker):
62 |     """Batch up update operations
63 |     with ChunkedUpdate(my_table) as updater:
64 |         updater(row)
65 | 
66 |     Rows will be updated in groups of `chunksize` (defaulting to 1000). An
67 |     optional callback can be provided that will be called before the update.
68 |     This callback takes one parameter which is the queue which is about to be
69 |     updated into the database
70 |     """
71 | 
72 |     def __init__(self, table, keys, chunksize=1000, callback=None):
73 |         self.keys = keys
74 |         super().__init__(table, chunksize, callback)
75 | 
76 |     def update(self, item):
77 |         super()._queue_add(item)
78 | 
79 |     def flush(self):
80 |         if self.callback is not None:
81 |             self.callback(self.queue)
82 |         self.queue.sort(key=dict.keys)
83 |         for fields, items in itertools.groupby(self.queue, key=dict.keys):
84 |             self.table.update_many(list(items), self.keys)
85 |         super().flush()
86 | 


--------------------------------------------------------------------------------
/dataset/database.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import threading
  3 | from urllib.parse import parse_qs, urlparse
  4 | 
  5 | from sqlalchemy import create_engine, inspect
  6 | from sqlalchemy.sql import text
  7 | from sqlalchemy.schema import MetaData
  8 | from sqlalchemy.util import safe_reraise
  9 | from sqlalchemy import event
 10 | 
 11 | from alembic.migration import MigrationContext
 12 | from alembic.operations import Operations
 13 | 
 14 | from dataset.table import Table
 15 | from dataset.util import ResultIter, row_type, safe_url, QUERY_STEP
 16 | from dataset.util import normalize_table_name
 17 | from dataset.types import Types
 18 | 
 19 | log = logging.getLogger(__name__)
 20 | 
 21 | 
 22 | class Database(object):
 23 |     """A database object represents a SQL database with multiple tables."""
 24 | 
 25 |     def __init__(
 26 |         self,
 27 |         url,
 28 |         schema=None,
 29 |         engine_kwargs=None,
 30 |         ensure_schema=True,
 31 |         row_type=row_type,
 32 |         sqlite_wal_mode=True,
 33 |         on_connect_statements=None,
 34 |     ):
 35 |         """Configure and connect to the database."""
 36 |         if engine_kwargs is None:
 37 |             engine_kwargs = {}
 38 | 
 39 |         parsed_url = urlparse(url)
 40 |         # if parsed_url.scheme.lower() in 'sqlite':
 41 |         #     # ref: https://github.com/pudo/dataset/issues/163
 42 |         #     if 'poolclass' not in engine_kwargs:
 43 |         #         engine_kwargs['poolclass'] = StaticPool
 44 | 
 45 |         self.lock = threading.RLock()
 46 |         self.local = threading.local()
 47 |         self.connections = {}
 48 | 
 49 |         if len(parsed_url.query):
 50 |             query = parse_qs(parsed_url.query)
 51 |             if schema is None:
 52 |                 schema_qs = query.get("schema", query.get("searchpath", []))
 53 |                 if len(schema_qs):
 54 |                     schema = schema_qs.pop()
 55 | 
 56 |         self.schema = schema
 57 |         self.engine = create_engine(url, **engine_kwargs)
 58 |         self.is_postgres = self.engine.dialect.name == "postgresql"
 59 |         self.is_sqlite = self.engine.dialect.name == "sqlite"
 60 |         if on_connect_statements is None:
 61 |             on_connect_statements = []
 62 | 
 63 |         def _run_on_connect(dbapi_con, con_record):
 64 |             # reference:
 65 |             # https://stackoverflow.com/questions/9671490/how-to-set-sqlite-pragma-statements-with-sqlalchemy
 66 |             # https://stackoverflow.com/a/7831210/1890086
 67 |             for statement in on_connect_statements:
 68 |                 dbapi_con.execute(statement)
 69 | 
 70 |         if self.is_sqlite and parsed_url.path != "" and sqlite_wal_mode:
 71 |             # we only enable WAL mode for sqlite databases that are not in-memory
 72 |             on_connect_statements.append("PRAGMA journal_mode=WAL")
 73 | 
 74 |         if len(on_connect_statements):
 75 |             event.listen(self.engine, "connect", _run_on_connect)
 76 | 
 77 |         self.types = Types(is_postgres=self.is_postgres)
 78 |         self.url = url
 79 |         self.row_type = row_type
 80 |         self.ensure_schema = ensure_schema
 81 |         self._tables = {}
 82 | 
 83 |     @property
 84 |     def executable(self):
 85 |         """Connection against which statements will be executed."""
 86 |         with self.lock:
 87 |             tid = threading.get_ident()
 88 |             if tid not in self.connections:
 89 |                 self.connections[tid] = self.engine.connect()
 90 |             return self.connections[tid]
 91 | 
 92 |     @property
 93 |     def op(self):
 94 |         """Get an alembic operations context."""
 95 |         ctx = MigrationContext.configure(self.executable)
 96 |         return Operations(ctx)
 97 | 
 98 |     @property
 99 |     def inspect(self):
100 |         """Get a SQLAlchemy inspector."""
101 |         return inspect(self.executable)
102 | 
103 |     def has_table(self, name):
104 |         return self.inspect.has_table(name, schema=self.schema)
105 | 
106 |     @property
107 |     def metadata(self):
108 |         """Return a SQLAlchemy schema cache object."""
109 |         return MetaData(schema=self.schema, bind=self.executable)
110 | 
111 |     @property
112 |     def in_transaction(self):
113 |         """Check if this database is in a transactional context."""
114 |         if not hasattr(self.local, "tx"):
115 |             return False
116 |         return len(self.local.tx) > 0
117 | 
118 |     def _flush_tables(self):
119 |         """Clear the table metadata after transaction rollbacks."""
120 |         for table in self._tables.values():
121 |             table._table = None
122 | 
123 |     def begin(self):
124 |         """Enter a transaction explicitly.
125 | 
126 |         No data will be written until the transaction has been committed.
127 |         """
128 |         if not hasattr(self.local, "tx"):
129 |             self.local.tx = []
130 |         self.local.tx.append(self.executable.begin())
131 | 
132 |     def commit(self):
133 |         """Commit the current transaction.
134 | 
135 |         Make all statements executed since the transaction was begun permanent.
136 |         """
137 |         if hasattr(self.local, "tx") and self.local.tx:
138 |             tx = self.local.tx.pop()
139 |             tx.commit()
140 |             # Removed in 2020-12, I'm a bit worried this means that some DDL
141 |             # operations in transactions won't cause metadata to refresh any
142 |             # more:
143 |             # self._flush_tables()
144 | 
145 |     def rollback(self):
146 |         """Roll back the current transaction.
147 | 
148 |         Discard all statements executed since the transaction was begun.
149 |         """
150 |         if hasattr(self.local, "tx") and self.local.tx:
151 |             tx = self.local.tx.pop()
152 |             tx.rollback()
153 |             self._flush_tables()
154 | 
155 |     def __enter__(self):
156 |         """Start a transaction."""
157 |         self.begin()
158 |         return self
159 | 
160 |     def __exit__(self, error_type, error_value, traceback):
161 |         """End a transaction by committing or rolling back."""
162 |         if error_type is None:
163 |             try:
164 |                 self.commit()
165 |             except Exception:
166 |                 with safe_reraise():
167 |                     self.rollback()
168 |         else:
169 |             self.rollback()
170 | 
171 |     def close(self):
172 |         """Close database connections. Makes this object unusable."""
173 |         with self.lock:
174 |             for conn in self.connections.values():
175 |                 conn.close()
176 |             self.connections.clear()
177 |         self.engine.dispose()
178 |         self._tables = {}
179 |         self.engine = None
180 | 
181 |     @property
182 |     def tables(self):
183 |         """Get a listing of all tables that exist in the database."""
184 |         return self.inspect.get_table_names(schema=self.schema)
185 | 
186 |     @property
187 |     def views(self):
188 |         """Get a listing of all views that exist in the database."""
189 |         return self.inspect.get_view_names(schema=self.schema)
190 | 
191 |     def __contains__(self, table_name):
192 |         """Check if the given table name exists in the database."""
193 |         try:
194 |             table_name = normalize_table_name(table_name)
195 |             if table_name in self.tables:
196 |                 return True
197 |             if table_name in self.views:
198 |                 return True
199 |             return False
200 |         except ValueError:
201 |             return False
202 | 
203 |     def create_table(
204 |         self, table_name, primary_id=None, primary_type=None, primary_increment=None
205 |     ):
206 |         """Create a new table.
207 | 
208 |         Either loads a table or creates it if it doesn't exist yet. You can
209 |         define the name and type of the primary key field, if a new table is to
210 |         be created. The default is to create an auto-incrementing integer,
211 |         ``id``. You can also set the primary key to be a string or big integer.
212 |         The caller will be responsible for the uniqueness of ``primary_id`` if
213 |         it is defined as a text type. You can disable auto-increment behaviour
214 |         for numeric primary keys by setting `primary_increment` to `False`.
215 | 
216 |         Returns a :py:class:`Table <dataset.Table>` instance.
217 |         ::
218 | 
219 |             table = db.create_table('population')
220 | 
221 |             # custom id and type
222 |             table2 = db.create_table('population2', 'age')
223 |             table3 = db.create_table('population3',
224 |                                      primary_id='city',
225 |                                      primary_type=db.types.text)
226 |             # custom length of String
227 |             table4 = db.create_table('population4',
228 |                                      primary_id='city',
229 |                                      primary_type=db.types.string(25))
230 |             # no primary key
231 |             table5 = db.create_table('population5',
232 |                                      primary_id=False)
233 |         """
234 |         assert not isinstance(
235 |             primary_type, str
236 |         ), "Text-based primary_type support is dropped, use db.types."
237 |         table_name = normalize_table_name(table_name)
238 |         with self.lock:
239 |             if table_name not in self._tables:
240 |                 self._tables[table_name] = Table(
241 |                     self,
242 |                     table_name,
243 |                     primary_id=primary_id,
244 |                     primary_type=primary_type,
245 |                     primary_increment=primary_increment,
246 |                     auto_create=True,
247 |                 )
248 |             return self._tables.get(table_name)
249 | 
250 |     def load_table(self, table_name):
251 |         """Load a table.
252 | 
253 |         This will fail if the tables does not already exist in the database. If
254 |         the table exists, its columns will be reflected and are available on
255 |         the :py:class:`Table <dataset.Table>` object.
256 | 
257 |         Returns a :py:class:`Table <dataset.Table>` instance.
258 |         ::
259 | 
260 |             table = db.load_table('population')
261 |         """
262 |         table_name = normalize_table_name(table_name)
263 |         with self.lock:
264 |             if table_name not in self._tables:
265 |                 self._tables[table_name] = Table(self, table_name)
266 |             return self._tables.get(table_name)
267 | 
268 |     def get_table(
269 |         self,
270 |         table_name,
271 |         primary_id=None,
272 |         primary_type=None,
273 |         primary_increment=None,
274 |     ):
275 |         """Load or create a table.
276 | 
277 |         This is now the same as ``create_table``.
278 |         ::
279 | 
280 |             table = db.get_table('population')
281 |             # you can also use the short-hand syntax:
282 |             table = db['population']
283 |         """
284 |         if not self.ensure_schema:
285 |             return self.load_table(table_name)
286 |         return self.create_table(
287 |             table_name, primary_id, primary_type, primary_increment
288 |         )
289 | 
290 |     def __getitem__(self, table_name):
291 |         """Get a given table."""
292 |         return self.get_table(table_name)
293 | 
294 |     def _ipython_key_completions_(self):
295 |         """Completion for table names with IPython."""
296 |         return self.tables
297 | 
298 |     def query(self, query, *args, **kwargs):
299 |         """Run a statement on the database directly.
300 | 
301 |         Allows for the execution of arbitrary read/write queries. A query can
302 |         either be a plain text string, or a `SQLAlchemy expression
303 |         <http://docs.sqlalchemy.org/en/latest/core/tutorial.html#selecting>`_.
304 |         If a plain string is passed in, it will be converted to an expression
305 |         automatically.
306 | 
307 |         Further positional and keyword arguments will be used for parameter
308 |         binding. To include a positional argument in your query, use question
309 |         marks in the query (i.e. ``SELECT * FROM tbl WHERE a = ?``). For
310 |         keyword arguments, use a bind parameter (i.e. ``SELECT * FROM tbl
311 |         WHERE a = :foo``).
312 |         ::
313 | 
314 |             statement = 'SELECT user, COUNT(*) c FROM photos GROUP BY user'
315 |             for row in db.query(statement):
316 |                 print(row['user'], row['c'])
317 | 
318 |         The returned iterator will yield each result sequentially.
319 |         """
320 |         if isinstance(query, str):
321 |             query = text(query)
322 |         _step = kwargs.pop("_step", QUERY_STEP)
323 |         if _step is False or _step == 0:
324 |             _step = None
325 |         rp = self.executable.execute(query, *args, **kwargs)
326 |         return ResultIter(rp, row_type=self.row_type, step=_step)
327 | 
328 |     def __repr__(self):
329 |         """Text representation contains the URL."""
330 |         return "<Database(%s)>" % safe_url(self.url)
331 | 


--------------------------------------------------------------------------------
/dataset/table.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import warnings
  3 | import threading
  4 | from banal import ensure_list
  5 | 
  6 | from sqlalchemy import func, select, false
  7 | from sqlalchemy.sql import and_, expression
  8 | from sqlalchemy.sql.expression import bindparam, ClauseElement
  9 | from sqlalchemy.schema import Column, Index
 10 | from sqlalchemy.schema import Table as SQLATable
 11 | from sqlalchemy.exc import NoSuchTableError
 12 | 
 13 | from dataset.types import Types, MYSQL_LENGTH_TYPES
 14 | from dataset.util import index_name
 15 | from dataset.util import DatasetException, ResultIter, QUERY_STEP
 16 | from dataset.util import normalize_table_name, pad_chunk_columns
 17 | from dataset.util import normalize_column_name, normalize_column_key
 18 | 
 19 | 
 20 | log = logging.getLogger(__name__)
 21 | 
 22 | 
 23 | class Table(object):
 24 |     """Represents a table in a database and exposes common operations."""
 25 | 
 26 |     PRIMARY_DEFAULT = "id"
 27 | 
 28 |     def __init__(
 29 |         self,
 30 |         database,
 31 |         table_name,
 32 |         primary_id=None,
 33 |         primary_type=None,
 34 |         primary_increment=None,
 35 |         auto_create=False,
 36 |     ):
 37 |         """Initialise the table from database schema."""
 38 |         self.db = database
 39 |         self.name = normalize_table_name(table_name)
 40 |         self._table = None
 41 |         self._columns = None
 42 |         self._indexes = []
 43 |         self._primary_id = (
 44 |             primary_id if primary_id is not None else self.PRIMARY_DEFAULT
 45 |         )
 46 |         self._primary_type = primary_type if primary_type is not None else Types.integer
 47 |         if primary_increment is None:
 48 |             primary_increment = self._primary_type in (Types.integer, Types.bigint)
 49 |         self._primary_increment = primary_increment
 50 |         self._auto_create = auto_create
 51 | 
 52 |     @property
 53 |     def exists(self):
 54 |         """Check to see if the table currently exists in the database."""
 55 |         if self._table is not None:
 56 |             return True
 57 |         return self.name in self.db
 58 | 
 59 |     @property
 60 |     def table(self):
 61 |         """Get a reference to the table, which may be reflected or created."""
 62 |         if self._table is None:
 63 |             self._sync_table(())
 64 |         return self._table
 65 | 
 66 |     @property
 67 |     def _column_keys(self):
 68 |         """Get a dictionary of all columns and their case mapping."""
 69 |         if not self.exists:
 70 |             return {}
 71 |         with self.db.lock:
 72 |             if self._columns is None:
 73 |                 # Initialise the table if it doesn't exist
 74 |                 table = self.table
 75 |                 self._columns = {}
 76 |                 for column in table.columns:
 77 |                     name = normalize_column_name(column.name)
 78 |                     key = normalize_column_key(name)
 79 |                     if key in self._columns:
 80 |                         log.warning("Duplicate column: %s", name)
 81 |                     self._columns[key] = name
 82 |             return self._columns
 83 | 
 84 |     @property
 85 |     def columns(self):
 86 |         """Get a listing of all columns that exist in the table."""
 87 |         return list(self._column_keys.values())
 88 | 
 89 |     def has_column(self, column):
 90 |         """Check if a column with the given name exists on this table."""
 91 |         key = normalize_column_key(normalize_column_name(column))
 92 |         return key in self._column_keys
 93 | 
 94 |     def _get_column_name(self, name):
 95 |         """Find the best column name with case-insensitive matching."""
 96 |         name = normalize_column_name(name)
 97 |         key = normalize_column_key(name)
 98 |         return self._column_keys.get(key, name)
 99 | 
100 |     def insert(self, row, ensure=None, types=None):
101 |         """Add a ``row`` dict by inserting it into the table.
102 | 
103 |         If ``ensure`` is set, any of the keys of the row are not
104 |         table columns, they will be created automatically.
105 | 
106 |         During column creation, ``types`` will be checked for a key
107 |         matching the name of a column to be created, and the given
108 |         SQLAlchemy column type will be used. Otherwise, the type is
109 |         guessed from the row value, defaulting to a simple unicode
110 |         field.
111 |         ::
112 | 
113 |             data = dict(title='I am a banana!')
114 |             table.insert(data)
115 | 
116 |         Returns the inserted row's primary key.
117 |         """
118 |         row = self._sync_columns(row, ensure, types=types)
119 |         res = self.db.executable.execute(self.table.insert(row))
120 |         if len(res.inserted_primary_key) > 0:
121 |             return res.inserted_primary_key[0]
122 |         return True
123 | 
124 |     def insert_ignore(self, row, keys, ensure=None, types=None):
125 |         """Add a ``row`` dict into the table if the row does not exist.
126 | 
127 |         If rows with matching ``keys`` exist no change is made.
128 | 
129 |         Setting ``ensure`` results in automatically creating missing columns,
130 |         i.e., keys of the row are not table columns.
131 | 
132 |         During column creation, ``types`` will be checked for a key
133 |         matching the name of a column to be created, and the given
134 |         SQLAlchemy column type will be used. Otherwise, the type is
135 |         guessed from the row value, defaulting to a simple unicode
136 |         field.
137 |         ::
138 | 
139 |             data = dict(id=10, title='I am a banana!')
140 |             table.insert_ignore(data, ['id'])
141 |         """
142 |         row = self._sync_columns(row, ensure, types=types)
143 |         if self._check_ensure(ensure):
144 |             self.create_index(keys)
145 |         args, _ = self._keys_to_args(row, keys)
146 |         if self.count(**args) == 0:
147 |             return self.insert(row, ensure=False)
148 |         return False
149 | 
150 |     def insert_many(self, rows, chunk_size=1000, ensure=None, types=None):
151 |         """Add many rows at a time.
152 | 
153 |         This is significantly faster than adding them one by one. Per default
154 |         the rows are processed in chunks of 1000 per commit, unless you specify
155 |         a different ``chunk_size``.
156 | 
157 |         See :py:meth:`insert() <dataset.Table.insert>` for details on
158 |         the other parameters.
159 |         ::
160 | 
161 |             rows = [dict(name='Dolly')] * 10000
162 |             table.insert_many(rows)
163 |         """
164 |         # Sync table before inputting rows.
165 |         sync_row = {}
166 |         for row in rows:
167 |             # Only get non-existing columns.
168 |             sync_keys = list(sync_row.keys())
169 |             for key in [k for k in row.keys() if k not in sync_keys]:
170 |                 # Get a sample of the new column(s) from the row.
171 |                 sync_row[key] = row[key]
172 |         self._sync_columns(sync_row, ensure, types=types)
173 | 
174 |         # Get columns name list to be used for padding later.
175 |         columns = sync_row.keys()
176 | 
177 |         chunk = []
178 |         for index, row in enumerate(rows):
179 |             chunk.append(row)
180 | 
181 |             # Insert when chunk_size is fulfilled or this is the last row
182 |             if len(chunk) == chunk_size or index == len(rows) - 1:
183 |                 chunk = pad_chunk_columns(chunk, columns)
184 |                 self.table.insert().execute(chunk)
185 |                 chunk = []
186 | 
187 |     def update(self, row, keys, ensure=None, types=None, return_count=False):
188 |         """Update a row in the table.
189 | 
190 |         The update is managed via the set of column names stated in ``keys``:
191 |         they will be used as filters for the data to be updated, using the
192 |         values in ``row``.
193 |         ::
194 | 
195 |             # update all entries with id matching 10, setting their title
196 |             # columns
197 |             data = dict(id=10, title='I am a banana!')
198 |             table.update(data, ['id'])
199 | 
200 |         If keys in ``row`` update columns not present in the table, they will
201 |         be created based on the settings of ``ensure`` and ``types``, matching
202 |         the behavior of :py:meth:`insert() <dataset.Table.insert>`.
203 |         """
204 |         row = self._sync_columns(row, ensure, types=types)
205 |         args, row = self._keys_to_args(row, keys)
206 |         clause = self._args_to_clause(args)
207 |         if not len(row):
208 |             return self.count(clause)
209 |         stmt = self.table.update(whereclause=clause, values=row)
210 |         rp = self.db.executable.execute(stmt)
211 |         if rp.supports_sane_rowcount():
212 |             return rp.rowcount
213 |         if return_count:
214 |             return self.count(clause)
215 | 
216 |     def update_many(self, rows, keys, chunk_size=1000, ensure=None, types=None):
217 |         """Update many rows in the table at a time.
218 | 
219 |         This is significantly faster than updating them one by one. Per default
220 |         the rows are processed in chunks of 1000 per commit, unless you specify
221 |         a different ``chunk_size``.
222 | 
223 |         See :py:meth:`update() <dataset.Table.update>` for details on
224 |         the other parameters.
225 |         """
226 |         keys = ensure_list(keys)
227 | 
228 |         chunk = []
229 |         columns = []
230 |         for index, row in enumerate(rows):
231 |             columns.extend(
232 |                 col for col in row.keys() if (col not in columns) and (col not in keys)
233 |             )
234 | 
235 |             # bindparam requires names to not conflict (cannot be "id" for id)
236 |             for key in keys:
237 |                 row["_%s" % key] = row[key]
238 |                 row.pop(key)
239 |             chunk.append(row)
240 | 
241 |             # Update when chunk_size is fulfilled or this is the last row
242 |             if len(chunk) == chunk_size or index == len(rows) - 1:
243 |                 cl = [self.table.c[k] == bindparam("_%s" % k) for k in keys]
244 |                 stmt = self.table.update(
245 |                     whereclause=and_(True, *cl),
246 |                     values={col: bindparam(col, required=False) for col in columns},
247 |                 )
248 |                 self.db.executable.execute(stmt, chunk)
249 |                 chunk = []
250 | 
251 |     def upsert(self, row, keys, ensure=None, types=None):
252 |         """An UPSERT is a smart combination of insert and update.
253 | 
254 |         If rows with matching ``keys`` exist they will be updated, otherwise a
255 |         new row is inserted in the table.
256 |         ::
257 | 
258 |             data = dict(id=10, title='I am a banana!')
259 |             table.upsert(data, ['id'])
260 |         """
261 |         row = self._sync_columns(row, ensure, types=types)
262 |         if self._check_ensure(ensure):
263 |             self.create_index(keys)
264 |         row_count = self.update(row, keys, ensure=False, return_count=True)
265 |         if row_count == 0:
266 |             return self.insert(row, ensure=False)
267 |         return True
268 | 
269 |     def upsert_many(self, rows, keys, chunk_size=1000, ensure=None, types=None):
270 |         """
271 |         Sorts multiple input rows into upserts and inserts. Inserts are passed
272 |         to insert and upserts are updated.
273 | 
274 |         See :py:meth:`upsert() <dataset.Table.upsert>` and
275 |         :py:meth:`insert_many() <dataset.Table.insert_many>`.
276 |         """
277 |         # Removing a bulk implementation in 5e09aba401. Doing this one by one
278 |         # is incredibly slow, but doesn't run into issues with column creation.
279 |         for row in rows:
280 |             self.upsert(row, keys, ensure=ensure, types=types)
281 | 
282 |     def delete(self, *clauses, **filters):
283 |         """Delete rows from the table.
284 | 
285 |         Keyword arguments can be used to add column-based filters. The filter
286 |         criterion will always be equality:
287 |         ::
288 | 
289 |             table.delete(place='Berlin')
290 | 
291 |         If no arguments are given, all records are deleted.
292 |         """
293 |         if not self.exists:
294 |             return False
295 |         clause = self._args_to_clause(filters, clauses=clauses)
296 |         stmt = self.table.delete(whereclause=clause)
297 |         rp = self.db.executable.execute(stmt)
298 |         return rp.rowcount > 0
299 | 
300 |     def _reflect_table(self):
301 |         """Load the tables definition from the database."""
302 |         with self.db.lock:
303 |             self._columns = None
304 |             try:
305 |                 self._table = SQLATable(
306 |                     self.name, self.db.metadata, schema=self.db.schema, autoload=True
307 |                 )
308 |             except NoSuchTableError:
309 |                 self._table = None
310 | 
311 |     def _threading_warn(self):
312 |         if self.db.in_transaction and threading.active_count() > 1:
313 |             warnings.warn(
314 |                 "Changing the database schema inside a transaction "
315 |                 "in a multi-threaded environment is likely to lead "
316 |                 "to race conditions and synchronization issues.",
317 |                 RuntimeWarning,
318 |             )
319 | 
320 |     def _sync_table(self, columns):
321 |         """Lazy load, create or adapt the table structure in the database."""
322 |         if self._table is None:
323 |             # Load an existing table from the database.
324 |             self._reflect_table()
325 |         if self._table is None:
326 |             # Create the table with an initial set of columns.
327 |             if not self._auto_create:
328 |                 raise DatasetException("Table does not exist: %s" % self.name)
329 |             # Keep the lock scope small because this is run very often.
330 |             with self.db.lock:
331 |                 self._threading_warn()
332 |                 self._table = SQLATable(
333 |                     self.name, self.db.metadata, schema=self.db.schema
334 |                 )
335 |                 if self._primary_id is not False:
336 |                     # This can go wrong on DBMS like MySQL and SQLite where
337 |                     # tables cannot have no columns.
338 |                     column = Column(
339 |                         self._primary_id,
340 |                         self._primary_type,
341 |                         primary_key=True,
342 |                         autoincrement=self._primary_increment,
343 |                     )
344 |                     self._table.append_column(column)
345 |                 for column in columns:
346 |                     if not column.name == self._primary_id:
347 |                         self._table.append_column(column)
348 |                 self._table.create(self.db.executable, checkfirst=True)
349 |                 self._columns = None
350 |         elif len(columns):
351 |             with self.db.lock:
352 |                 self._reflect_table()
353 |                 self._threading_warn()
354 |                 for column in columns:
355 |                     if not self.has_column(column.name):
356 |                         self.db.op.add_column(self.name, column, schema=self.db.schema)
357 |                 self._reflect_table()
358 | 
359 |     def _sync_columns(self, row, ensure, types=None):
360 |         """Create missing columns (or the table) prior to writes.
361 | 
362 |         If automatic schema generation is disabled (``ensure`` is ``False``),
363 |         this will remove any keys from the ``row`` for which there is no
364 |         matching column.
365 |         """
366 |         ensure = self._check_ensure(ensure)
367 |         types = types or {}
368 |         types = {self._get_column_name(k): v for (k, v) in types.items()}
369 |         out = {}
370 |         sync_columns = {}
371 |         for name, value in row.items():
372 |             name = self._get_column_name(name)
373 |             if self.has_column(name):
374 |                 out[name] = value
375 |             elif ensure:
376 |                 _type = types.get(name)
377 |                 if _type is None:
378 |                     _type = self.db.types.guess(value)
379 |                 sync_columns[name] = Column(name, _type)
380 |                 out[name] = value
381 |         self._sync_table(sync_columns.values())
382 |         return out
383 | 
384 |     def _check_ensure(self, ensure):
385 |         if ensure is None:
386 |             return self.db.ensure_schema
387 |         return ensure
388 | 
389 |     def _generate_clause(self, column, op, value):
390 |         if op in ("like",):
391 |             return self.table.c[column].like(value)
392 |         if op in ("ilike",):
393 |             return self.table.c[column].ilike(value)
394 |         if op in ("notlike",):
395 |             return self.table.c[column].notlike(value)
396 |         if op in ("notilike",):
397 |             return self.table.c[column].notilike(value)
398 |         if op in (">", "gt"):
399 |             return self.table.c[column] > value
400 |         if op in ("<", "lt"):
401 |             return self.table.c[column] < value
402 |         if op in (">=", "gte"):
403 |             return self.table.c[column] >= value
404 |         if op in ("<=", "lte"):
405 |             return self.table.c[column] <= value
406 |         if op in ("=", "==", "is"):
407 |             return self.table.c[column] == value
408 |         if op in ("!=", "<>", "not"):
409 |             return self.table.c[column] != value
410 |         if op in ("in",):
411 |             return self.table.c[column].in_(value)
412 |         if op in ("notin",):
413 |             return self.table.c[column].notin_(value)
414 |         if op in ("between", ".."):
415 |             start, end = value
416 |             return self.table.c[column].between(start, end)
417 |         if op in ("startswith",):
418 |             return self.table.c[column].like(value + "%")
419 |         if op in ("endswith",):
420 |             return self.table.c[column].like("%" + value)
421 |         return false()
422 | 
423 |     def _args_to_clause(self, args, clauses=()):
424 |         clauses = list(clauses)
425 |         for column, value in args.items():
426 |             column = self._get_column_name(column)
427 |             if not self.has_column(column):
428 |                 clauses.append(false())
429 |             elif isinstance(value, (list, tuple, set)):
430 |                 clauses.append(self._generate_clause(column, "in", value))
431 |             elif isinstance(value, dict):
432 |                 for op, op_value in value.items():
433 |                     clauses.append(self._generate_clause(column, op, op_value))
434 |             else:
435 |                 clauses.append(self._generate_clause(column, "=", value))
436 |         return and_(True, *clauses)
437 | 
438 |     def _args_to_order_by(self, order_by):
439 |         orderings = []
440 |         for ordering in ensure_list(order_by):
441 |             if ordering is None:
442 |                 continue
443 |             column = ordering.lstrip("-")
444 |             column = self._get_column_name(column)
445 |             if not self.has_column(column):
446 |                 continue
447 |             if ordering.startswith("-"):
448 |                 orderings.append(self.table.c[column].desc())
449 |             else:
450 |                 orderings.append(self.table.c[column].asc())
451 |         return orderings
452 | 
453 |     def _keys_to_args(self, row, keys):
454 |         keys = [self._get_column_name(k) for k in ensure_list(keys)]
455 |         row = row.copy()
456 |         args = {k: row.pop(k, None) for k in keys}
457 |         return args, row
458 | 
459 |     def create_column(self, name, type, **kwargs):
460 |         """Create a new column ``name`` of a specified type.
461 |         ::
462 | 
463 |             table.create_column('created_at', db.types.datetime)
464 | 
465 |         `type` corresponds to an SQLAlchemy type as described by
466 |         `dataset.db.Types`. Additional keyword arguments are passed
467 |         to the constructor of `Column`, so that default values, and
468 |         options like `nullable` and `unique` can be set.
469 |         ::
470 | 
471 |             table.create_column('key', unique=True, nullable=False)
472 |             table.create_column('food', default='banana')
473 |         """
474 |         name = self._get_column_name(name)
475 |         if self.has_column(name):
476 |             log.debug("Column exists: %s" % name)
477 |             return
478 |         self._sync_table((Column(name, type, **kwargs),))
479 | 
480 |     def create_column_by_example(self, name, value):
481 |         """
482 |         Explicitly create a new column ``name`` with a type that is appropriate
483 |         to store the given example ``value``.  The type is guessed in the same
484 |         way as for the insert method with ``ensure=True``.
485 |         ::
486 | 
487 |             table.create_column_by_example('length', 4.2)
488 | 
489 |         If a column of the same name already exists, no action is taken, even
490 |         if it is not of the type we would have created.
491 |         """
492 |         type_ = self.db.types.guess(value)
493 |         self.create_column(name, type_)
494 | 
495 |     def drop_column(self, name):
496 |         """
497 |         Drop the column ``name``.
498 |         ::
499 | 
500 |             table.drop_column('created_at')
501 | 
502 |         """
503 |         if self.db.engine.dialect.name == "sqlite":
504 |             raise RuntimeError("SQLite does not support dropping columns.")
505 |         name = self._get_column_name(name)
506 |         with self.db.lock:
507 |             if not self.exists or not self.has_column(name):
508 |                 log.debug("Column does not exist: %s", name)
509 |                 return
510 | 
511 |             self._threading_warn()
512 |             self.db.op.drop_column(self.table.name, name, schema=self.table.schema)
513 |             self._reflect_table()
514 | 
515 |     def drop(self):
516 |         """Drop the table from the database.
517 | 
518 |         Deletes both the schema and all the contents within it.
519 |         """
520 |         with self.db.lock:
521 |             if self.exists:
522 |                 self._threading_warn()
523 |                 self.table.drop(self.db.executable, checkfirst=True)
524 |                 self._table = None
525 |                 self._columns = None
526 |                 self.db._tables.pop(self.name, None)
527 | 
528 |     def has_index(self, columns):
529 |         """Check if an index exists to cover the given ``columns``."""
530 |         if not self.exists:
531 |             return False
532 |         columns = set([self._get_column_name(c) for c in ensure_list(columns)])
533 |         if columns in self._indexes:
534 |             return True
535 |         for column in columns:
536 |             if not self.has_column(column):
537 |                 return False
538 |         indexes = self.db.inspect.get_indexes(self.name, schema=self.db.schema)
539 |         for index in indexes:
540 |             idx_columns = index.get("column_names", [])
541 |             if len(columns.intersection(idx_columns)) == len(columns):
542 |                 self._indexes.append(columns)
543 |                 return True
544 |         if self.table.primary_key is not None:
545 |             pk_columns = [c.name for c in self.table.primary_key.columns]
546 |             if len(columns.intersection(pk_columns)) == len(columns):
547 |                 self._indexes.append(columns)
548 |                 return True
549 |         return False
550 | 
551 |     def create_index(self, columns, name=None, **kw):
552 |         """Create an index to speed up queries on a table.
553 | 
554 |         If no ``name`` is given a random name is created.
555 |         ::
556 | 
557 |             table.create_index(['name', 'country'])
558 |         """
559 |         columns = [self._get_column_name(c) for c in ensure_list(columns)]
560 |         with self.db.lock:
561 |             if not self.exists:
562 |                 raise DatasetException("Table has not been created yet.")
563 | 
564 |             for column in columns:
565 |                 if not self.has_column(column):
566 |                     return
567 | 
568 |             if not self.has_index(columns):
569 |                 self._threading_warn()
570 |                 name = name or index_name(self.name, columns)
571 |                 columns = [self.table.c[c] for c in columns]
572 | 
573 |                 # MySQL crashes out if you try to index very long text fields,
574 |                 # apparently. This defines (a somewhat random) prefix that
575 |                 # will be captured by the index, after which I assume the engine
576 |                 # conducts a more linear scan:
577 |                 mysql_length = {}
578 |                 for col in columns:
579 |                     if isinstance(col.type, MYSQL_LENGTH_TYPES):
580 |                         mysql_length[col.name] = 10
581 |                 kw["mysql_length"] = mysql_length
582 | 
583 |                 idx = Index(name, *columns, **kw)
584 |                 idx.create(self.db.executable)
585 | 
586 |     def find(self, *_clauses, **kwargs):
587 |         """Perform a simple search on the table.
588 | 
589 |         Simply pass keyword arguments as ``filter``.
590 |         ::
591 | 
592 |             results = table.find(country='France')
593 |             results = table.find(country='France', year=1980)
594 | 
595 |         Using ``_limit``::
596 | 
597 |             # just return the first 10 rows
598 |             results = table.find(country='France', _limit=10)
599 | 
600 |         You can sort the results by single or multiple columns. Append a minus
601 |         sign to the column name for descending order::
602 | 
603 |             # sort results by a column 'year'
604 |             results = table.find(country='France', order_by='year')
605 |             # return all rows sorted by multiple columns (descending by year)
606 |             results = table.find(order_by=['country', '-year'])
607 | 
608 |         You can also submit filters based on criteria other than equality,
609 |         see :ref:`advanced_filters` for details.
610 | 
611 |         To run more complex queries with JOINs, or to perform GROUP BY-style
612 |         aggregation, you can also use :py:meth:`db.query() <dataset.Database.query>`
613 |         to run raw SQL queries instead.
614 |         """
615 |         if not self.exists:
616 |             return iter([])
617 | 
618 |         _limit = kwargs.pop("_limit", None)
619 |         _offset = kwargs.pop("_offset", 0)
620 |         order_by = kwargs.pop("order_by", None)
621 |         _streamed = kwargs.pop("_streamed", False)
622 |         _step = kwargs.pop("_step", QUERY_STEP)
623 |         if _step is False or _step == 0:
624 |             _step = None
625 | 
626 |         order_by = self._args_to_order_by(order_by)
627 |         args = self._args_to_clause(kwargs, clauses=_clauses)
628 |         query = self.table.select(whereclause=args, limit=_limit, offset=_offset)
629 |         if len(order_by):
630 |             query = query.order_by(*order_by)
631 | 
632 |         conn = self.db.executable
633 |         if _streamed:
634 |             conn = self.db.engine.connect()
635 |             conn = conn.execution_options(stream_results=True)
636 | 
637 |         return ResultIter(conn.execute(query), row_type=self.db.row_type, step=_step)
638 | 
639 |     def find_one(self, *args, **kwargs):
640 |         """Get a single result from the table.
641 | 
642 |         Works just like :py:meth:`find() <dataset.Table.find>` but returns one
643 |         result, or ``None``.
644 |         ::
645 | 
646 |             row = table.find_one(country='United States')
647 |         """
648 |         if not self.exists:
649 |             return None
650 | 
651 |         kwargs["_limit"] = 1
652 |         kwargs["_step"] = None
653 |         resiter = self.find(*args, **kwargs)
654 |         try:
655 |             for row in resiter:
656 |                 return row
657 |         finally:
658 |             resiter.close()
659 | 
660 |     def count(self, *_clauses, **kwargs):
661 |         """Return the count of results for the given filter set."""
662 |         # NOTE: this does not have support for limit and offset since I can't
663 |         # see how this is useful. Still, there might be compatibility issues
664 |         # with people using these flags. Let's see how it goes.
665 |         if not self.exists:
666 |             return 0
667 | 
668 |         args = self._args_to_clause(kwargs, clauses=_clauses)
669 |         query = select([func.count()], whereclause=args)
670 |         query = query.select_from(self.table)
671 |         rp = self.db.executable.execute(query)
672 |         return rp.fetchone()[0]
673 | 
674 |     def __len__(self):
675 |         """Return the number of rows in the table."""
676 |         return self.count()
677 | 
678 |     def distinct(self, *args, **kwargs):
679 |         """Return all the unique (distinct) values for the given ``columns``.
680 |         ::
681 | 
682 |             # returns only one row per year, ignoring the rest
683 |             table.distinct('year')
684 |             # works with multiple columns, too
685 |             table.distinct('year', 'country')
686 |             # you can also combine this with a filter
687 |             table.distinct('year', country='China')
688 |         """
689 |         if not self.exists:
690 |             return iter([])
691 | 
692 |         columns = []
693 |         clauses = []
694 |         for column in args:
695 |             if isinstance(column, ClauseElement):
696 |                 clauses.append(column)
697 |             else:
698 |                 if not self.has_column(column):
699 |                     raise DatasetException("No such column: %s" % column)
700 |                 columns.append(self.table.c[column])
701 | 
702 |         _limit = kwargs.pop("_limit", None)
703 |         _offset = kwargs.pop("_offset", 0)
704 |         clause = self._args_to_clause(kwargs, clauses=clauses)
705 |         if not len(columns):
706 |             return iter([])
707 | 
708 |         q = expression.select(
709 |             columns,
710 |             distinct=True,
711 |             whereclause=clause,
712 |             limit=_limit,
713 |             offset=_offset,
714 |             order_by=[c.asc() for c in columns],
715 |         )
716 |         return self.db.query(q)
717 | 
718 |     # Legacy methods for running find queries.
719 |     all = find
720 | 
721 |     def __iter__(self):
722 |         """Return all rows of the table as simple dictionaries.
723 | 
724 |         Allows for iterating over all rows in the table without explicitly
725 |         calling :py:meth:`find() <dataset.Table.find>`.
726 |         ::
727 | 
728 |             for row in table:
729 |                 print(row)
730 |         """
731 |         return self.find()
732 | 
733 |     def __repr__(self):
734 |         """Get table representation."""
735 |         return "<Table(%s)>" % self.table.name
736 | 


--------------------------------------------------------------------------------
/dataset/types.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime, date
 2 | 
 3 | from sqlalchemy import Integer, UnicodeText, Float, BigInteger
 4 | from sqlalchemy import String, Boolean, Date, DateTime, Unicode, JSON
 5 | from sqlalchemy.dialects.postgresql import JSONB
 6 | from sqlalchemy.types import TypeEngine, _Binary
 7 | 
 8 | MYSQL_LENGTH_TYPES = (String, _Binary)
 9 | 
10 | 
11 | class Types(object):
12 |     """A holder class for easy access to SQLAlchemy type names."""
13 | 
14 |     integer = Integer
15 |     string = Unicode
16 |     text = UnicodeText
17 |     float = Float
18 |     bigint = BigInteger
19 |     boolean = Boolean
20 |     date = Date
21 |     datetime = DateTime
22 | 
23 |     def __init__(self, is_postgres=None):
24 |         self.json = JSONB if is_postgres else JSON
25 | 
26 |     def guess(self, sample):
27 |         """Given a single sample, guess the column type for the field.
28 | 
29 |         If the sample is an instance of an SQLAlchemy type, the type will be
30 |         used instead.
31 |         """
32 |         if isinstance(sample, TypeEngine):
33 |             return sample
34 |         if isinstance(sample, bool):
35 |             return self.boolean
36 |         elif isinstance(sample, int):
37 |             return self.bigint
38 |         elif isinstance(sample, float):
39 |             return self.float
40 |         elif isinstance(sample, datetime):
41 |             return self.datetime
42 |         elif isinstance(sample, date):
43 |             return self.date
44 |         elif isinstance(sample, dict):
45 |             return self.json
46 |         return self.text
47 | 


--------------------------------------------------------------------------------
/dataset/util.py:
--------------------------------------------------------------------------------
  1 | from hashlib import sha1
  2 | from urllib.parse import urlparse, urlencode
  3 | from collections import OrderedDict
  4 | from sqlalchemy.exc import ResourceClosedError
  5 | 
  6 | QUERY_STEP = 1000
  7 | row_type = OrderedDict
  8 | 
  9 | try:
 10 |     # SQLAlchemy > 1.4.0, new row model.
 11 |     from sqlalchemy.engine import Row  # noqa
 12 | 
 13 |     def convert_row(row_type, row):
 14 |         if row is None:
 15 |             return None
 16 |         return row_type(row._mapping.items())
 17 | 
 18 | 
 19 | except ImportError:
 20 |     # SQLAlchemy < 1.4.0, no _mapping.
 21 | 
 22 |     def convert_row(row_type, row):
 23 |         if row is None:
 24 |             return None
 25 |         return row_type(row.items())
 26 | 
 27 | 
 28 | class DatasetException(Exception):
 29 |     pass
 30 | 
 31 | 
 32 | def iter_result_proxy(rp, step=None):
 33 |     """Iterate over the ResultProxy."""
 34 |     while True:
 35 |         if step is None:
 36 |             chunk = rp.fetchall()
 37 |         else:
 38 |             chunk = rp.fetchmany(size=step)
 39 |         if not chunk:
 40 |             break
 41 |         for row in chunk:
 42 |             yield row
 43 | 
 44 | 
 45 | def make_sqlite_url(
 46 |     path,
 47 |     cache=None,
 48 |     timeout=None,
 49 |     mode=None,
 50 |     check_same_thread=True,
 51 |     immutable=False,
 52 |     nolock=False,
 53 | ):
 54 |     # NOTE: this PR
 55 |     # https://gerrit.sqlalchemy.org/c/sqlalchemy/sqlalchemy/+/1474/
 56 |     # added support for URIs in SQLite
 57 |     # The full list of supported URIs is a combination of:
 58 |     # https://docs.python.org/3/library/sqlite3.html#sqlite3.connect
 59 |     # and
 60 |     # https://www.sqlite.org/uri.html
 61 |     params = {}
 62 |     if cache:
 63 |         assert cache in ("shared", "private")
 64 |         params["cache"] = cache
 65 |     if timeout:
 66 |         # Note: if timeout is None, it uses the default timeout
 67 |         params["timeout"] = timeout
 68 |     if mode:
 69 |         assert mode in ("ro", "rw", "rwc")
 70 |         params["mode"] = mode
 71 |     if nolock:
 72 |         params["nolock"] = 1
 73 |     if immutable:
 74 |         params["immutable"] = 1
 75 |     if not check_same_thread:
 76 |         params["check_same_thread"] = "false"
 77 |     if not params:
 78 |         return "sqlite:///" + path
 79 |     params["uri"] = "true"
 80 |     return "sqlite:///file:" + path + "?" + urlencode(params)
 81 | 
 82 | 
 83 | class ResultIter(object):
 84 |     """SQLAlchemy ResultProxies are not iterable to get a
 85 |     list of dictionaries. This is to wrap them."""
 86 | 
 87 |     def __init__(self, result_proxy, row_type=row_type, step=None):
 88 |         self.row_type = row_type
 89 |         self.result_proxy = result_proxy
 90 |         try:
 91 |             self.keys = list(result_proxy.keys())
 92 |             self._iter = iter_result_proxy(result_proxy, step=step)
 93 |         except ResourceClosedError:
 94 |             self.keys = []
 95 |             self._iter = iter([])
 96 | 
 97 |     def __next__(self):
 98 |         try:
 99 |             return convert_row(self.row_type, next(self._iter))
100 |         except StopIteration:
101 |             self.close()
102 |             raise
103 | 
104 |     next = __next__
105 | 
106 |     def __iter__(self):
107 |         return self
108 | 
109 |     def close(self):
110 |         self.result_proxy.close()
111 | 
112 | 
113 | def normalize_column_name(name):
114 |     """Check if a string is a reasonable thing to use as a column name."""
115 |     if not isinstance(name, str):
116 |         raise ValueError("%r is not a valid column name." % name)
117 | 
118 |     # limit to 63 characters
119 |     name = name.strip()[:63]
120 |     # column names can be 63 *bytes* max in postgresql
121 |     if isinstance(name, str):
122 |         while len(name.encode("utf-8")) >= 64:
123 |             name = name[: len(name) - 1]
124 | 
125 |     if not len(name) or "." in name or "-" in name:
126 |         raise ValueError("%r is not a valid column name." % name)
127 |     return name
128 | 
129 | 
130 | def normalize_column_key(name):
131 |     """Return a comparable column name."""
132 |     if name is None or not isinstance(name, str):
133 |         return None
134 |     return name.upper().strip().replace(" ", "")
135 | 
136 | 
137 | def normalize_table_name(name):
138 |     """Check if the table name is obviously invalid."""
139 |     if not isinstance(name, str):
140 |         raise ValueError("Invalid table name: %r" % name)
141 |     name = name.strip()[:63]
142 |     if not len(name):
143 |         raise ValueError("Invalid table name: %r" % name)
144 |     return name
145 | 
146 | 
147 | def safe_url(url):
148 |     """Remove password from printed connection URLs."""
149 |     parsed = urlparse(url)
150 |     if parsed.password is not None:
151 |         pwd = ":%s@" % parsed.password
152 |         url = url.replace(pwd, ":*****@")
153 |     return url
154 | 
155 | 
156 | def index_name(table, columns):
157 |     """Generate an artificial index name."""
158 |     sig = "||".join(columns)
159 |     key = sha1(sig.encode("utf-8")).hexdigest()[:16]
160 |     return "ix_%s_%s" % (table, key)
161 | 
162 | 
163 | def pad_chunk_columns(chunk, columns):
164 |     """Given a set of items to be inserted, make sure they all have the
165 |     same columns by padding columns with None if they are missing."""
166 |     for record in chunk:
167 |         for column in columns:
168 |             record.setdefault(column, None)
169 |     return chunk
170 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build
2 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 21 | 
 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
 23 | 
 24 | help:
 25 | 	@echo "Please use \`make <target>' where <target> is one of"
 26 | 	@echo "  html       to make standalone HTML files"
 27 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 28 | 	@echo "  singlehtml to make a single large HTML file"
 29 | 	@echo "  pickle     to make pickle files"
 30 | 	@echo "  json       to make JSON files"
 31 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 32 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 36 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 37 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 38 | 	@echo "  text       to make text files"
 39 | 	@echo "  man        to make manual pages"
 40 | 	@echo "  texinfo    to make Texinfo files"
 41 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 42 | 	@echo "  gettext    to make PO message catalogs"
 43 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 44 | 	@echo "  xml        to make Docutils-native XML files"
 45 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 46 | 	@echo "  linkcheck  to check all external links for integrity"
 47 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 48 | 
 49 | clean:
 50 | 	rm -rf $(BUILDDIR)/*
 51 | 
 52 | html:
 53 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 54 | 	@echo
 55 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 56 | 
 57 | dirhtml:
 58 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 59 | 	@echo
 60 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 61 | 
 62 | singlehtml:
 63 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 64 | 	@echo
 65 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 66 | 
 67 | pickle:
 68 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 69 | 	@echo
 70 | 	@echo "Build finished; now you can process the pickle files."
 71 | 
 72 | json:
 73 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 74 | 	@echo
 75 | 	@echo "Build finished; now you can process the JSON files."
 76 | 
 77 | htmlhelp:
 78 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 79 | 	@echo
 80 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 81 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 82 | 
 83 | qthelp:
 84 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 85 | 	@echo
 86 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 87 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 88 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/dataset.qhcp"
 89 | 	@echo "To view the help file:"
 90 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/dataset.qhc"
 91 | 
 92 | devhelp:
 93 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 94 | 	@echo
 95 | 	@echo "Build finished."
 96 | 	@echo "To view the help file:"
 97 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/dataset"
 98 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/dataset"
 99 | 	@echo "# devhelp"
100 | 
101 | epub:
102 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
103 | 	@echo
104 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 | 
106 | latex:
107 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
108 | 	@echo
109 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
110 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
111 | 	      "(use \`make latexpdf' here to do that automatically)."
112 | 
113 | latexpdf:
114 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
115 | 	@echo "Running LaTeX files through pdflatex..."
116 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
117 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
118 | 
119 | latexpdfja:
120 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
121 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
122 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
123 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
124 | 
125 | text:
126 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
127 | 	@echo
128 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
129 | 
130 | man:
131 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
132 | 	@echo
133 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
134 | 
135 | texinfo:
136 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
137 | 	@echo
138 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
139 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
140 | 	      "(use \`make info' here to do that automatically)."
141 | 
142 | info:
143 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
144 | 	@echo "Running Texinfo files through makeinfo..."
145 | 	make -C $(BUILDDIR)/texinfo info
146 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
147 | 
148 | gettext:
149 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
150 | 	@echo
151 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
152 | 
153 | changes:
154 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
155 | 	@echo
156 | 	@echo "The overview file is in $(BUILDDIR)/changes."
157 | 
158 | linkcheck:
159 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
160 | 	@echo
161 | 	@echo "Link check complete; look for any errors in the above output " \
162 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
163 | 
164 | doctest:
165 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
166 | 	@echo "Testing of doctests in the sources finished, look at the " \
167 | 	      "results in $(BUILDDIR)/doctest/output.txt."
168 | 
169 | xml:
170 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
171 | 	@echo
172 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
173 | 
174 | pseudoxml:
175 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
176 | 	@echo
177 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
178 | 


--------------------------------------------------------------------------------
/docs/_static/dataset-logo-dark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pudo/dataset/b2ab09e58c6f17334e4286009b20887b6a8a8fac/docs/_static/dataset-logo-dark.png


--------------------------------------------------------------------------------
/docs/_static/dataset-logo-light.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pudo/dataset/b2ab09e58c6f17334e4286009b20887b6a8a8fac/docs/_static/dataset-logo-light.png


--------------------------------------------------------------------------------
/docs/_static/dataset-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pudo/dataset/b2ab09e58c6f17334e4286009b20887b6a8a8fac/docs/_static/dataset-logo.png


--------------------------------------------------------------------------------
/docs/_static/knight_mozilla_on.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pudo/dataset/b2ab09e58c6f17334e4286009b20887b6a8a8fac/docs/_static/knight_mozilla_on.jpg


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | API documentation
 3 | =================
 4 | 
 5 | Connecting
 6 | ----------
 7 | 
 8 | .. autofunction:: dataset.connect
 9 | 
10 | Notes
11 | -----
12 | 
13 | * **dataset** uses SQLAlchemy connection pooling when connecting to the
14 |   database. There is no way of explicitly clearing or shutting down the
15 |   connections, other than having the dataset instance garbage collected.
16 | 
17 | Database
18 | --------
19 | 
20 | .. autoclass:: dataset.Database
21 |    :members: tables, get_table, create_table, load_table, query, begin, commit, rollback
22 |    :special-members:
23 | 
24 | 
25 | Table
26 | -----
27 | 
28 | .. autoclass:: dataset.Table
29 |    :members: columns, find, find_one, all, count, distinct, insert, insert_ignore, insert_many, update, update_many, upsert, upsert_many, delete, create_column, create_column_by_example, drop_column, create_index, drop, has_column, has_index
30 |    :special-members: __len__, __iter__
31 | 
32 | 
33 | Data Export
34 | -----------
35 | 
36 |   **Note:** Data exporting has been extracted into a stand-alone package, datafreeze. See the relevant repository here_.
37 | 
38 | .. _here: https://github.com/pudo/datafreeze
39 | 
40 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # dataset documentation build configuration file, created by
  4 | # sphinx-quickstart on Mon Apr  1 18:41:21 2013.
  5 | #
  6 | # This file is execfile()d with the current directory set to its containing dir.
  7 | #
  8 | # Note that not all possible configuration values are present in this
  9 | # autogenerated file.
 10 | #
 11 | # All configuration values have a default; values that are commented out
 12 | # serve to show the default.
 13 | 
 14 | import sys, os
 15 | 
 16 | # If extensions (or modules to document with autodoc) are in another directory,
 17 | # add these directories to sys.path here. If the directory is relative to the
 18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 19 | sys.path.insert(0, os.path.abspath("../"))
 20 | 
 21 | # -- General configuration -----------------------------------------------------
 22 | 
 23 | # If your documentation needs a minimal Sphinx version, state it here.
 24 | # needs_sphinx = '1.0'
 25 | 
 26 | # Add any Sphinx extension module names here, as strings. They can be extensions
 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 28 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"]
 29 | 
 30 | # Add any paths that contain templates here, relative to this directory.
 31 | templates_path = ["_templates"]
 32 | 
 33 | # The suffix of source filenames.
 34 | source_suffix = ".rst"
 35 | 
 36 | # The master toctree document.
 37 | master_doc = "index"
 38 | 
 39 | # General information about the project.
 40 | project = u"dataset"
 41 | copyright = u"2013-2021, Friedrich Lindenberg, Gregor Aisch, Stefan Wehrmeyer"
 42 | 
 43 | # The version info for the project you're documenting, acts as replacement for
 44 | # |version| and |release|, also used in various other places throughout the
 45 | # built documents.
 46 | #
 47 | # The short X.Y version.
 48 | version = "1.6.2"
 49 | # The full version, including alpha/beta/rc tags.
 50 | release = "1.6.2"
 51 | 
 52 | # There are two options for replacing |today|: either, you set today to some
 53 | # non-false value, then it is used:
 54 | # today = ''
 55 | # Else, today_fmt is used as the format for a strftime call.
 56 | # today_fmt = '%B %d, %Y'
 57 | 
 58 | # List of patterns, relative to source directory, that match files and
 59 | # directories to ignore when looking for source files.
 60 | exclude_patterns = ["_build"]
 61 | 
 62 | # The reST default role (used for this markup: `text`) to use for all documents.
 63 | # default_role = None
 64 | 
 65 | # If true, '()' will be appended to :func: etc. cross-reference text.
 66 | # add_function_parentheses = True
 67 | 
 68 | # If true, the current module name will be prepended to all description
 69 | # unit titles (such as .. function::).
 70 | # add_module_names = True
 71 | 
 72 | # If true, sectionauthor and moduleauthor directives will be shown in the
 73 | # output. They are ignored by default.
 74 | # show_authors = False
 75 | 
 76 | # The name of the Pygments (syntax highlighting) style to use.
 77 | pygments_style = "sphinx"
 78 | 
 79 | # A list of ignored prefixes for module index sorting.
 80 | # modindex_common_prefix = []
 81 | 
 82 | # If true, keep warnings as "system message" paragraphs in the built documents.
 83 | # keep_warnings = False
 84 | 
 85 | 
 86 | # -- Options for HTML output ---------------------------------------------------
 87 | 
 88 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 89 | # a list of builtin themes.
 90 | html_theme = "furo"
 91 | 
 92 | html_static_path = ["_static"]
 93 | html_theme_options = {
 94 |     "light_logo": "dataset-logo-light.png",
 95 |     "dark_logo": "dataset-logo-dark.png",
 96 | }
 97 | 
 98 | # If true, links to the reST sources are added to the pages.
 99 | html_show_sourcelink = False
100 | 
101 | 
102 | # Output file base name for HTML help builder.
103 | htmlhelp_basename = "datasetdoc"
104 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. dataset documentation master file, created by
 2 |    sphinx-quickstart on Mon Apr  1 18:41:21 2013.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | dataset: databases for lazy people
 7 | ==================================
 8 | 
 9 | .. toctree::
10 |    :hidden:
11 | 
12 | 
13 | Although managing data in relational databases has plenty of benefits, they're
14 | rarely used in day-to-day work with small to medium scale datasets. But why is
15 | that? Why do we see an awful lot of data stored in static files in CSV or JSON
16 | format, even though they are hard to query and update incrementally?
17 | 
18 | The answer is that **programmers are lazy**, and thus they tend to prefer the
19 | easiest solution they find. And in **Python**, a database isn't the simplest
20 | solution for storing a bunch of structured data. This is what **dataset** is
21 | going to change!
22 | 
23 | **dataset** provides a simple abstraction layer that removes most direct SQL
24 | statements without the necessity for a full ORM model - essentially, databases
25 | can be used like a JSON file or NoSQL store.
26 | 
27 | A simple data loading script using **dataset** might look like this:
28 | 
29 | ::
30 | 
31 |    import dataset
32 | 
33 |    db = dataset.connect('sqlite:///:memory:')
34 | 
35 |    table = db['sometable']
36 |    table.insert(dict(name='John Doe', age=37))
37 |    table.insert(dict(name='Jane Doe', age=34, gender='female'))
38 | 
39 |    john = table.find_one(name='John Doe')
40 | 
41 | 
42 | Here is `similar code, without dataset <https://gist.github.com/gka/5296492>`_.
43 | 
44 | 
45 | Features
46 | --------
47 | 
48 | * **Automatic schema**: If a table or column is written that does not
49 |   exist in the database, it will be created automatically.
50 | * **Upserts**: Records are either created or updated, depending on
51 |   whether an existing version can be found.
52 | * **Query helpers** for simple queries such as :py:meth:`all <dataset.Table.all>` rows in a table or
53 |   all :py:meth:`distinct <dataset.Table.distinct>` values across a set of columns.
54 | * **Compatibility**: Being built on top of `SQLAlchemy <http://www.sqlalchemy.org/>`_, ``dataset`` works with all major databases, such as SQLite, PostgreSQL and MySQL.
55 | 
56 | Contents
57 | --------
58 | 
59 | .. toctree::
60 |    :maxdepth: 2
61 | 
62 |    install
63 |    quickstart
64 |    api
65 |    queries
66 | 
67 | Contributors
68 | ------------
69 | 
70 | ``dataset`` is written and maintained by `Friedrich Lindenberg <https://github.com/pudo>`_,
71 | `Gregor Aisch <https://github.com/gka>`_ and `Stefan Wehrmeyer <https://github.com/stefanw>`_.
72 | Its code is largely based on the preceding libraries `sqlaload <https://github.com/okfn/sqlaload>`_
73 | and datafreeze. And of course, we're standing on the `shoulders of giants <http://www.sqlalchemy.org/>`_.
74 | 
75 | Our cute little `naked mole rat <http://www.youtube.com/watch?feature=player_detailpage&v=A5DcOEzW1wA#t=14s>`_ was drawn by `Johannes Koch <http://chechuchape.com/>`_.
76 | 


--------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Installation Guide
 3 | ==================
 4 | 
 5 | The easiest way is to install ``dataset`` from the `Python Package Index
 6 | <https://pypi.python.org/pypi/dataset/>`_ using ``pip`` or ``easy_install``:
 7 | 
 8 | .. code-block:: bash
 9 | 
10 |    $ pip install dataset
11 | 
12 | To install it manually simply download the repository from Github:
13 | 
14 | .. code-block:: bash
15 | 
16 |    $ git clone git://github.com/pudo/dataset.git
17 |    $ cd dataset/
18 |    $ python setup.py install
19 | 
20 | Depending on the type of database backend, you may also need to install a
21 | database specific driver package. For MySQL, this is ``MySQLdb``, for Postgres
22 | its ``psycopg2``. SQLite support is integrated into Python.
23 | 


--------------------------------------------------------------------------------
/docs/queries.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | .. _advanced_filters:
 3 | 
 4 | Advanced filters
 5 | ================
 6 | 
 7 | ``dataset`` provides two methods for running queries: :py:meth:`table.find() <dataset.Table.find>`
 8 | and :py:meth:`db.query() <dataset.Database.query>`. The table find helper method provides 
 9 | limited, but simple filtering options::
10 | 
11 |     results = table.find(column={operator: value})
12 |     # e.g.:
13 |     results = table.find(name={'like': '%mole rat%'})
14 | 
15 | A special form is using keyword searches on specific columns::
16 | 
17 |     results = table.find(value=5)
18 |     # equal to:
19 |     results = table.find(value={'=': 5})
20 | 
21 |     # Lists, tuples and sets are turned into `IN` queries:
22 |     results = table.find(category=('foo', 'bar'))
23 |     # equal to:
24 |     results = table.find(value={'in': ('foo', 'bar')})
25 | 
26 | The following comparison operators are supported:
27 | 
28 | ============== ============================================================
29 | Operator       Description
30 | ============== ============================================================
31 | gt, >          Greater than
32 | lt, <          Less than
33 | gte, >=        Greater or equal
34 | lte, <=        Less or equal
35 | !=, <>, not    Not equal to a single value
36 | in             Value is in the given sequence
37 | notin          Value is not in the given sequence
38 | like, ilike    Text search, ILIKE is case-insensitive. Use ``%`` as a wildcard
39 | notlike        Like text search, except check if pattern does not exist
40 | between, ..    Value is between two values in the given tuple
41 | startswith     String starts with
42 | endswith       String ends with
43 | ============== ============================================================
44 | 
45 | Querying for a specific value on a column that does not exist on the table
46 | will return no results.
47 | 
48 | You can also pass additional SQLAlchemy clauses into the :py:meth:`table.find() <dataset.Table.find>` method
49 | by falling back onto the SQLAlchemy core objects wrapped by `dataset`::
50 | 
51 |     # Get the column `city` from the dataset table:
52 |     column = table.table.columns.city
53 |     # Define a SQLAlchemy clause:
54 |     clause = column.ilike('amsterda%')
55 |     # Query using the clause:
56 |     results = table.find(clause)
57 | 
58 | This can also be used to define combined OR clauses if needed (e.g. `city = 'Bla' OR country = 'Foo'`).
59 | 
60 | Queries using raw SQL
61 | ---------------------
62 | 
63 | To run more complex queries with JOINs, or to perform GROUP BY-style
64 | aggregation, you can also use :py:meth:`db.query() <dataset.Database.query>`
65 | to run raw SQL queries instead. This also supports parameterisation to avoid
66 | SQL injections.
67 | 
68 | Finally, you should consider falling back to SQLAlchemy_ core to construct
69 | queries if you are looking for a programmatic, composable method of generating
70 | SQL in Python.
71 | 
72 | .. _SQLALchemy: https://docs.sqlalchemy.org/


--------------------------------------------------------------------------------
/docs/quickstart.rst:
--------------------------------------------------------------------------------
  1 | 
  2 | Quickstart
  3 | ==========
  4 | 
  5 | 
  6 | Hi, welcome to the twelve-minute quick-start tutorial.
  7 | 
  8 | Connecting to a database
  9 | ------------------------
 10 | 
 11 | At first you need to import the dataset package :) ::
 12 | 
 13 |    import dataset
 14 | 
 15 | To connect to a database you need to identify it by its `URL <http://docs.sqlalchemy.org/en/latest/core/engines.html#engine-creation-api>`_, which basically is a string of the form ``"dialect://user:password@host/dbname"``. Here are a few examples for different database backends::
 16 | 
 17 |    # connecting to a SQLite database
 18 |    db = dataset.connect('sqlite:///mydatabase.db')
 19 | 
 20 |    # connecting to a MySQL database with user and password
 21 |    db = dataset.connect('mysql://user:password@localhost/mydatabase')
 22 | 
 23 |    # connecting to a PostgreSQL database
 24 |    db = dataset.connect('postgresql://scott:tiger@localhost:5432/mydatabase')
 25 | 
 26 | It is also possible to define the `URL` as an environment variable called `DATABASE_URL`
 27 | so you can initialize database connection without explicitly passing an `URL`::
 28 | 
 29 |    db = dataset.connect()
 30 | 
 31 | Depending on which database you're using, you may also have to install
 32 | the database bindings to support that database. SQLite is included in
 33 | the Python core, but PostgreSQL requires ``psycopg2`` to be installed.
 34 | MySQL can be enabled by installing the ``mysql-db`` drivers.
 35 | 
 36 | 
 37 | Storing data
 38 | ------------
 39 | 
 40 | To store some data you need to get a reference to a table. You don't need
 41 | to worry about whether the table already exists or not, since dataset
 42 | will create it automatically::
 43 | 
 44 |    # get a reference to the table 'user'
 45 |    table = db['user']
 46 | 
 47 | Now storing data in a table is a matter of a single function call. Just
 48 | pass a `dict`_ to *insert*. Note that you don't need to create the columns
 49 | *name* and *age* – dataset will do this automatically::
 50 | 
 51 |    # Insert a new record.
 52 |    table.insert(dict(name='John Doe', age=46, country='China'))
 53 | 
 54 |    # dataset will create "missing" columns any time you insert a dict with an unknown key
 55 |    table.insert(dict(name='Jane Doe', age=37, country='France', gender='female'))
 56 | 
 57 | .. _dict: http://docs.python.org/2/library/stdtypes.html#dict
 58 | 
 59 | Updating existing entries is easy, too::
 60 | 
 61 |    table.update(dict(name='John Doe', age=47), ['name'])
 62 | 
 63 | The list of filter columns given as the second argument filter using the
 64 | values in the first column. If you don't want to update over a
 65 | particular value, just use the auto-generated ``id`` column.
 66 | 
 67 | Using Transactions
 68 | ------------------
 69 | 
 70 | You can group a set of database updates in a transaction. In that case, all updates
 71 | are committed at once or, in case of exception, all of them are reverted. Transactions
 72 | are supported through a context manager, so they can be used through a ``with``
 73 | statement::
 74 | 
 75 |     with dataset.connect() as tx:
 76 |         tx['user'].insert(dict(name='John Doe', age=46, country='China'))
 77 | 
 78 | You can get same functionality by invoking the methods :py:meth:`begin() <dataset.Table.begin>`,
 79 | :py:meth:`commit() <dataset.Table.commit>` and :py:meth:`rollback() <dataset.Table.rollback>`
 80 | explicitly::
 81 | 
 82 |     db = dataset.connect()
 83 |     db.begin()
 84 |     try:
 85 |         db['user'].insert(dict(name='John Doe', age=46, country='China'))
 86 |         db.commit()
 87 |     except:
 88 |         db.rollback()
 89 | 
 90 | Nested transactions are supported too::
 91 | 
 92 |     db = dataset.connect()
 93 |     with db as tx1:
 94 |         tx1['user'].insert(dict(name='John Doe', age=46, country='China'))
 95 |         with db as tx2:
 96 |             tx2['user'].insert(dict(name='Jane Doe', age=37, country='France', gender='female'))
 97 | 
 98 | 
 99 | 
100 | Inspecting databases and tables
101 | -------------------------------
102 | 
103 | When dealing with unknown databases we might want to check their structure
104 | first. To start exploring, let's find out what tables are stored in the
105 | database:
106 | 
107 |    >>> print(db.tables)
108 |    [u'user']
109 | 
110 | Now, let's list all columns available in the table ``user``:
111 | 
112 |    >>> print(db['user'].columns)
113 |    [u'id', u'country', u'age', u'name', u'gender']
114 | 
115 | Using ``len()`` we can get the total number of rows in a table:
116 | 
117 |    >>> print(len(db['user']))
118 |    2
119 | 
120 | Reading data from tables
121 | ------------------------
122 | 
123 | Now let's get some real data out of the table::
124 | 
125 |    users = db['user'].all()
126 | 
127 | If we simply want to iterate over all rows in a table, we can omit :py:meth:`all() <dataset.Table.all>`::
128 | 
129 |    for user in db['user']:
130 |       print(user['age'])
131 | 
132 | We can search for specific entries using :py:meth:`find() <dataset.Table.find>` and
133 | :py:meth:`find_one() <dataset.Table.find_one>`::
134 | 
135 |    # All users from China
136 |    chinese_users = table.find(country='China')
137 | 
138 |    # Get a specific user
139 |    john = table.find_one(name='John Doe')
140 | 
141 |    # Find multiple at once
142 |    winners = table.find(id=[1, 3, 7])
143 | 
144 |    # Find by comparison operator
145 |    elderly_users = table.find(age={'>=': 70})
146 |    possible_customers = table.find(age={'between': [21, 80]})
147 | 
148 |    # Use the underlying SQLAlchemy directly
149 |    elderly_users = table.find(table.table.columns.age >= 70)
150 | 
151 | See :ref:`advanced_filters` for details on complex filters.
152 | 
153 | Using  :py:meth:`distinct() <dataset.Table.distinct>` we can grab a set of rows
154 | with unique values in one or more columns::
155 | 
156 |    # Get one user per country
157 |    db['user'].distinct('country')
158 | 
159 | Finally, you can use the ``row_type`` parameter to choose the data type in which
160 | results will be returned::
161 | 
162 |     import dataset
163 |     from stuf import stuf
164 | 
165 |     db = dataset.connect('sqlite:///mydatabase.db', row_type=stuf)
166 | 
167 | Now contents will be returned in ``stuf`` objects (basically, ``dict``
168 | objects whose elements can be accessed as attributes (``item.name``) as well as
169 | by index (``item['name']``).
170 | 
171 | Running custom SQL queries
172 | --------------------------
173 | 
174 | Of course the main reason you're using a database is that you want to
175 | use the full power of SQL queries. Here's how you run them with ``dataset``::
176 | 
177 |    result = db.query('SELECT country, COUNT(*) c FROM user GROUP BY country')
178 |    for row in result:
179 |       print(row['country'], row['c'])
180 | 
181 | The :py:meth:`query() <dataset.Table.query>` method can also be used to
182 | access the underlying `SQLAlchemy core API <http://docs.sqlalchemy.org/en/latest/orm/query.html#the-query-object>`_, which allows for the
183 | programmatic construction of more complex queries::
184 | 
185 |    table = db['user'].table
186 |    statement = table.select(table.c.name.like('%John%'))
187 |    result = db.query(statement)
188 | 
189 | Limitations of dataset
190 | ----------------------
191 | 
192 | The goal of ``dataset`` is to make basic database operations simpler, by expressing
193 | some relatively basic operations in a Pythonic way. The downside of this approach
194 | is that as your application grows more complex, you may begin to need access to
195 | more advanced operations and be forced to switch to using SQLAlchemy proper, 
196 | without the dataset layer (instead, you may want to play with SQLAlchemy's ORM).
197 | 
198 | When that moment comes, take the hit. SQLAlchemy is an amazing piece of Python
199 | code, and it will provide you with idiomatic access to all of SQL's functions.
200 | 
201 | Some of the specific aspects of SQL that are not exposed in ``dataset``, and are 
202 | considered out of scope for the project, include:
203 | 
204 | * Foreign key relationships between tables, and expressing one-to-many and
205 |   many-to-many relationships in idiomatic Python.
206 | * Python-wrapped ``JOIN`` queries.
207 | * Creating databases, or managing DBMS software.
208 | * Support for Python 2.x
209 | 
210 | There's also some functionality that might be cool to support in the future, but
211 | that requires significant engineering:
212 | 
213 | * Async operations
214 | * Database-native ``UPSERT`` semantics
215 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | furo


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | 
4 | [flake8]
5 | ignore = E501,E123,E124,E126,E127,E128,E722,E741
6 | 
7 | [bdist_wheel]
8 | universal = 1
9 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | with open("README.md") as f:
 4 |     long_description = f.read()
 5 | 
 6 | setup(
 7 |     name="dataset",
 8 |     version="1.6.2",
 9 |     description="Toolkit for Python-based database access.",
10 |     long_description=long_description,
11 |     long_description_content_type="text/markdown",
12 |     classifiers=[
13 |         "Development Status :: 3 - Alpha",
14 |         "Intended Audience :: Developers",
15 |         "License :: OSI Approved :: MIT License",
16 |         "Operating System :: OS Independent",
17 |         "Programming Language :: Python :: 3.5",
18 |         "Programming Language :: Python :: 3.6",
19 |         "Programming Language :: Python :: 3.7",
20 |         "Programming Language :: Python :: 3.8",
21 |         "Programming Language :: Python :: 3.9",
22 |     ],
23 |     keywords="sql sqlalchemy etl loading utility",
24 |     author="Friedrich Lindenberg, Gregor Aisch, Stefan Wehrmeyer",
25 |     author_email="friedrich.lindenberg@gmail.com",
26 |     url="http://github.com/pudo/dataset",
27 |     license="MIT",
28 |     packages=find_packages(exclude=["ez_setup", "examples", "test"]),
29 |     namespace_packages=[],
30 |     include_package_data=False,
31 |     zip_safe=False,
32 |     install_requires=[
33 |         "sqlalchemy >= 1.3.2, < 2.0.0",
34 |         "alembic >= 0.6.2",
35 |         "banal >= 1.0.1",
36 |     ],
37 |     extras_require={
38 |         "dev": [
39 |             "pip",
40 |             "pytest",
41 |             "wheel",
42 |             "flake8",
43 |             "coverage",
44 |             "psycopg2-binary",
45 |             "PyMySQL",
46 |             "cryptography",
47 |         ]
48 |     },
49 |     tests_require=["pytest"],
50 |     test_suite="test",
51 |     entry_points={},
52 | )
53 | 


--------------------------------------------------------------------------------
/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pudo/dataset/b2ab09e58c6f17334e4286009b20887b6a8a8fac/test/__init__.py


--------------------------------------------------------------------------------
/test/sample_data.py:
--------------------------------------------------------------------------------
 1 | # -*- encoding: utf-8 -*-
 2 | from __future__ import unicode_literals
 3 | 
 4 | from datetime import datetime
 5 | 
 6 | 
 7 | TEST_CITY_1 = "B€rkeley"
 8 | TEST_CITY_2 = "G€lway"
 9 | 
10 | TEST_DATA = [
11 |     {"date": datetime(2011, 1, 1), "temperature": 1, "place": TEST_CITY_2},
12 |     {"date": datetime(2011, 1, 2), "temperature": -1, "place": TEST_CITY_2},
13 |     {"date": datetime(2011, 1, 3), "temperature": 0, "place": TEST_CITY_2},
14 |     {"date": datetime(2011, 1, 1), "temperature": 6, "place": TEST_CITY_1},
15 |     {"date": datetime(2011, 1, 2), "temperature": 8, "place": TEST_CITY_1},
16 |     {"date": datetime(2011, 1, 3), "temperature": 5, "place": TEST_CITY_1},
17 | ]
18 | 


--------------------------------------------------------------------------------
/test/test_dataset.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import unittest
  3 | from datetime import datetime
  4 | from collections import OrderedDict
  5 | from sqlalchemy import TEXT, BIGINT
  6 | from sqlalchemy.exc import IntegrityError, SQLAlchemyError, ArgumentError
  7 | 
  8 | from dataset import connect, chunked
  9 | 
 10 | from .sample_data import TEST_DATA, TEST_CITY_1
 11 | 
 12 | 
 13 | class DatabaseTestCase(unittest.TestCase):
 14 |     def setUp(self):
 15 |         self.db = connect()
 16 |         self.tbl = self.db["weather"]
 17 |         self.tbl.insert_many(TEST_DATA)
 18 | 
 19 |     def tearDown(self):
 20 |         for table in self.db.tables:
 21 |             self.db[table].drop()
 22 | 
 23 |     def test_valid_database_url(self):
 24 |         assert self.db.url, os.environ["DATABASE_URL"]
 25 | 
 26 |     def test_database_url_query_string(self):
 27 |         db = connect("sqlite:///:memory:/?cached_statements=1")
 28 |         assert "cached_statements" in db.url, db.url
 29 | 
 30 |     def test_tables(self):
 31 |         assert self.db.tables == ["weather"], self.db.tables
 32 | 
 33 |     def test_contains(self):
 34 |         assert "weather" in self.db, self.db.tables
 35 | 
 36 |     def test_create_table(self):
 37 |         table = self.db["foo"]
 38 |         assert self.db.has_table(table.table.name)
 39 |         assert len(table.table.columns) == 1, table.table.columns
 40 |         assert "id" in table.table.c, table.table.c
 41 | 
 42 |     def test_create_table_no_ids(self):
 43 |         if "mysql" in self.db.engine.dialect.dbapi.__name__:
 44 |             return
 45 |         if "sqlite" in self.db.engine.dialect.dbapi.__name__:
 46 |             return
 47 |         table = self.db.create_table("foo_no_id", primary_id=False)
 48 |         assert table.table.exists()
 49 |         assert len(table.table.columns) == 0, table.table.columns
 50 | 
 51 |     def test_create_table_custom_id1(self):
 52 |         pid = "string_id"
 53 |         table = self.db.create_table("foo2", pid, self.db.types.string(255))
 54 |         assert self.db.has_table(table.table.name)
 55 |         assert len(table.table.columns) == 1, table.table.columns
 56 |         assert pid in table.table.c, table.table.c
 57 |         table.insert({pid: "foobar"})
 58 |         assert table.find_one(string_id="foobar")[pid] == "foobar"
 59 | 
 60 |     def test_create_table_custom_id2(self):
 61 |         pid = "string_id"
 62 |         table = self.db.create_table("foo3", pid, self.db.types.string(50))
 63 |         assert self.db.has_table(table.table.name)
 64 |         assert len(table.table.columns) == 1, table.table.columns
 65 |         assert pid in table.table.c, table.table.c
 66 | 
 67 |         table.insert({pid: "foobar"})
 68 |         assert table.find_one(string_id="foobar")[pid] == "foobar"
 69 | 
 70 |     def test_create_table_custom_id3(self):
 71 |         pid = "int_id"
 72 |         table = self.db.create_table("foo4", primary_id=pid)
 73 |         assert self.db.has_table(table.table.name)
 74 |         assert len(table.table.columns) == 1, table.table.columns
 75 |         assert pid in table.table.c, table.table.c
 76 | 
 77 |         table.insert({pid: 123})
 78 |         table.insert({pid: 124})
 79 |         assert table.find_one(int_id=123)[pid] == 123
 80 |         assert table.find_one(int_id=124)[pid] == 124
 81 |         self.assertRaises(IntegrityError, lambda: table.insert({pid: 123}))
 82 | 
 83 |     def test_create_table_shorthand1(self):
 84 |         pid = "int_id"
 85 |         table = self.db.get_table("foo5", pid)
 86 |         assert table.table.exists
 87 |         assert len(table.table.columns) == 1, table.table.columns
 88 |         assert pid in table.table.c, table.table.c
 89 | 
 90 |         table.insert({"int_id": 123})
 91 |         table.insert({"int_id": 124})
 92 |         assert table.find_one(int_id=123)["int_id"] == 123
 93 |         assert table.find_one(int_id=124)["int_id"] == 124
 94 |         self.assertRaises(IntegrityError, lambda: table.insert({"int_id": 123}))
 95 | 
 96 |     def test_create_table_shorthand2(self):
 97 |         pid = "string_id"
 98 |         table = self.db.get_table(
 99 |             "foo6", primary_id=pid, primary_type=self.db.types.string(255)
100 |         )
101 |         assert table.table.exists
102 |         assert len(table.table.columns) == 1, table.table.columns
103 |         assert pid in table.table.c, table.table.c
104 | 
105 |         table.insert({"string_id": "foobar"})
106 |         assert table.find_one(string_id="foobar")["string_id"] == "foobar"
107 | 
108 |     def test_with(self):
109 |         init_length = len(self.db["weather"])
110 |         with self.assertRaises(ValueError):
111 |             with self.db as tx:
112 |                 tx["weather"].insert(
113 |                     {
114 |                         "date": datetime(2011, 1, 1),
115 |                         "temperature": 1,
116 |                         "place": "tmp_place",
117 |                     }
118 |                 )
119 |                 raise ValueError()
120 |         assert len(self.db["weather"]) == init_length
121 | 
122 |     def test_invalid_values(self):
123 |         if "mysql" in self.db.engine.dialect.dbapi.__name__:
124 |             # WARNING: mysql seems to be doing some weird type casting
125 |             # upon insert. The mysql-python driver is not affected but
126 |             # it isn't compatible with Python 3
127 |             # Conclusion: use postgresql.
128 |             return
129 |         with self.assertRaises(SQLAlchemyError):
130 |             tbl = self.db["weather"]
131 |             tbl.insert(
132 |                 {"date": True, "temperature": "wrong_value", "place": "tmp_place"}
133 |             )
134 | 
135 |     def test_load_table(self):
136 |         tbl = self.db.load_table("weather")
137 |         assert tbl.table.name == self.tbl.table.name
138 | 
139 |     def test_query(self):
140 |         r = self.db.query("SELECT COUNT(*) AS num FROM weather").next()
141 |         assert r["num"] == len(TEST_DATA), r
142 | 
143 |     def test_table_cache_updates(self):
144 |         tbl1 = self.db.get_table("people")
145 |         data = OrderedDict([("first_name", "John"), ("last_name", "Smith")])
146 |         tbl1.insert(data)
147 |         data["id"] = 1
148 |         tbl2 = self.db.get_table("people")
149 |         assert dict(tbl2.all().next()) == dict(data), (tbl2.all().next(), data)
150 | 
151 | 
152 | class TableTestCase(unittest.TestCase):
153 |     def setUp(self):
154 |         self.db = connect()
155 |         self.tbl = self.db["weather"]
156 |         for row in TEST_DATA:
157 |             self.tbl.insert(row)
158 | 
159 |     def tearDown(self):
160 |         self.tbl.drop()
161 | 
162 |     def test_insert(self):
163 |         assert len(self.tbl) == len(TEST_DATA), len(self.tbl)
164 |         last_id = self.tbl.insert(
165 |             {"date": datetime(2011, 1, 2), "temperature": -10, "place": "Berlin"}
166 |         )
167 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
168 |         assert self.tbl.find_one(id=last_id)["place"] == "Berlin"
169 | 
170 |     def test_insert_ignore(self):
171 |         self.tbl.insert_ignore(
172 |             {"date": datetime(2011, 1, 2), "temperature": -10, "place": "Berlin"},
173 |             ["place"],
174 |         )
175 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
176 |         self.tbl.insert_ignore(
177 |             {"date": datetime(2011, 1, 2), "temperature": -10, "place": "Berlin"},
178 |             ["place"],
179 |         )
180 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
181 | 
182 |     def test_insert_ignore_all_key(self):
183 |         for i in range(0, 4):
184 |             self.tbl.insert_ignore(
185 |                 {"date": datetime(2011, 1, 2), "temperature": -10, "place": "Berlin"},
186 |                 ["date", "temperature", "place"],
187 |             )
188 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
189 | 
190 |     def test_insert_json(self):
191 |         last_id = self.tbl.insert(
192 |             {
193 |                 "date": datetime(2011, 1, 2),
194 |                 "temperature": -10,
195 |                 "place": "Berlin",
196 |                 "info": {
197 |                     "currency": "EUR",
198 |                     "language": "German",
199 |                     "population": 3292365,
200 |                 },
201 |             }
202 |         )
203 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
204 |         assert self.tbl.find_one(id=last_id)["place"] == "Berlin"
205 | 
206 |     def test_upsert(self):
207 |         self.tbl.upsert(
208 |             {"date": datetime(2011, 1, 2), "temperature": -10, "place": "Berlin"},
209 |             ["place"],
210 |         )
211 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
212 |         self.tbl.upsert(
213 |             {"date": datetime(2011, 1, 2), "temperature": -10, "place": "Berlin"},
214 |             ["place"],
215 |         )
216 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
217 | 
218 |     def test_upsert_single_column(self):
219 |         table = self.db["banana_single_col"]
220 |         table.upsert({"color": "Yellow"}, ["color"])
221 |         assert len(table) == 1, len(table)
222 |         table.upsert({"color": "Yellow"}, ["color"])
223 |         assert len(table) == 1, len(table)
224 | 
225 |     def test_upsert_all_key(self):
226 |         assert len(self.tbl) == len(TEST_DATA), len(self.tbl)
227 |         for i in range(0, 2):
228 |             self.tbl.upsert(
229 |                 {"date": datetime(2011, 1, 2), "temperature": -10, "place": "Berlin"},
230 |                 ["date", "temperature", "place"],
231 |             )
232 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
233 | 
234 |     def test_upsert_id(self):
235 |         table = self.db["banana_with_id"]
236 |         data = dict(id=10, title="I am a banana!")
237 |         table.upsert(data, ["id"])
238 |         assert len(table) == 1, len(table)
239 | 
240 |     def test_update_while_iter(self):
241 |         for row in self.tbl:
242 |             row["foo"] = "bar"
243 |             self.tbl.update(row, ["place", "date"])
244 |         assert len(self.tbl) == len(TEST_DATA), len(self.tbl)
245 | 
246 |     def test_weird_column_names(self):
247 |         with self.assertRaises(ValueError):
248 |             self.tbl.insert(
249 |                 {
250 |                     "date": datetime(2011, 1, 2),
251 |                     "temperature": -10,
252 |                     "foo.bar": "Berlin",
253 |                     "qux.bar": "Huhu",
254 |                 }
255 |             )
256 | 
257 |     def test_cased_column_names(self):
258 |         tbl = self.db["cased_column_names"]
259 |         tbl.insert({"place": "Berlin"})
260 |         tbl.insert({"Place": "Berlin"})
261 |         tbl.insert({"PLACE ": "Berlin"})
262 |         assert len(tbl.columns) == 2, tbl.columns
263 |         assert len(list(tbl.find(Place="Berlin"))) == 3
264 |         assert len(list(tbl.find(place="Berlin"))) == 3
265 |         assert len(list(tbl.find(PLACE="Berlin"))) == 3
266 | 
267 |     def test_invalid_column_names(self):
268 |         tbl = self.db["weather"]
269 |         with self.assertRaises(ValueError):
270 |             tbl.insert({None: "banana"})
271 | 
272 |         with self.assertRaises(ValueError):
273 |             tbl.insert({"": "banana"})
274 | 
275 |         with self.assertRaises(ValueError):
276 |             tbl.insert({"-": "banana"})
277 | 
278 |     def test_delete(self):
279 |         self.tbl.insert(
280 |             {"date": datetime(2011, 1, 2), "temperature": -10, "place": "Berlin"}
281 |         )
282 |         original_count = len(self.tbl)
283 |         assert len(self.tbl) == len(TEST_DATA) + 1, len(self.tbl)
284 |         # Test bad use of API
285 |         with self.assertRaises(ArgumentError):
286 |             self.tbl.delete({"place": "Berlin"})
287 |         assert len(self.tbl) == original_count, len(self.tbl)
288 | 
289 |         assert self.tbl.delete(place="Berlin") is True, "should return 1"
290 |         assert len(self.tbl) == len(TEST_DATA), len(self.tbl)
291 |         assert self.tbl.delete() is True, "should return non zero"
292 |         assert len(self.tbl) == 0, len(self.tbl)
293 | 
294 |     def test_repr(self):
295 |         assert (
296 |             repr(self.tbl) == "<Table(weather)>"
297 |         ), "the representation should be <Table(weather)>"
298 | 
299 |     def test_delete_nonexist_entry(self):
300 |         assert (
301 |             self.tbl.delete(place="Berlin") is False
302 |         ), "entry not exist, should fail to delete"
303 | 
304 |     def test_find_one(self):
305 |         self.tbl.insert(
306 |             {"date": datetime(2011, 1, 2), "temperature": -10, "place": "Berlin"}
307 |         )
308 |         d = self.tbl.find_one(place="Berlin")
309 |         assert d["temperature"] == -10, d
310 |         d = self.tbl.find_one(place="Atlantis")
311 |         assert d is None, d
312 | 
313 |     def test_count(self):
314 |         assert len(self.tbl) == 6, len(self.tbl)
315 |         length = self.tbl.count(place=TEST_CITY_1)
316 |         assert length == 3, length
317 | 
318 |     def test_find(self):
319 |         ds = list(self.tbl.find(place=TEST_CITY_1))
320 |         assert len(ds) == 3, ds
321 |         ds = list(self.tbl.find(place=TEST_CITY_1, _limit=2))
322 |         assert len(ds) == 2, ds
323 |         ds = list(self.tbl.find(place=TEST_CITY_1, _limit=2, _step=1))
324 |         assert len(ds) == 2, ds
325 |         ds = list(self.tbl.find(place=TEST_CITY_1, _limit=1, _step=2))
326 |         assert len(ds) == 1, ds
327 |         ds = list(self.tbl.find(_step=2))
328 |         assert len(ds) == len(TEST_DATA), ds
329 |         ds = list(self.tbl.find(order_by=["temperature"]))
330 |         assert ds[0]["temperature"] == -1, ds
331 |         ds = list(self.tbl.find(order_by=["-temperature"]))
332 |         assert ds[0]["temperature"] == 8, ds
333 |         ds = list(self.tbl.find(self.tbl.table.columns.temperature > 4))
334 |         assert len(ds) == 3, ds
335 | 
336 |     def test_find_dsl(self):
337 |         ds = list(self.tbl.find(place={"like": "%lw%"}))
338 |         assert len(ds) == 3, ds
339 |         ds = list(self.tbl.find(temperature={">": 5}))
340 |         assert len(ds) == 2, ds
341 |         ds = list(self.tbl.find(temperature={">=": 5}))
342 |         assert len(ds) == 3, ds
343 |         ds = list(self.tbl.find(temperature={"<": 0}))
344 |         assert len(ds) == 1, ds
345 |         ds = list(self.tbl.find(temperature={"<=": 0}))
346 |         assert len(ds) == 2, ds
347 |         ds = list(self.tbl.find(temperature={"!=": -1}))
348 |         assert len(ds) == 5, ds
349 |         ds = list(self.tbl.find(temperature={"between": [5, 8]}))
350 |         assert len(ds) == 3, ds
351 |         ds = list(self.tbl.find(place={"=": "G€lway"}))
352 |         assert len(ds) == 3, ds
353 |         ds = list(self.tbl.find(place={"ilike": "%LwAy"}))
354 |         assert len(ds) == 3, ds
355 | 
356 |     def test_offset(self):
357 |         ds = list(self.tbl.find(place=TEST_CITY_1, _offset=1))
358 |         assert len(ds) == 2, ds
359 |         ds = list(self.tbl.find(place=TEST_CITY_1, _limit=2, _offset=2))
360 |         assert len(ds) == 1, ds
361 | 
362 |     def test_streamed(self):
363 |         ds = list(self.tbl.find(place=TEST_CITY_1, _streamed=True, _step=1))
364 |         assert len(ds) == 3, len(ds)
365 |         for row in self.tbl.find(place=TEST_CITY_1, _streamed=True, _step=1):
366 |             row["temperature"] = -1
367 |             self.tbl.update(row, ["id"])
368 | 
369 |     def test_distinct(self):
370 |         x = list(self.tbl.distinct("place"))
371 |         assert len(x) == 2, x
372 |         x = list(self.tbl.distinct("place", "date"))
373 |         assert len(x) == 6, x
374 |         x = list(
375 |             self.tbl.distinct(
376 |                 "place",
377 |                 "date",
378 |                 self.tbl.table.columns.date >= datetime(2011, 1, 2, 0, 0),
379 |             )
380 |         )
381 |         assert len(x) == 4, x
382 | 
383 |         x = list(self.tbl.distinct("temperature", place="B€rkeley"))
384 |         assert len(x) == 3, x
385 |         x = list(self.tbl.distinct("temperature", place=["B€rkeley", "G€lway"]))
386 |         assert len(x) == 6, x
387 |         x = list(self.tbl.distinct("temperature", _limit=3, place=["B€rkeley", "G€lway"]))
388 |         assert len(x) == 3, x
389 |         x = list(self.tbl.distinct("temperature", _limit=6, _offset=1, place=["B€rkeley", "G€lway"]))
390 |         assert len(x) == 5, x
391 | 
392 |     def test_insert_many(self):
393 |         data = TEST_DATA * 100
394 |         self.tbl.insert_many(data, chunk_size=13)
395 |         assert len(self.tbl) == len(data) + 6, (len(self.tbl), len(data))
396 | 
397 |     def test_chunked_insert(self):
398 |         data = TEST_DATA * 100
399 |         with chunked.ChunkedInsert(self.tbl) as chunk_tbl:
400 |             for item in data:
401 |                 chunk_tbl.insert(item)
402 |         assert len(self.tbl) == len(data) + 6, (len(self.tbl), len(data))
403 | 
404 |     def test_chunked_insert_callback(self):
405 |         data = TEST_DATA * 100
406 |         N = 0
407 | 
408 |         def callback(queue):
409 |             nonlocal N
410 |             N += len(queue)
411 | 
412 |         with chunked.ChunkedInsert(self.tbl, callback=callback) as chunk_tbl:
413 |             for item in data:
414 |                 chunk_tbl.insert(item)
415 |         assert len(data) == N
416 |         assert len(self.tbl) == len(data) + 6
417 | 
418 |     def test_update_many(self):
419 |         tbl = self.db["update_many_test"]
420 |         tbl.insert_many([dict(temp=10), dict(temp=20), dict(temp=30)])
421 |         tbl.update_many([dict(id=1, temp=50), dict(id=3, temp=50)], "id")
422 | 
423 |         # Ensure data has been updated.
424 |         assert tbl.find_one(id=1)["temp"] == tbl.find_one(id=3)["temp"]
425 | 
426 |     def test_chunked_update(self):
427 |         tbl = self.db["update_many_test"]
428 |         tbl.insert_many(
429 |             [
430 |                 dict(temp=10, location="asdf"),
431 |                 dict(temp=20, location="qwer"),
432 |                 dict(temp=30, location="asdf"),
433 |             ]
434 |         )
435 | 
436 |         chunked_tbl = chunked.ChunkedUpdate(tbl, "id")
437 |         chunked_tbl.update(dict(id=1, temp=50))
438 |         chunked_tbl.update(dict(id=2, location="asdf"))
439 |         chunked_tbl.update(dict(id=3, temp=50))
440 |         chunked_tbl.flush()
441 | 
442 |         # Ensure data has been updated.
443 |         assert tbl.find_one(id=1)["temp"] == tbl.find_one(id=3)["temp"] == 50
444 |         assert (
445 |             tbl.find_one(id=2)["location"] == tbl.find_one(id=3)["location"] == "asdf"
446 |         )  # noqa
447 | 
448 |     def test_upsert_many(self):
449 |         # Also tests updating on records with different attributes
450 |         tbl = self.db["upsert_many_test"]
451 | 
452 |         W = 100
453 |         tbl.upsert_many([dict(age=10), dict(weight=W)], "id")
454 |         assert tbl.find_one(id=1)["age"] == 10
455 | 
456 |         tbl.upsert_many([dict(id=1, age=70), dict(id=2, weight=W / 2)], "id")
457 |         assert tbl.find_one(id=2)["weight"] == W / 2
458 | 
459 |     def test_drop_operations(self):
460 |         assert self.tbl._table is not None, "table shouldn't be dropped yet"
461 |         self.tbl.drop()
462 |         assert self.tbl._table is None, "table should be dropped now"
463 |         assert list(self.tbl.all()) == [], self.tbl.all()
464 |         assert self.tbl.count() == 0, self.tbl.count()
465 | 
466 |     def test_table_drop(self):
467 |         assert "weather" in self.db
468 |         self.db["weather"].drop()
469 |         assert "weather" not in self.db
470 | 
471 |     def test_table_drop_then_create(self):
472 |         assert "weather" in self.db
473 |         self.db["weather"].drop()
474 |         assert "weather" not in self.db
475 |         self.db["weather"].insert({"foo": "bar"})
476 | 
477 |     def test_columns(self):
478 |         cols = self.tbl.columns
479 |         assert len(list(cols)) == 4, "column count mismatch"
480 |         assert "date" in cols and "temperature" in cols and "place" in cols
481 | 
482 |     def test_drop_column(self):
483 |         try:
484 |             self.tbl.drop_column("date")
485 |             assert "date" not in self.tbl.columns
486 |         except RuntimeError:
487 |             pass
488 | 
489 |     def test_iter(self):
490 |         c = 0
491 |         for row in self.tbl:
492 |             c += 1
493 |         assert c == len(self.tbl)
494 | 
495 |     def test_update(self):
496 |         date = datetime(2011, 1, 2)
497 |         res = self.tbl.update(
498 |             {"date": date, "temperature": -10, "place": TEST_CITY_1}, ["place", "date"]
499 |         )
500 |         assert res, "update should return True"
501 |         m = self.tbl.find_one(place=TEST_CITY_1, date=date)
502 |         assert m["temperature"] == -10, (
503 |             "new temp. should be -10 but is %d" % m["temperature"]
504 |         )
505 | 
506 |     def test_create_column(self):
507 |         tbl = self.tbl
508 |         flt = self.db.types.float
509 |         tbl.create_column("foo", flt)
510 |         assert "foo" in tbl.table.c, tbl.table.c
511 |         assert isinstance(tbl.table.c["foo"].type, flt), tbl.table.c["foo"].type
512 |         assert "foo" in tbl.columns, tbl.columns
513 | 
514 |     def test_ensure_column(self):
515 |         tbl = self.tbl
516 |         flt = self.db.types.float
517 |         tbl.create_column_by_example("foo", 0.1)
518 |         assert "foo" in tbl.table.c, tbl.table.c
519 |         assert isinstance(tbl.table.c["foo"].type, flt), tbl.table.c["bar"].type
520 |         tbl.create_column_by_example("bar", 1)
521 |         assert "bar" in tbl.table.c, tbl.table.c
522 |         assert isinstance(tbl.table.c["bar"].type, BIGINT), tbl.table.c["bar"].type
523 |         tbl.create_column_by_example("pippo", "test")
524 |         assert "pippo" in tbl.table.c, tbl.table.c
525 |         assert isinstance(tbl.table.c["pippo"].type, TEXT), tbl.table.c["pippo"].type
526 |         tbl.create_column_by_example("bigbar", 11111111111)
527 |         assert "bigbar" in tbl.table.c, tbl.table.c
528 |         assert isinstance(tbl.table.c["bigbar"].type, BIGINT), tbl.table.c[
529 |             "bigbar"
530 |         ].type
531 |         tbl.create_column_by_example("littlebar", -11111111111)
532 |         assert "littlebar" in tbl.table.c, tbl.table.c
533 |         assert isinstance(tbl.table.c["littlebar"].type, BIGINT), tbl.table.c[
534 |             "littlebar"
535 |         ].type
536 | 
537 |     def test_key_order(self):
538 |         res = self.db.query("SELECT temperature, place FROM weather LIMIT 1")
539 |         keys = list(res.next().keys())
540 |         assert keys[0] == "temperature"
541 |         assert keys[1] == "place"
542 | 
543 |     def test_empty_query(self):
544 |         empty = list(self.tbl.find(place="not in data"))
545 |         assert len(empty) == 0, empty
546 | 
547 | 
548 | class Constructor(dict):
549 |     """Very simple low-functionality extension to ``dict`` to
550 |     provide attribute access to dictionary contents"""
551 | 
552 |     def __getattr__(self, name):
553 |         return self[name]
554 | 
555 | 
556 | class RowTypeTestCase(unittest.TestCase):
557 |     def setUp(self):
558 |         self.db = connect(row_type=Constructor)
559 |         self.tbl = self.db["weather"]
560 |         for row in TEST_DATA:
561 |             self.tbl.insert(row)
562 | 
563 |     def tearDown(self):
564 |         for table in self.db.tables:
565 |             self.db[table].drop()
566 | 
567 |     def test_find_one(self):
568 |         self.tbl.insert(
569 |             {"date": datetime(2011, 1, 2), "temperature": -10, "place": "Berlin"}
570 |         )
571 |         d = self.tbl.find_one(place="Berlin")
572 |         assert d["temperature"] == -10, d
573 |         assert d.temperature == -10, d
574 |         d = self.tbl.find_one(place="Atlantis")
575 |         assert d is None, d
576 | 
577 |     def test_find(self):
578 |         ds = list(self.tbl.find(place=TEST_CITY_1))
579 |         assert len(ds) == 3, ds
580 |         for item in ds:
581 |             assert isinstance(item, Constructor), item
582 |         ds = list(self.tbl.find(place=TEST_CITY_1, _limit=2))
583 |         assert len(ds) == 2, ds
584 |         for item in ds:
585 |             assert isinstance(item, Constructor), item
586 | 
587 |     def test_distinct(self):
588 |         x = list(self.tbl.distinct("place"))
589 |         assert len(x) == 2, x
590 |         for item in x:
591 |             assert isinstance(item, Constructor), item
592 |         x = list(self.tbl.distinct("place", "date"))
593 |         assert len(x) == 6, x
594 |         for item in x:
595 |             assert isinstance(item, Constructor), item
596 | 
597 |     def test_iter(self):
598 |         c = 0
599 |         for row in self.tbl:
600 |             c += 1
601 |             assert isinstance(row, Constructor), row
602 |         assert c == len(self.tbl)
603 | 
604 | 
605 | if __name__ == "__main__":
606 |     unittest.main()
607 | 


--------------------------------------------------------------------------------