├── docs
├── _static
│ ├── .keep
│ ├── example.sqlite3
│ ├── data_prep_poll.png
│ ├── development_build.jpg
│ ├── reference_data_example.zip
│ ├── failure_message_example.zip
│ ├── example.csv
│ ├── tutorial
│ │ ├── estimated_totals.csv
│ │ ├── test_movies_df.py
│ │ ├── movies.csv
│ │ ├── test_movies_df_unit.py
│ │ ├── test_country_of_birth.py
│ │ ├── test_country_of_birth_unit.py
│ │ ├── modified_test_country_of_birth.py
│ │ ├── modified_test_country_of_birth_unit.py
│ │ ├── test_intro1.py
│ │ ├── test_intro2.py
│ │ ├── modified_country_of_birth.csv
│ │ ├── country_of_birth.csv
│ │ ├── test_intro1_unit.py
│ │ └── test_intro2_unit.py
│ ├── test_users.py
│ ├── test_users_unit.py
│ ├── excel_autoformat.csv
│ ├── theme_overrides.css
│ ├── test_validation.py
│ ├── mydata.csv
│ ├── users.csv
│ └── test_errors.py
├── _build
│ └── .gitignore
├── _templates
│ └── layout.html
├── discussion
│ ├── terminology.rst
│ ├── project-history.rst
│ ├── index.rst
│ ├── validate-vs-accept.rst
│ ├── organizing-tests.rst
│ └── data-preparation.rst
├── requirements.txt
├── how-to
│ ├── install.rst
│ ├── run-tests.rst
│ ├── index.rst
│ ├── negative-matches.rst
│ ├── reorder-acceptances.rst
│ ├── get-started.rst
│ ├── date-time-str.rst
│ ├── fuzzy-matching.rst
│ ├── sequences.rst
│ ├── phone-numbers.rst
│ ├── excel-auto-formatting.rst
│ └── customize-differences.rst
├── intro
│ └── index.rst
├── reference
│ ├── index.rst
│ └── unittest-support.rst
├── _ext
│ └── autodoc_classinstance.py
├── index.rst
└── tutorial
│ └── testing-pandas.rst
├── tests
├── __init__.py
├── sample_files
│ ├── sample_text_utf8.csv
│ ├── sample_excel1997.xls
│ ├── sample_excel2007.xlsx
│ ├── sample_dbase.dbf
│ ├── sample_text_iso88591.csv
│ ├── test_sources_excel.xlsx
│ └── sample_multiworksheet.xlsx
├── _io.py
├── past_api07_sources_base.py
├── past_api09.py
├── _contextlib.py
├── past_api07_sources_excel.py
├── test_past_subprocesses.py
├── past_api07_error.py
├── common.py
├── test_pandas_integration.py
├── past_api07_sources_pandas.py
├── test_runner.py
├── test_utils_misc.py
├── past_api00.py
├── past_api07_sources_sqlite.py
└── past_api09_load_csv.py
├── setup.cfg
├── datatest
├── _vendor
│ └── __init__.py
├── _compatibility
│ ├── __init__.py
│ ├── itertools.py
│ ├── abc.py
│ ├── statistics.py
│ ├── textwrap.py
│ ├── contextlib.py
│ ├── collections
│ │ └── abc.py
│ ├── builtins.py
│ ├── functools.py
│ └── decimal.py
├── __past__
│ ├── api_dev0.py
│ ├── api_dev1.py
│ ├── api_dev2.py
│ ├── api010.py
│ ├── squint
│ │ └── __init__.py
│ ├── __init__.py
│ ├── api07_error.py
│ ├── api09.py
│ ├── api00.py
│ ├── load_csv.py
│ └── api06.py
├── __main__.py
├── __init__.py
├── _excepthook.py
├── _working_directory.py
├── main.py
└── _normalize.py
├── MANIFEST.in
├── requirements-dev.txt
├── AUTHORS
├── .readthedocs.yml
├── LICENSE
├── .travis.yml
├── .gitignore
├── run-tests.sh
├── run-tests.bat
└── release-checklist.rst
/docs/_static/.keep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1
3 |
--------------------------------------------------------------------------------
/datatest/_vendor/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/datatest/_compatibility/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
--------------------------------------------------------------------------------
/tests/sample_files/sample_text_utf8.csv:
--------------------------------------------------------------------------------
1 | col1,col2
2 | utf8,α
3 |
--------------------------------------------------------------------------------
/docs/_build/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore everything in this directory
2 | *
3 | # Except this file
4 | !.gitignore
5 |
--------------------------------------------------------------------------------
/docs/_static/example.sqlite3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawnbrown/datatest/HEAD/docs/_static/example.sqlite3
--------------------------------------------------------------------------------
/docs/_static/data_prep_poll.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawnbrown/datatest/HEAD/docs/_static/data_prep_poll.png
--------------------------------------------------------------------------------
/datatest/__past__/api_dev0.py:
--------------------------------------------------------------------------------
1 | """alias for api00"""
2 | from __future__ import absolute_import
3 | from .api00 import *
4 |
--------------------------------------------------------------------------------
/datatest/__past__/api_dev1.py:
--------------------------------------------------------------------------------
1 | """alias for api06"""
2 | from __future__ import absolute_import
3 | from .api06 import *
4 |
--------------------------------------------------------------------------------
/datatest/__past__/api_dev2.py:
--------------------------------------------------------------------------------
1 | """alias for api07"""
2 | from __future__ import absolute_import
3 | from .api07 import *
4 |
--------------------------------------------------------------------------------
/docs/_static/development_build.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawnbrown/datatest/HEAD/docs/_static/development_build.jpg
--------------------------------------------------------------------------------
/docs/_static/reference_data_example.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawnbrown/datatest/HEAD/docs/_static/reference_data_example.zip
--------------------------------------------------------------------------------
/tests/sample_files/sample_excel1997.xls:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawnbrown/datatest/HEAD/tests/sample_files/sample_excel1997.xls
--------------------------------------------------------------------------------
/docs/_static/failure_message_example.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawnbrown/datatest/HEAD/docs/_static/failure_message_example.zip
--------------------------------------------------------------------------------
/tests/sample_files/sample_excel2007.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawnbrown/datatest/HEAD/tests/sample_files/sample_excel2007.xlsx
--------------------------------------------------------------------------------
/docs/_static/example.csv:
--------------------------------------------------------------------------------
1 | "A","B","C"
2 | "x","foo",20
3 | "x","foo",30
4 | "y","foo",10
5 | "y","bar",20
6 | "z","bar",10
7 | "z","bar",10
8 |
--------------------------------------------------------------------------------
/tests/sample_files/sample_dbase.dbf:
--------------------------------------------------------------------------------
1 | a COL1 C COL2 N
dBASE1
--------------------------------------------------------------------------------
/tests/sample_files/sample_text_iso88591.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawnbrown/datatest/HEAD/tests/sample_files/sample_text_iso88591.csv
--------------------------------------------------------------------------------
/tests/sample_files/test_sources_excel.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawnbrown/datatest/HEAD/tests/sample_files/test_sources_excel.xlsx
--------------------------------------------------------------------------------
/tests/sample_files/sample_multiworksheet.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shawnbrown/datatest/HEAD/tests/sample_files/sample_multiworksheet.xlsx
--------------------------------------------------------------------------------
/datatest/__past__/api010.py:
--------------------------------------------------------------------------------
1 | """Backward compatibility for version 0.10 API."""
2 | from __future__ import absolute_import
3 |
4 | # This is a stub for future use.
5 |
6 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.rst
2 | include AUTHORS
3 | include LICENSE
4 | include requirements.txt
5 | recursive-include datatest *.py
6 | recursive-include tests *.py
7 | recursive-include tests/sample_files *.*
8 |
--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 |
3 | {#
4 | {% block menu %}
5 | {{ super() }}
6 | Package Index
7 | {% endblock %}
8 | #}
9 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/estimated_totals.csv:
--------------------------------------------------------------------------------
1 | state/territory,population
2 | Australian Capital Territory,389785
3 | Jervis Bay Territory,388
4 | New South Wales,7507350
5 | Northern Territory,226412
6 | Queensland,4721503
7 | South Australia,1637325
8 | Tasmania,514245
9 | Victoria,5849330
10 | Western Australia,2451380
11 |
--------------------------------------------------------------------------------
/datatest/__past__/squint/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """squint: simple query interface for tabular data
3 |
4 | PYTEST_DONT_REWRITE
5 | """
6 | from __future__ import absolute_import
7 |
8 | from .query import BaseElement
9 | from .query import Select
10 | from .query import Query
11 | from .query import Result
12 |
--------------------------------------------------------------------------------
/docs/discussion/terminology.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | .. currentmodule:: datatest
4 |
5 | .. meta::
6 | :description: A discussion about the language and vocabulary used in datatest.
7 | :keywords: data, validation, quality, glossary, terms
8 |
9 |
10 | ####################
11 | Notes on Terminology
12 | ####################
13 |
14 |
--------------------------------------------------------------------------------
/docs/discussion/project-history.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | .. currentmodule:: datatest
4 |
5 | .. meta::
6 | :description: A brief discussion on the history and origins of datatest at the NCEC.
7 | :keywords: datatest, history, NCEC, National Committee for an Effective Congress
8 |
9 |
10 | ################
11 | Datatest History
12 | ################
13 |
14 |
--------------------------------------------------------------------------------
/datatest/_compatibility/itertools.py:
--------------------------------------------------------------------------------
"""compatibility layer for itertools (Python standard library)"""
from __future__ import absolute_import
from itertools import *

try:
    # Both spellings are new in Python 3 (and are always present
    # together there), so a single probe covers both names.
    filterfalse
    zip_longest
except NameError:
    # Python 2 exposes only the 'i'-prefixed spellings; alias them.
    filterfalse = ifilterfalse
    zip_longest = izip_longest
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | # ==============================
2 | # Requirements for Read The Docs
3 | # ==============================
4 | #
5 | # The following requirements are additional dependencies that
6 | # https://readthedocs.io needs to install so it can properly
7 | # generate the documentation for datatest.
8 |
9 | sphinx>=2.1.0
10 | sphinx-tabs
11 | sphinx_rtd_theme>=0.3.1
12 |
13 |
--------------------------------------------------------------------------------
/datatest/_compatibility/abc.py:
--------------------------------------------------------------------------------
"""compatibility layer for abc (Python standard library)"""
from __future__ import absolute_import
from abc import *


try:
    # ABC is new in 3.4; its __slots__ attribute is new in 3.7. A
    # missing ABC raises NameError, a slot-less one AttributeError.
    ABC.__slots__
except (NameError, AttributeError):
    # Build an equivalent base class using syntax that works on
    # both Python 2 and Python 3.
    ABC = ABCMeta('ABC', (object,), {'__slots__': ()})
--------------------------------------------------------------------------------
/docs/how-to/install.rst:
--------------------------------------------------------------------------------
1 |
2 | .. currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: How to Install Datatest
6 | :keywords: installing, datatest, python
7 |
8 |
9 | #######################
10 | How to Install Datatest
11 | #######################
12 |
13 | .. include:: ../../README.rst
14 | :start-after: start-inclusion-marker-install
15 | :end-before: end-inclusion-marker-install
16 |
--------------------------------------------------------------------------------
/datatest/__past__/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Backwards compatibility for phased-out features and behaviors.
3 |
4 | To use a feature that is no longer supported in the current version of
5 | datatest, use the following:
6 |
7 | from datatest.__past__ import api
8 |
9 | For example, importing 'api07' would provide backwards compatibility
10 | for the API as implemented in the 0.7 version of datatest.
11 | """
12 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | # ========================
2 | # Development Dependencies
3 | # ========================
4 | #
5 | # These are not installation requirements!
6 | #
7 | # The following dependencies are only required for
8 | # testing, building, and documentation generation.
9 | #
10 | # pip install -r requirements-dev.txt
11 |
12 | dbfread
13 | ipython
14 | numpy
15 | pandas
16 | squint
17 | xlrd==1.2.0
18 | sphinx>=2.1.0
19 | sphinx-tabs
20 | sphinx_rtd_theme>=0.3.1
21 | twine
22 | wheel
23 |
24 |
--------------------------------------------------------------------------------
/tests/_io.py:
--------------------------------------------------------------------------------
"""compatibility layer for io (Python standard library)"""
from __future__ import absolute_import
from io import *
from sys import version_info as _version_info


if _version_info[:2] <= (2, 7):  # For version 2.7 and earlier.
    import StringIO as _StringIO

    # NOTE: The original code assigned ``StringIO = _StringIO.StringIO``
    # here, but that binding was dead code--it was immediately shadowed
    # by the class definition below, so it has been removed.
    class StringIO(_StringIO.StringIO):
        """A StringIO that coerces written values to unicode.

        Python 2's StringIO.StringIO mishandles mixed str/unicode
        writes; normalizing to unicode up front avoids that.
        """
        def write(self, s):
            # Coerce before delegating to the Python 2 implementation.
            return _StringIO.StringIO.write(self, unicode(s))
--------------------------------------------------------------------------------
/datatest/_compatibility/statistics.py:
--------------------------------------------------------------------------------
"""compatibility layer for statistics (Python standard library)"""
from __future__ import absolute_import
from __future__ import division

try:
    from statistics import *
except ImportError:
    # The 'statistics' module is new in Python 3.4; provide minimal
    # stand-ins for the names this package relies on.

    class StatisticsError(ValueError):
        pass


    def median(data):
        """Return the middle value of *data* (or the mean of the two
        middle values when the length is even). Raises StatisticsError
        for empty input, mirroring the standard-library behavior.
        """
        ordered = sorted(data)
        size = len(ordered)
        if not size:
            raise StatisticsError('no median for empty data')
        mid = size // 2
        if size % 2:
            return ordered[mid]
        return (ordered[mid - 1] + ordered[mid]) / 2
--------------------------------------------------------------------------------
/datatest/__main__.py:
--------------------------------------------------------------------------------
1 | """Main entry point"""
2 |
3 | import sys
4 | if sys.argv[0].endswith('__main__.py'):
5 | import os.path
6 | # We change sys.argv[0] to make help message more useful
7 | # use executable without path, unquoted
8 | # (it's just a hint anyway)
9 | # (if you have spaces in your executable you get what you deserve!)
10 | executable = os.path.basename(sys.executable)
11 | sys.argv[0] = executable + ' -m datatest'
12 | del os
13 |
14 | __unittest = True
15 | __datatest = True
16 |
17 |
18 | from .main import main, DataTestProgram
19 |
20 | main(module=None)
21 |
--------------------------------------------------------------------------------
/datatest/_compatibility/textwrap.py:
--------------------------------------------------------------------------------
"""compatibility layer for textwrap (Python standard library)"""
from __future__ import absolute_import
from textwrap import *


try:
    indent  # New in 3.3
except NameError:
    def indent(text, prefix, predicate=None):
        """Add *prefix* to selected lines of *text* (backport of the
        3.3+ function). By default, only lines that contain more than
        just whitespace are prefixed.
        """
        if predicate is None:
            def predicate(line):
                return line.strip()
        return ''.join(
            (prefix + line) if predicate(line) else line
            for line in text.splitlines(True)
        )
--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | Datatest was originally created at NCEC Services, LLC in 2014
2 | by Shawn Brown as 'dataaudit'. In 2015 the project was largely
3 | rewritten and renamed to 'datatest'.
4 |
5 | Work-for-hire Contributors:
6 |
7 | * Shawn Brown (development lead)
8 | *
9 |
10 | Personal Contributors:
11 |
12 | * Shawn Brown
13 | *
14 |
15 | A big thank you goes out to:
16 |
17 | Heather Blum-Pastor for numerous ideas and feedback.
18 |
19 | Brian Fraher, Bilen Estephanos, and Eric Hawkins who helped spec-out
20 | the initial API on our snowy train ride to New York City in February
21 | of 2014.
22 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | # .readthedocs.yml
2 | # Read the Docs configuration file
3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4 |
5 | # Required
6 | version: 2
7 |
8 | # Build documentation in the docs/ directory with Sphinx
9 | sphinx:
10 | configuration: docs/conf.py
11 |
12 | # Build documentation with MkDocs
13 | #mkdocs:
14 | # configuration: mkdocs.yml
15 |
16 | # Optionally build your docs in additional formats such as PDF and ePub
17 | formats: all
18 |
19 | # Optionally set the version of Python and requirements required to build your docs
20 | python:
21 | version: 3
22 | install:
23 | - requirements: docs/requirements.txt
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2014 - 2021 National Committee for an Effective Congress,
2 | NCEC Services LLC, and contributing authors
3 |
4 | Licensed under the Apache License, Version 2.0 (the "License");
5 | you may not use datatest except in compliance with the License.
6 | You may obtain a copy of the License at
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 |
--------------------------------------------------------------------------------
/docs/_static/test_users.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from datatest import working_directory
3 | from datatest import Select
4 | from datatest import validate
5 |
6 |
7 | @pytest.fixture(scope='module')
8 | @working_directory(__file__)
9 | def users():
10 | return Select('users.csv')
11 |
12 |
13 | @pytest.mark.mandatory
14 | def test_columns(users):
15 | validate(users.fieldnames, {'user_id', 'active'})
16 |
17 |
18 | def test_user_id(users):
19 |
20 | def is_wellformed(x): # <- Helper function.
21 | return x[:-1].isdigit() and x[-1:].isupper()
22 |
23 | validate(users('user_id'), is_wellformed)
24 |
25 |
26 | def test_active(users):
27 | validate(users({'active'}), {'Y', 'N'})
28 |
--------------------------------------------------------------------------------
/docs/intro/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. meta::
3 | :description: Table of Contents for Introduction.
4 | :keywords:
5 | :title: Introduction
6 |
7 | .. sectionauthor:: Shawn Brown
8 |
9 |
10 | ############
11 | Introduction
12 | ############
13 |
14 | .. epigraph::
15 |
16 | *"...tidy datasets are all alike but every messy dataset is messy
17 | in its own way"*
18 | ---Hadley Wickham [#f1]_
19 |
20 |
21 | .. toctree::
22 | :maxdepth: 2
23 |
24 | tour-of-datatest
25 | Automated Testing
26 | Pipeline Validation
27 | Validating Pandas
28 |
29 |
30 | .. [#f1] Wickham, Hadley. "Tidy Data." Journal of Statistical Software 59,
31 | no. 10, August 2014.
32 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 |
3 | matrix:
4 | include:
5 | - python: 3.10-dev
6 | - python: 3.9-dev
7 | - python: 3.8-dev
8 | - python: 3.7
9 | - python: 3.6
10 | - python: 3.5
11 | - python: 3.4
12 | - python: 3.3
13 | dist: trusty
14 | - python: 3.2
15 | dist: trusty
16 | # - python: 3.1 # not currently supported by Travis CI
17 | - python: 2.7
18 | - python: 2.6
19 | dist: trusty
20 | - python: pypy3
21 | - python: pypy
22 |
23 | install: true
24 | #install:
25 | # - pip install xlrd
26 | # - pip install pandas
27 |
28 | # command to run tests and check installation
29 | script:
30 | - python setup.py test
31 | - python -c 'import setuptools;print(setuptools.__version__)'
32 | - python setup.py install
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files.
2 | __pycache__/
3 | *.pyc
4 | *.pyo
5 | *.pyd
6 |
7 | # C extensions.
8 | *.so
9 |
10 | # Distribution / packaging.
11 | .Python
12 | env/
13 | bin/
14 | build/
15 | develop-eggs/
16 | dist/
17 | eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # Manifest built with MANIFEST.in
28 | MANIFEST
29 |
30 | # Installer logs.
31 | pip-log.txt
32 | pip-delete-this-directory.txt
33 |
34 | # Unit test / coverage reports.
35 | htmlcov/
36 | .tox/
37 | .coverage
38 | .cache
39 | nosetests.xml
40 | coverage.xml
41 |
42 | # Translations.
43 | *.mo
44 |
45 | # Sphinx documentation.
46 | docs/_build/
47 |
48 | # Environments
49 | .env
50 | .venv
51 | env/
52 | venv/
53 | ENV/
54 | env.bak/
55 | venv.bak/
56 |
--------------------------------------------------------------------------------
/docs/_static/test_users_unit.py:
--------------------------------------------------------------------------------
1 | from datatest import working_directory
2 | from datatest import Select
3 | from datatest import DataTestCase
4 | from datatest import mandatory
5 |
6 |
7 | def setUpModule():
8 | global users
9 | with working_directory(__file__):
10 | users = Select('users.csv')
11 |
12 |
13 | class TestUserData(DataTestCase):
14 |
15 | @mandatory
16 | def test_columns(self):
17 | self.assertValid(users.fieldnames, {'user_id', 'active'})
18 |
19 | def test_user_id(self):
20 |
21 | def is_wellformed(x): # <- Helper function.
22 | return x[:-1].isdigit() and x[-1:].isupper()
23 |
24 | self.assertValid(users('user_id'), is_wellformed)
25 |
26 | def test_active(self):
27 | self.assertValid(users({'active'}), {'Y', 'N'})
28 |
--------------------------------------------------------------------------------
/docs/discussion/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. meta::
3 | :description: Table of contents for discussion documentation.
4 | :keywords:
5 | :title: Discussion
6 |
7 | .. sectionauthor:: Shawn Brown
8 |
9 |
10 | ##########
11 | Discussion
12 | ##########
13 |
14 | .. epigraph::
15 |
16 | *"The right information cannot be extracted from the wrong data."*
17 | ---Russell Ackoff [#f1]_
18 |
19 |
20 | .. toctree::
21 | :maxdepth: 2
22 |
23 | Organizing Tests
24 | Tips and Tricks
25 | data-preparation
26 |
27 | ..
28 | OMIT UNFINISHED PAGES:
29 | validate-vs-accept
30 | terminology
31 | project-history
32 |
33 |
34 | .. [#f1] Ackoff, Russell L. "Ackoff's Best", New York: John Wiley & Sons, Inc.,
35 | 1999. p. 172.
36 |
--------------------------------------------------------------------------------
/docs/reference/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. meta::
3 | :description: Table of Contents for Reference.
4 | :keywords:
5 | :title: Reference
6 |
7 |
8 | ####################################
9 | Reference
10 | ####################################
11 |
12 | .. epigraph::
13 |
14 | *"A tool is best if it does the job required with a minimum of
15 | effort, with a minimum of complexity, and with a minimum of power."*
16 | ---Peter Drucker [#f1]_
17 |
18 |
19 | .. toctree::
20 | :maxdepth: 2
21 |
22 | Datatest Core
23 | Data Handling
24 | unittest-support
25 |
26 | See the :ref:`Package Index <genindex>` for a full list of classes
27 | and objects.
28 |
29 |
30 | .. [#f1] Drucker, Peter F. "Management: Tasks, Responsibilities, Practices",
31 | New York: Harper & Row, 1973. p. 224.
32 |
--------------------------------------------------------------------------------
/docs/discussion/validate-vs-accept.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | .. currentmodule:: datatest
4 |
5 | .. meta::
6 | :description: A discussion about when it's appropriate to assert
7 | data requirements and when it's appropriate to accept
8 | deviations.
9 | :keywords: data, validation, quality, acceptance
10 |
11 |
12 | ########################
13 | Validation vs Acceptance
14 | ########################
15 |
16 | ..
17 | validate adherence to a loose requirement
18 | or accept specified deviation
19 |
20 | what's the difference?
21 | does it matter?
22 |
23 | quicker to validate loose requirement
24 | than it is to generate a bunch of differences that must then be accepted
25 | but unless execution time is prohibitive, favor semantic accuracy over
26 | misleading-optimization
27 |
28 |
--------------------------------------------------------------------------------
/docs/_static/excel_autoformat.csv:
--------------------------------------------------------------------------------
1 | A,B
2 | 106,ABY-22
3 | 109,ACZ-31
4 | 116,AFA-34
5 | 129,AFV-02
6 | 184,AFY-16
7 | 191,AGF-30
8 | 200,AGK-06
9 | 204,AGW-29
10 | 244,AGZ-08
11 | 252,AHB-28
12 | 255,AIZ-04
13 | 256,ALE-49
14 | 284,AMR-41
15 | 292,AOJ-35
16 | 294,AOX-18
17 | 295,APR-10
18 | 298,AQV-25
19 | 314,ATF-21
20 | 325,AUP-48
21 | 333,AVV-32
22 | 342,AXB-44
23 | 361,AXP-47
24 | 385,APE-07
25 | 391,AZL-36
26 | 414,BAF-37
27 | 418,BES-24
28 | 429,BEW-17
29 | 430,BGO-39
30 | 442,BGW-42
31 | 454,BKE-45
32 | 461,BMO-46
33 | 511,BNT-03
34 | 569,BNW-05
35 | 591,BNX-27
36 | 622,BPD-12
37 | 635,BVD-26
38 | 691,BWP-38
39 | 692,CMO-40
40 | 703,CPX-14
41 | 725,CQO-09
42 | 746,CSA-11
43 | 792,CSD-15
44 | 810,CSN-13
45 | 819,CUT-19
46 | 836,CWK-43
47 | 874,CYL-23
48 | 887,DBB-01
49 | 895,DEC-20
50 | 906,DNZ-33
51 | 981,DVH-50
52 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/test_movies_df.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import pytest
4 | import pandas as pd
5 | import datatest as dt
6 |
7 |
8 | @pytest.fixture(scope='module')
9 | @dt.working_directory(__file__)
10 | def df():
11 | return pd.read_csv('movies.csv')
12 |
13 |
14 | @pytest.mark.mandatory
15 | def test_columns(df):
16 | dt.validate(
17 | df.columns,
18 | {'title', 'rating', 'year', 'runtime'},
19 | )
20 |
21 |
22 | def test_title(df):
23 | dt.validate.regex(df['title'], r'^[A-Z]')
24 |
25 |
26 | def test_rating(df):
27 | dt.validate.superset(
28 | df['rating'],
29 | {'G', 'PG', 'PG-13', 'R', 'NC-17', 'Not Rated'},
30 | )
31 |
32 |
33 | def test_year(df):
34 | dt.validate(df['year'], int)
35 |
36 |
37 | def test_runtime(df):
38 | dt.validate(df['runtime'], int)
39 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/movies.csv:
--------------------------------------------------------------------------------
1 | title,rating,year,runtime
2 | Almost Famous,R,2000,122
3 | American Pie,R,1999,95
4 | Back to the Future,PG,1985,116
5 | Blade Runner,R,1982,117
6 | Blood for Dracula,R,1974,106
7 | Blue Velvet,R,1986,120
8 | The Breakfast Club,R,1985,97
9 | Clueless,PG-13,1995,97
10 | Cool Hand Luke,GP,1967,127
11 | The Craft,R,1996,101
12 | Doctor Zhivago,PG-13,1965,197
13 | el Topo,Not Rated,1970,125
14 | Evil Dead,NC-17,1981,85
15 | Ghostbusters,PG,1984,105
16 | Grease,PG-13,1978,110
17 | Heathers,R,1988,103
18 | Labyrinth,PG,1986,101
19 | The Lost Boys,R,1987,97
20 | Mean Girls,PG-13,2004,97
21 | Millennium Actress,PG,2001,87
22 | My Neighbor Totoro,G,1988,86
23 | Napoleon Dynamite,PG,2004,96
24 | Pee-wee's Big Adventure,PG,1985,91
25 | Pretty in Pink,PG-13,1986,97
26 | The Princess Bride,PG,1987,98
27 | Psycho,R,1960,109
28 | Stand by Me,R,1986,89
29 | Super 8,PG-13,2011,112
30 | superbad,R,2007,113
31 | WarGames,PG,1983,114
32 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/test_movies_df_unit.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import pandas as pd
4 | import datatest as dt
5 |
6 |
7 | def setUpModule():
8 | global df
9 | with dt.working_directory(__file__):
10 | df = pd.read_csv('movies.csv')
11 |
12 |
13 | class TestMovies(dt.DataTestCase):
14 | @dt.mandatory
15 | def test_columns(self):
16 | self.assertValid(
17 | df.columns,
18 | {'title', 'rating', 'year', 'runtime'},
19 | )
20 |
21 | def test_title(self):
22 | self.assertValidRegex(df['title'], r'^[A-Z]')
23 |
24 | def test_rating(self):
25 | self.assertValidSuperset(
26 | df['rating'],
27 | {'G', 'PG', 'PG-13', 'R', 'NC-17', 'Not Rated'},
28 | )
29 |
30 | def test_year(self):
31 | self.assertValid(df['year'], int)
32 |
33 | def test_runtime(self):
34 | self.assertValid(df['runtime'], int)
35 |
--------------------------------------------------------------------------------
/tests/past_api07_sources_base.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from . import _unittest as unittest
3 |
4 | from datatest.__past__.api07_sources import MinimalSource
5 | from .mixins import OtherTests
6 | from .mixins import CountTests
7 |
8 |
class TestBaseSource(OtherTests, unittest.TestCase):
    """Run the shared OtherTests suite against a MinimalSource."""

    fieldnames = ['label1', 'label2', 'value']
    testdata = [
        ['a', 'x', '17'],
        ['a', 'x', '13'],
        ['a', 'y', '20'],
        ['a', 'z', '15'],
        ['b', 'z', '5'],
        ['b', 'y', '40'],
        ['b', 'x', '25'],
    ]

    def setUp(self):
        # Fresh source per test so mutations cannot leak between tests.
        self.datasource = MinimalSource(self.testdata, self.fieldnames)
21 |
22 |
class TestDataSourceCount(CountTests, unittest.TestCase):
    def setUp(self):
        """Define self.datasource (base version uses MinimalSource)."""
        # NOTE(review): self.testdata and self.fieldnames are presumably
        # supplied by the CountTests mixin -- confirm in tests/mixins.py.
        self.datasource = MinimalSource(self.testdata, self.fieldnames)
27 |
--------------------------------------------------------------------------------
/tests/past_api09.py:
--------------------------------------------------------------------------------
1 | """Test API for 0.9.x compatibility."""
2 | from . import _unittest as unittest
3 | import datatest
4 | from datatest.__past__ import api09 # <- MONKEY PATCH!!!
5 |
6 | # IMPORT ADDITIONAL TESTS
7 | #from .past_api09_query import *
8 |
9 |
class TestSubsetAndSupersetMethods(unittest.TestCase):
    """Semantics were inverted in the following version (0.10.x)."""

    def test_subset(self):
        """Check old-style 0.9.x API validate.subset() behavior."""
        members = set(['A', 'B'])
        datatest.validate.subset(['A', 'B', 'C', 'D'], members)

    def test_superset(self):
        """Check old-style 0.9.x API validate.superset() behavior."""
        members = set(['A', 'B', 'C', 'D'])
        datatest.validate.superset(['A', 'B'], members)
24 |
25 |
if __name__ == '__main__':
    unittest.main()
else:
    # Importing `datatest.__past__.api09` above monkey-patches datatest
    # globally; refusing to run as an import keeps that patch from
    # leaking into sibling test modules in the same process.
    raise Exception('This test must be run directly or as a subprocess.')
30 |
--------------------------------------------------------------------------------
/datatest/__init__.py:
--------------------------------------------------------------------------------
1 | """Datatest: Test driven data-wrangling and data validation.
2 |
3 | PYTEST_DONT_REWRITE
4 | """
5 |
6 | from __future__ import absolute_import
7 |
8 | __version__ = '0.12.0.dev1'
9 |
10 | # Datatest Core API (__all__ property defined in submodules)
11 | from .validation import * # Validation error and functions.
12 | from .differences import * # Difference classes.
13 | from .acceptances import accepted
14 | from ._vendor.predicate import Predicate
15 |
16 | # Pandas extensions.
17 | from ._pandas_integration import register_accessors
18 |
19 | # Unittest-style API
20 | from .case import DataTestCase
21 | from .runner import mandatory
22 | from .runner import DataTestRunner
23 | from .main import DataTestProgram
24 | from .main import main
25 |
26 | # Data Handling API
27 | from ._working_directory import working_directory
28 | from ._vendor.repeatingcontainer import RepeatingContainer
29 |
30 | #############################################
31 | # Register traceback formatting handler.
32 | #############################################
33 | from . import _excepthook
34 | import sys as _sys
35 | _sys.excepthook = _excepthook.excepthook
36 |
--------------------------------------------------------------------------------
/docs/how-to/run-tests.rst:
--------------------------------------------------------------------------------
1 |
2 | .. py:currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: How to run tests.
6 | :keywords: datatest, run, tests, unittest, pytest
7 |
8 |
9 | ################
10 | How to Run Tests
11 | ################
12 |
13 | ======
14 | Pytest
15 | ======
16 |
17 | If you have a pytest style script named ``test_mydata.py``,
18 | you can run it by typing the following at the command line:
19 |
20 | .. code-block:: console
21 |
22 | pytest test_mydata.py
23 |
24 | You invoke pytest just as you would in any other circumstance---see
25 | pytest's standard |pytest-usage|_ for full details.
26 |
27 |
28 | ========
29 | Unittest
30 | ========
31 |
32 | If you have a unittest style script named ``test_mydata.py``,
33 | you can run it by typing the following at the command line:
34 |
35 | .. code-block:: console
36 |
37 | python -m datatest test_mydata.py
38 |
39 | Datatest includes a unittest-style test runner that facilitates
40 | incremental testing. It runs tests in declaration order (i.e.,
by line-number) and supports the :func:`@mandatory <datatest.mandatory>`
42 | decorator.
43 |
44 |
45 | ..
46 | SUBSTITUTIONS:
47 |
48 | .. |pytest-usage| replace:: Usage and Invocations
49 | .. _pytest-usage: https://docs.pytest.org/en/latest/usage.html
50 |
51 |
--------------------------------------------------------------------------------
/datatest/_compatibility/contextlib.py:
--------------------------------------------------------------------------------
1 | """compatibility layer for contextlib (Python standard library)"""
2 | from __future__ import absolute_import
3 | from contextlib import *
4 | from . import functools
5 |
6 |
try:
    ContextDecorator # New in Python 3.2
except NameError:
    # Adapted from Python 3.6 standard library.
    class ContextDecorator(object):
        """A base class that lets a context manager also be used as a
        function decorator (backport for Python < 3.2).
        """
        def _recreate_cm(self): # The `_recreate_cm` method is a private
            return self # interface for _GeneratorContextManager.
                                # See issue #11647 for details.

        def __call__(self, func):
            @functools.wraps(func)
            def inner(*args, **kwds):
                # Re-enter a (possibly fresh) context manager around
                # every call to the decorated function.
                with self._recreate_cm():
                    return func(*args, **kwds)
            return inner
22 |
23 |
try:
    suppress # New in Python 3.4
except NameError:
    # Adapted from Python 3.6 standard library.
    class suppress(object):
        """Context manager to suppress specified exceptions."""
        def __init__(self, *exceptions):
            self._exceptions = exceptions

        def __enter__(self):
            pass

        def __exit__(self, exctype, excinst, exctb):
            # Returning True tells Python not to re-raise; swallow the
            # exception only when it matches a registered type.
            return exctype is not None and issubclass(exctype, self._exceptions)
38 |
--------------------------------------------------------------------------------
/docs/_static/theme_overrides.css:
--------------------------------------------------------------------------------
1 | /*
2 | Since themes can be loaded after this style sheet is applied, the
3 | declarations below should use the "!important" annotation so they
4 | will take precedence over corresponding declarations defined later.
5 | */
6 |
7 |
8 | /*
9 | In the sphinx_rtd_theme (version 0.4.2, as of this update), table
10 | cells do not wrap text by default. This can make for unnecessarily
11 | wide tables that scroll off the page. The following declarations
12 | allow lines to wrap when the width is 445px or greater.
13 |
14 | This solution is adapted from ideas discussed on the following
15 | issue:
16 |
17 | https://github.com/rtfd/sphinx_rtd_theme/issues/117
18 | */
@media screen and (min-width: 445px) {
    .wy-table-responsive table td {
        white-space: normal !important; /* allow cell text to wrap */
    }
    .wy-table-responsive {
        overflow: visible !important; /* no scrollbar once cells wrap */
    }
}


/*
  The sphinx_rtd_theme (as of version 0.5.0) does not include styles
  for "details" or "summary" elements.
*/
details {
    margin-bottom: 1em;
}

summary {
    margin-bottom: 1em;
    cursor: pointer; /* signal that the summary is clickable */
}

summary:hover {
    background: rgb(240, 240, 240); /* fallback if no "rgba" support */
    background-color: rgba(0, 0, 0, 0.0625);
}
46 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/test_country_of_birth.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 | from datatest import working_directory
4 | from datatest import Select
5 | from datatest import validate
6 | from datatest import accepted
7 | from datatest import Missing, Extra, Deviation, Invalid
8 |
9 |
10 | # Define fixtures.
11 |
@pytest.fixture(scope='module')
@working_directory(__file__)
def detail():
    """Selector for the detailed country-of-birth records."""
    select = Select('country_of_birth.csv')
    return select
16 |
17 |
@pytest.fixture(scope='module')
@working_directory(__file__)
def summary():
    """Selector for the estimated population totals."""
    select = Select('estimated_totals.csv')
    return select
22 |
23 |
24 | # Begin tests.
25 |
@pytest.mark.mandatory
def test_columns(detail, summary):
    """The detail fieldnames must match the summary fieldnames."""
    validate(detail.fieldnames, set(summary.fieldnames))
31 |
32 |
def test_state_labels(detail, summary):
    """Every state/territory label must also appear in the summary."""
    validate(detail({'state/territory'}), summary({'state/territory'}))
38 |
39 |
def test_population_format(detail):
    """Population values must be strings of decimal digits."""
    def integer_format(value):  # <- Helper function.
        return str(value).isdecimal()

    validate(detail({'population'}), integer_format)
47 |
48 |
def test_population_sums(detail, summary):
    """Detail populations must sum to the summary totals per state."""
    fields = {'state/territory': 'population'}
    validate(detail(fields).sum(), summary(fields).sum())
54 |
--------------------------------------------------------------------------------
/docs/_static/test_validation.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import re
3 | import datatest
4 |
5 |
class TestExample(datatest.DataTestCase):
    """Demonstrate the requirement types accepted by assertValid()."""

    def test_membership_in_set(self):
        data = ['x', 'x', 'y', 'y', 'z', 'z']
        requirement = {'x', 'y', 'z'}  # <- set
        self.assertValid(data, requirement)

    def test_function_returns_true(self):
        data = ['X', 'X', 'Y', 'Y']
        def requirement(x):  # <- callable (helper function)
            return x.isupper()
        self.assertValid(data, requirement)

    def test_regex_matches(self):
        data = ['foo', 'foo', 'foo', 'bar', 'bar', 'bar']
        # Use a raw string so the regex escape `\w` is not interpreted as
        # a string escape sequence -- '^\w\w\w$' (non-raw) triggers a
        # DeprecationWarning and becomes a SyntaxError in future Python.
        # The pattern's byte value is unchanged.
        requirement = re.compile(r'^\w\w\w$')  # <- regex object
        self.assertValid(data, requirement)

    def test_equality(self):
        data = ['x', 'x', 'x']
        requirement = 'x'  # <- other (not container, callable, or regex)
        self.assertValid(data, requirement)

    def test_order(self):
        data = ['x', 'x', 'y', 'y', 'z', 'z']
        requirement = ['x', 'x', 'y', 'y', 'z', 'z']  # <- sequence
        self.assertValid(data, requirement)

    def test_mapping(self):
        data = {'x': 'foo', 'y': 'bar'}
        requirement = {'x': 'foo', 'y': 'bar'}  # <- mapping
        self.assertValid(data, requirement)
37 |
38 |
if __name__ == '__main__':
    datatest.main()  # <- Run with datatest's unittest-style runner.
41 |
--------------------------------------------------------------------------------
/docs/how-to/index.rst:
--------------------------------------------------------------------------------
1 |
2 | .. meta::
3 | :description: Table of Contents for How-to Guide.
4 | :keywords:
5 | :title: How-to Guide
6 |
7 | .. py:currentmodule:: datatest
8 | .. moduleauthor:: Shawn Brown
9 | .. sectionauthor:: Shawn Brown
10 |
11 |
12 | ############
13 | How-to Guide
14 | ############
15 |
16 | .. epigraph::
17 |
18 | *"Hell is other people's data."*
19 | ---Jim Harris [#f1]_
20 |
21 |
22 | .. toctree::
23 | :maxdepth: 1
24 |
25 | Install Datatest
26 | Get Started Testing
27 | Run Tests
28 | Column Names
29 | Customize Differences
30 | Data Types
31 | Date and Time Strings
32 | Date and Time Objects
33 | File Names
34 | Test File Properties
35 | Excel Auto-Formatting
36 | Mailing Addresses
37 | Fuzzy Matching
38 | NaN Values
39 | Negative Matches
40 | Outliers
41 | Phone Numbers
42 | Re-order Acceptances
43 | Sequences
44 |
45 |
46 | .. [#f1] Harris, Jim. "Hell is other people’s data", OCDQ (blog), August 06, 2010,
47 | Retrieved from http://www.ocdqblog.com/home/hell-is-other-peoples-data.html
48 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/test_country_of_birth_unit.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 | from datatest import working_directory
4 | from datatest import Select
5 | from datatest import DataTestCase
6 | from datatest import mandatory
7 | from datatest import Missing, Extra, Deviation, Invalid
8 |
9 |
10 | # Define fixtures.
11 |
def setUpModule():
    """Create module-level selectors shared by every test case."""
    global detail, summary

    with working_directory(__file__):  # resolve CSVs relative to this file
        detail = Select('country_of_birth.csv')
        summary = Select('estimated_totals.csv')
19 |
20 |
21 | # Begin tests.
22 |
class TestPopulation(DataTestCase):
    """Validate the country-of-birth details against the summary file."""

    @mandatory
    def test_columns(self):
        # Detail fieldnames must match the summary fieldnames.
        self.assertValid(detail.fieldnames, set(summary.fieldnames))

    def test_state_labels(self):
        # Every state/territory label must also appear in the summary.
        self.assertValid(
            detail({'state/territory'}),
            summary({'state/territory'}),
        )

    def test_population_format(self):
        # Population values must be strings of decimal digits.
        def integer_format(value):  # <- Helper function.
            return str(value).isdecimal()

        self.assertValid(detail({'population'}), integer_format)

    def test_population_sums(self):
        # Detail populations must sum to the summary totals per state.
        fields = {'state/territory': 'population'}
        self.assertValid(detail(fields).sum(), summary(fields).sum())
50 |
--------------------------------------------------------------------------------
/docs/_static/mydata.csv:
--------------------------------------------------------------------------------
1 | user_id,active
2 | 999,Y
3 | 1000,Y
4 | 1001,N
5 | 1002,N
6 | 1003,Y
7 | 1004,Y
8 | 1005,Y
9 | 1006,N
10 | 1007,Y
11 | 1008,Y
12 | 1009,N
13 | 1010,N
14 | 1011,Y
15 | 1012,Y
16 | 1013,Y
17 | 1014,Y
18 | 1015,Y
19 | 1016,Y
20 | 1017,Y
21 | 1018,Y
22 | 1019,Y
23 | 1020,Y
24 | 1021,N
25 | 1022,N
26 | 1023,Y
27 | 1024,N
28 | 1025,Y
29 | 1026,Y
30 | 1027,Y
31 | 1028,N
32 | 1029,N
33 | 1030,N
34 | 1031,N
35 | 1032,Y
36 | 1033,Y
37 | 1034,Y
38 | 1035,N
39 | 1036,Y
40 | 1037,Y
41 | 1038,Y
42 | 1039,Y
43 | 1040,N
44 | 1041,Y
45 | 1042,Y
46 | 1043,Y
47 | 1044,N
48 | 1045,N
49 | 1046,Y
50 | 1047,Y
51 | 1048,N
52 | 1049,N
53 | 1050,N
54 | 1051,N
55 | 1052,Y
56 | 1053,Y
57 | 1054,Y
58 | 1055,Y
59 | 1056,Y
60 | 1057,Y
61 | 1058,Y
62 | 1059,Y
63 | 1060,Y
64 | 1061,Y
65 | 1062,N
66 | 1063,N
67 | 1064,Y
68 | 1065,Y
69 | 1066,Y
70 | 1067,Y
71 | 1068,Y
72 | 1069,Y
73 | 1070,Y
74 | 1071,Y
75 | 1072,Y
76 | 1073,Y
77 | 1074,N
78 | 1075,Y
79 | 1076,Y
80 | 1077,N
81 | 1078,Y
82 | 1079,Y
83 | 1080,Y
84 | 1081,N
85 | 1082,Y
86 | 1083,Y
87 | 1084,N
88 | 1085,N
89 | 1086,Y
90 | 1087,Y
91 | 1088,Y
92 | 1089,Y
93 | 1090,Y
94 | 1091,N
95 | 1092,Y
96 | 1093,N
97 | 1094,N
98 | 1095,Y
99 | 1096,N
100 | 1097,Y
101 | 1098,Y
102 | 1099,N
103 | 1100,N
104 | 1101,Y
105 | 1102,Y
106 | 1103,Y
107 | 1104,Y
108 | 1105,Y
109 | 1106,N
110 | 1107,N
111 | 1108,Y
112 | 1109,Y
113 | 1110,Y
114 | 1111,N
115 | 1112,N
116 | 1113,Y
117 | 1114,Y
118 | 1115,Y
119 | 1116,Y
120 | 1117,Y
121 | 1118,N
122 |
--------------------------------------------------------------------------------
/datatest/_compatibility/collections/abc.py:
--------------------------------------------------------------------------------
1 | """compatibility layer for collections.abc (Python standard library)"""
2 | from __future__ import absolute_import
3 | try:
4 | from collections.abc import * # New in 3.3
5 | except ImportError:
6 | # Previously, the collection ABCs were in the root namespace.
7 | from collections import (
8 | Container,
9 | Hashable,
10 | Iterable,
11 | Iterator,
12 | Sized,
13 | Callable,
14 | Sequence,
15 | MutableSequence,
16 | Set,
17 | MutableSet,
18 | Mapping,
19 | MutableMapping,
20 | MappingView,
21 | KeysView,
22 | ItemsView,
23 | ValuesView,
24 | )
25 |
26 |
try:
    Collection # New in 3.6
except NameError:
    # Adapted from Python 3.6 standard library.
    def _check_methods(C, *methods):
        # Walk C's MRO looking for each required method; a method set
        # to None anywhere in the MRO explicitly disables the protocol.
        mro = C.__mro__
        for method in methods:
            for B in mro:
                if method in B.__dict__:
                    if B.__dict__[method] is None:
                        return NotImplemented
                    break
            else:
                return NotImplemented
        return True


    # Adapted from Python 3.6 standard library.
    class Collection(Sized, Iterable, Container):
        __slots__ = ()

        @classmethod
        def __subclasshook__(cls, C):
            # Structural check: any class with __len__, __iter__ and
            # __contains__ counts as a virtual subclass of Collection.
            if cls is Collection:
                return _check_methods(C, '__len__', '__iter__', '__contains__')
52 |
--------------------------------------------------------------------------------
/tests/_contextlib.py:
--------------------------------------------------------------------------------
1 | """compatibility layer for contextlib (Python standard library)"""
2 | from __future__ import absolute_import
3 | from contextlib import *
4 |
5 |
try:
    redirect_stderr # New in 3.5
except NameError:
    # Adapted from Python 3.5 Standard Library.
    import sys as _sys
    class _RedirectStream:
        # Name of the sys attribute to swap (set by subclasses).
        _stream = None

        def __init__(self, new_target):
            self._new_target = new_target
            # A stack is kept so the same instance can be re-entered.
            self._old_targets = []

        def __enter__(self):
            self._old_targets.append(getattr(_sys, self._stream))
            setattr(_sys, self._stream, self._new_target)
            return self._new_target

        def __exit__(self, exctype, excinst, exctb):
            # Restore whatever stream was active before __enter__.
            setattr(_sys, self._stream, self._old_targets.pop())

    class redirect_stderr(_RedirectStream):
        """Context manager for temporarily redirecting stderr to
        another file.
        """
        _stream = 'stderr'
31 |
32 |
try:
    redirect_stdout # New in 3.4
except NameError:
    # NOTE(review): this branch only runs on Python < 3.4, in which case
    # the `except NameError` branch above has already defined the
    # _RedirectStream base class used here.
    class redirect_stdout(_RedirectStream):
        """Context manager for temporarily redirecting stdout to
        another file.

        # How to send help() to stderr
        with redirect_stdout(sys.stderr):
            help(dir)

        # How to write help() to a file
        with open('help.txt', 'w') as f:
            with redirect_stdout(f):
                help(pow)
        """
        _stream = 'stdout'
50 |
--------------------------------------------------------------------------------
/datatest/__past__/api07_error.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import pprint
3 |
4 |
class DataError(AssertionError):
    """Raised when :meth:`assertValid` finds differences between *data*
    and *requirement*.
    """
    def __init__(self, msg, differences, subject=None, required=None):
        """Initialize self, store *differences* for later reference.

        Raises ValueError when *differences* is empty or falsy.
        """
        if not differences:
            raise ValueError('Missing differences.')
        self._differences = differences
        self.msg = msg
        self.subject = str(subject)    # Subject data source.
        self.required = str(required)  # Required object or reference source.
        self._verbose = False  # <- Set by DataTestResult if verbose.

        # Idiom fix: __init__() previously *returned* the result of
        # AssertionError.__init__() (always None) -- harmless but
        # misleading, so the `return` was dropped.
        AssertionError.__init__(self, msg)

    @property
    def differences(self):
        """An iterable (list or dict) of differences."""
        return self._differences

    def __repr__(self):
        return self.__class__.__name__ + ': ' + self.__str__()

    def __str__(self):
        # Strip the enclosing brackets from the pretty-printed container
        # so the differences read as a plain, indented listing.
        diff = pprint.pformat(self.differences, width=1)
        if any([diff.startswith('{') and diff.endswith('}'),
                diff.startswith('[') and diff.endswith(']'),
                diff.startswith('(') and diff.endswith(')')]):
            diff = diff[1:-1]

        if self._verbose:
            msg_extras = '\n\nSUBJECT:\n{0}\nREQUIRED:\n{1}'
            msg_extras = msg_extras.format(self.subject, self.required)
        else:
            msg_extras = ''

        return '{0}:\n {1}{2}'.format(self.msg, diff, msg_extras)
43 |
--------------------------------------------------------------------------------
/docs/_static/users.csv:
--------------------------------------------------------------------------------
1 | USER_ID,ACTIVE
2 | 0999F,Y
3 | 1000C,Y
4 | 1001C,n
5 | 1002A,n
6 | 1003C,Y
7 | 1004E,Y
8 | 1005H,Y
9 | 1006E,n
10 | 1007H,Y
11 | 1008A,Y
12 | 1009F,n
13 | 1010D,n
14 | 1011H,Y
15 | 1012H,Y
16 | 1013E,Y
17 | 1014D,Y
18 | 1015C,Y
19 | 1016H,Y
20 | 1017G,Y
21 | 1018A,Y
22 | 1019H,Y
23 | 1020E,Y
24 | 1021H,n
25 | 1022A,n
26 | 1023B,Y
27 | 1024D,n
28 | 1025C,Y
29 | 1026B,Y
30 | 1027H,Y
31 | 1028B,n
32 | 1029A,n
33 | 1030H,n
34 | 1031A,n
35 | 1032G,y
36 | 1033H,y
37 | 1034F,y
38 | 1035F,n
39 | 1036E,y
40 | 1037E,y
41 | 1038G,y
42 | 1039G,y
43 | 1040A,n
44 | 1041A,Y
45 | 1042H,Y
46 | 1043B,Y
47 | 1044G,n
48 | 1045A,n
49 | 1046A,Y
50 | 1047H,Y
51 | 1048D,n
52 | 1049A,n
53 | 1050H,n
54 | 1051A,n
55 | 1052E,Y
56 | 1053A,Y
57 | 1054G,Y
58 | 1055C,Y
59 | 1056a,Y
60 | 1057F,Y
61 | 1058D,Y
62 | 1059H,Y
63 | 1060A,YES
64 | 1061D,YES
65 | 1062E,NO
66 | 1063C,NO
67 | 1064H,YES
68 | 1065A,YES
69 | 1066F,YES
70 | 1067A,YES
71 | 1068F,YES
72 | 1069D,YES
73 | 1070H,YES
74 | 1071E,YES
75 | 1072G,YES
76 | 1073B,YES
77 | 1074B,NO
78 | 1075B,Y
79 | 1076A,Y
80 | 1077A,n
81 | 1078H,Y
82 | 1079C,Y
83 | 1080F,Y
84 | 1081B,n
85 | 1082F,Y
86 | 1083F,Y
87 | 1084F,n
88 | 1085H,n
89 | 1086G,Y
90 | 1087C,Y
91 | 1088A,Y
92 | 1089A,Y
93 | 1090E,Y
94 | 1091B,n
95 | 1092C,Y
96 | 1093G,n
97 | 1094B,n
98 | 1095C,Y
99 | 1096A,n
100 | 1097E,Y
101 | 1098C,Y
102 | 1099b,n
103 | 1100G,n
104 | 1101B,Y
105 | 1102C,Y
106 | 1103A,Y
107 | 1104H,Y
108 | 1105H,Y
109 | 1106A,n
110 | 1107E,n
111 | 1108E,Y
112 | 1109G,Y
113 | 1110B,Y
114 | 1111F,n
115 | 1112D,n
116 | 1113B,Y
117 | 1114H,Y
118 | 1115A,Y
119 | 1116B,Y
120 | 1117B,Y
121 | 1118D,n
122 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/modified_test_country_of_birth.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 | from datatest import working_directory
4 | from datatest import Select
5 | from datatest import validate
6 | from datatest import accepted
7 | from datatest import Missing, Extra, Deviation, Invalid
8 |
9 |
10 | # Define fixtures.
11 |
@pytest.fixture(scope='module')
@working_directory(__file__)
def detail():
    """Selector for the detailed country-of-birth records."""
    select = Select('country_of_birth.csv')
    return select
16 |
17 |
@pytest.fixture(scope='module')
@working_directory(__file__)
def summary():
    """Selector for the estimated population totals."""
    select = Select('estimated_totals.csv')
    return select
22 |
23 |
24 | # Begin tests.
25 |
@pytest.mark.mandatory
def test_columns(detail, summary):
    """Fieldnames must match the summary; extra columns are accepted."""
    with accepted(Extra):
        validate(detail.fieldnames, set(summary.fieldnames))
32 |
33 |
def test_state_labels(detail, summary):
    """Labels must match; Jervis Bay Territory is known to be absent."""
    data = detail({'state/territory'})
    requirement = summary({'state/territory'})

    with accepted([Missing('Jervis Bay Territory')]):
        validate(data, requirement)
44 |
45 |
def test_population_format(detail):
    """Population values must be strings of decimal digits."""
    def integer_format(value):  # <- Helper function.
        return str(value).isdecimal()

    validate(detail({'population'}), integer_format)
53 |
54 |
def test_population_sums(detail, summary):
    """Sums must match within 3%, except the known-omitted territory."""
    fields = {'state/territory': 'population'}
    data = detail(fields).sum()
    requirement = summary(fields).sum()

    omitted_territory = accepted({'Jervis Bay Territory': Missing(388)})
    with accepted.percent(0.03) | omitted_territory:
        validate(data, requirement)
65 |
--------------------------------------------------------------------------------
/docs/how-to/negative-matches.rst:
--------------------------------------------------------------------------------
1 |
2 | .. currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: How to validate negative matches.
6 | :keywords: datatest, negative match
7 |
8 |
9 | ################################
10 | How to Validate Negative Matches
11 | ################################
12 |
13 | Sometimes you want to check that data is **not** equal to a specific
14 | value. There are a few different ways to perform this type of negative
15 | matching.
16 |
17 |
18 | Helper Function
19 | ===============
20 |
21 | One obvious way to check for a negative match is to define a helper
22 | function that checks for ``!=`` to a given value:
23 |
24 | .. code-block:: python
25 | :linenos:
26 |
27 | from datatest import validate
28 |
29 | data = [...]
30 |
31 | def not_bar(x):
32 | return x != 'bar'
33 |
34 | validate(data, not_bar)
35 |
36 |
37 | Inverted Predicate
38 | ==================
39 |
40 | Datatest provides a :class:`Predicate` class for handling different
41 | kinds of matching. You can invert a Predicate's behavior using the
42 | inversion operator, ``~``:
43 |
44 | .. code-block:: python
45 | :emphasize-lines: 4
46 | :linenos:
47 |
48 | from datatest import validate, Predicate
49 |
50 | data = [...]
51 | validate(data, ~Predicate('bar'))
52 |
53 |
54 | Functional Style
55 | ================
56 |
57 | If you are accustomed to programming in a functional style, you
58 | could perform a negative match using :func:`functools.partial` and
59 | :func:`operator.ne`:
60 |
61 | .. code-block:: python
62 | :emphasize-lines: 6
63 | :linenos:
64 |
65 | from functools import partial
66 | from operator import ne
67 | from datatest import validate
68 |
69 | data = [...]
70 | validate(data, partial(ne, 'bar'))
71 |
72 |
--------------------------------------------------------------------------------
/run-tests.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #=======================================================================
3 | # FILE: run-tests.sh
4 | # DESCRIPTION: Runs test suite under all supported versions of Python
5 | # and displays failures when encountered.
6 | #=======================================================================
7 |
8 | #-----------------------------------------------------------------------
9 | # Define function (takes command to run as a single argument).
10 | #-----------------------------------------------------------------------
# Run a single command (passed as one argument), echoing a banner to
# stderr first.  Aborts the whole script with the command's own exit
# status if the command fails.
run_command ()
{
    echo "" >&2
    echo "======================================================================" >&2
    echo "$1" >&2
    echo "======================================================================" >&2
    $1 # <- Run command.
    local status=$?  # <- Capture exit status IMMEDIATELY; any later
                     #    command (echo, test, ...) overwrites $?.
    if [ $status -ne 0 ]
    then
        echo "" >&2
        echo "Failed Command: $1" >&2
        echo "" >&2
        # BUG FIX: this used to be `exit $?`, which exited with the
        # status of the preceding echo (always 0), so failures were
        # reported on stderr but the script still exited successfully.
        exit $status # <- EXIT!
    fi
}
26 |
27 | #-----------------------------------------------------------------------
28 | # Run test suite in all supported versions of Python.
29 | #-----------------------------------------------------------------------
30 | run_command "python3.9 -B -m unittest $*"
31 | run_command "python3.8 -B -m unittest $*"
32 | run_command "python3.7 -B -m unittest $*"
33 | run_command "python3.6 -B -m unittest $*"
34 | run_command "python3.5 -B -m unittest $*"
35 | run_command "python3.4 -B -m unittest $*"
36 | #run_command "python3.3 -B -m unittest $*"
37 | #run_command "python3.2 -B -m unittest $*"
38 | #run_command "python3.1 -B tests/discover.py $*"
39 | run_command "python2.7 -B -m unittest discover $*"
40 | run_command "python2.6 -B tests/discover.py $*"
41 |
42 | echo "" >&2
43 | echo "All commands successful." >&2
44 |
--------------------------------------------------------------------------------
/datatest/_excepthook.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | from .validation import ValidationError
4 |
5 |
# Remember the hook in place at import time so exceptions other than
# ValidationError are still handled by the previously installed hook
# (falling back to the interpreter's original hook if none is set).
if sys.excepthook:
    existing_excepthook = sys.excepthook
else:
    existing_excepthook = sys.__excepthook__
10 |
11 |
12 | def _next_is_internal(tb):
13 | """Return True if the next traceback refers to an internal part of
14 | datatest.
15 | """
16 | tb_next = tb.tb_next
17 | if not tb_next:
18 | return False
19 | return (tb_next.tb_frame.f_globals.get('__datatest', False)
20 | or tb_next.tb_frame.f_globals.get('__unittest', False))
21 |
22 |
def excepthook(err_type, err_value, err_traceback):
    """Hide calls internal to datatest for ValidationError instances
    and print traceback and exception to sys.stderr.
    """
    if not issubclass(err_type, ValidationError):
        # Not a datatest error--defer to the previously installed hook.
        return existing_excepthook(err_type, err_value, err_traceback)

    try:
        # Truncate the traceback chain at the first frame whose
        # successor is internal to datatest (or unittest).
        tb = err_traceback
        while tb:
            if _next_is_internal(tb):
                tb.tb_next = None # <- Only settable in 3.7 and newer.
                break
            tb = tb.tb_next

        existing_excepthook(err_type, err_value, err_traceback)

    except (AttributeError, TypeError):
        # In older versions of Python, "tb_next" is a read-only attribute.
        # Trying to set "tb_next" in versions 3.0 through 3.6 will raise an
        # AttributeError whereas versions 2.7 and older will raise a TypeError.
        # Fall back to counting the frames to keep, then print with an
        # explicit `limit` so the same internal frames are omitted
        # without mutating the traceback objects.
        limit = 1
        tb = err_traceback
        while tb:
            if _next_is_internal(tb):
                break
            limit += 1
            tb = tb.tb_next

        import traceback
        traceback.print_exception(err_type, err_value, err_traceback, limit)
55 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/modified_test_country_of_birth_unit.py:
--------------------------------------------------------------------------------
1 |
2 | import pytest
3 | from datatest import working_directory
4 | from datatest import Select
5 | from datatest import DataTestCase
6 | from datatest import mandatory
7 | from datatest import Missing, Extra, Deviation, Invalid
8 |
9 |
10 | # Define fixtures.
11 |
def setUpModule():
    """Create module-level selectors shared by every test case."""
    global detail, summary

    with working_directory(__file__):  # resolve CSVs relative to this file
        detail = Select('country_of_birth.csv')
        summary = Select('estimated_totals.csv')
19 |
20 |
21 | # Begin tests.
22 |
class TestPopulation(DataTestCase):
    """Validate details against the summary, accepting known issues."""

    @mandatory
    def test_columns(self):
        # Fieldnames must match; extra columns in the detail are okay.
        with self.accepted(Extra):
            self.assertValid(detail.fieldnames, set(summary.fieldnames))

    def test_state_labels(self):
        # Labels must match; Jervis Bay Territory is known to be absent.
        data = detail({'state/territory'})
        requirement = summary({'state/territory'})

        with self.accepted([Missing('Jervis Bay Territory')]):
            self.assertValid(data, requirement)

    def test_population_format(self):
        # Population values must be strings of decimal digits.
        def integer_format(value):  # <- Helper function.
            return str(value).isdecimal()

        self.assertValid(detail({'population'}), integer_format)

    def test_population_sums(self):
        # Sums must match within 3%, except the known-omitted territory.
        fields = {'state/territory': 'population'}
        data = detail(fields).sum()
        requirement = summary(fields).sum()

        omitted = self.accepted({'Jervis Bay Territory': Missing(388)})
        with self.acceptedPercent(0.03) | omitted:
            self.assertValid(data, requirement)
61 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/test_intro1.py:
--------------------------------------------------------------------------------
1 | """Example tests using pytest-style conventions."""
2 |
3 | import re
4 | from datatest import validate
5 |
6 |
def test_using_set():
    """Check for set membership."""
    members = {'A', 'B'}
    validate(['A', 'B', 'A'], members)
14 |
15 |
def test_using_function():
    """Check that function returns True."""
    def is_even(number):
        return number % 2 == 0

    validate([2, 4, 6, 8], is_even)
24 |
25 |
def test_using_type():
    """Check that values are of the given type."""
    values = [0.0, 1.0, 2.0]
    validate(values, float)
31 |
32 |
def test_using_regex():
    """Check that values match the given pattern."""
    pattern = re.compile('[bc]ake')
    validate(['bake', 'cake', 'bake'], pattern)
40 |
41 |
42 | def test_using_string():
43 | """Check that values equal the given string."""
44 | data = ['foo', 'foo', 'foo']
45 |
46 | validate(data, 'foo')
47 |
48 |
49 | def test_using_tuple():
50 | """Check that tuples of values satisfy corresponding tuple of
51 | requirements.
52 | """
53 | data = [('A', 0.0), ('A', 1.0), ('A', 2.0)]
54 |
55 | requirement = ('A', float)
56 |
57 | validate(data, requirement)
58 |
59 |
60 | def test_using_dict():
61 | """Check that values satisfy requirements of matching keys."""
62 | data = {
63 | 'A': 100,
64 | 'B': 200,
65 | 'C': 300,
66 | }
67 | requirement = {
68 | 'A': 100,
69 | 'B': 200,
70 | 'C': 300,
71 | }
72 | validate(data, requirement)
73 |
74 |
75 | def test_using_list():
76 | """Check that the order of values match the required sequence."""
77 | data = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
78 |
79 | requirement = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
80 |
81 | validate(data, requirement)
82 |
--------------------------------------------------------------------------------
/docs/discussion/organizing-tests.rst:
--------------------------------------------------------------------------------
1 |
2 | .. currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: A discussion on organizing a data test suite.
6 | :keywords: data, testing, organizing, incremental, validation
7 |
8 |
9 | #######################
10 | Organizing a Test Suite
11 | #######################
12 |
13 | Unlike unit testing of software, it's oftentimes not possible to check
14 | data properties as independent "units" in isolation. Later tests often
15 | depend on the success of earlier ones. For example, it's not useful
16 | to try to check the datatype of an "account_id" column if there's
17 | no column of that name. And it might not be useful to sum the values
18 | in an "accounts_payable" column when the associated account IDs
19 | contain invalid datatypes.
20 |
21 | Typically, data tests should be run sequentially where broader, general
22 | features are tested first and specific details are tested later (after
23 | their prerequisite tests have passed). This approach is called "top-down,
24 | incremental testing". You can use the following list as a rough guide
25 | of which features to check before others.
26 |
27 |
28 | Order to Check Features
29 | -----------------------
30 |
31 | 1. data is accessible (by loading a file or connecting to a data source
32 | via a fixture)
33 | 2. names of tables or worksheets (if applicable)
34 | 3. names of columns
35 | 4. categorical columns: controlled vocabulary, set membership, etc.
36 | 5. foreign-keys (if applicable)
37 | 6. well-formedness of text values: date formats, phone numbers, etc.
38 | 7. datatypes: int, float, datetime, etc.
39 | 8. constraints: uniqueness, minimum and maximum values, etc.
40 | 9. accuracy of quantitative columns: compare sums, counts, or averages
41 | against known-good values
42 | 10. internal consistency, cross-column comparisons, etc.
43 |
44 |
45 | ..
46 | updating for errors discovered later
47 | don't just fix the data error and move on
48 | instead, devise a test that fails, then fix
49 | the data
50 |
51 |
--------------------------------------------------------------------------------
/docs/_static/test_errors.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import re
3 | import datatest
4 |
5 |
class TestExample(datatest.DataTestCase):
    """Examples that deliberately fail in order to demonstrate
    datatest's difference reporting (Missing, Extra, Invalid,
    Deviation).  The invalid data values here are intentional.
    """
    def test_membership_in_set(self):
        data = ['x', 'x2', 'y', 'y', 'z', 'z']
        required_elements = {'x', 'y', 'z'}
        self.assertValid(data, required_elements)

    def test_function_returns_true(self):
        data = ['X', 'X', 'Y', 'y']
        def uppercase(x):
            return x.isupper()
        self.assertValid(data, uppercase)

    def test_regex_matches(self):
        data = ['foo', 'foo', 'foo', 'bar', 'bar', 'xx']
        # Raw string: '\w' in a plain string literal is an invalid
        # escape sequence (DeprecationWarning since Python 3.6).
        three_letters = re.compile(r'^\w\w\w$')
        self.assertValid(data, three_letters)

    def test_equality(self):
        data = ['x', 'x', 'Y']
        other_value = 'x'
        self.assertValid(data, other_value)

    def test_order(self):
        data = ['x', 'X', 'y', 'y', 'z', 'z']
        my_sequence = ['x', 'x', 'y', 'y', 'z', 'z']
        self.assertValid(data, my_sequence)

    def test_mapping1(self):
        data = {
            'x': 'foo',
            'y': 'BAZ',
        }
        required_values = {
            'x': 'foo',
            'y': 'bar',
        }
        self.assertValid(data, required_values)

    def test_mapping2(self):
        data = {
            'x': 11,
            'y': 13,
        }
        required_values = {
            'x': 10,
            'y': 15,
        }
        self.assertValid(data, required_values)

    def test_mapping3(self):
        data = {
            'x': 10,
            'y': 15,
            'z': 3000,
        }
        required_values = {
            'x': 10,
            'y': 15,
            'z': 20,
        }
        self.assertValid(data, required_values)


if __name__ == '__main__':
    datatest.main()
71 |
--------------------------------------------------------------------------------
/docs/how-to/reorder-acceptances.rst:
--------------------------------------------------------------------------------
1 |
2 | .. py:currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: How to re-order acceptances.
6 | :keywords: datatest, order of operations, acceptance, order
7 |
8 |
9 | ###########################
10 | How to Re-Order Acceptances
11 | ###########################
12 |
13 | Individual acceptances can be combined together to create new acceptances
14 | with narrower or broader criteria (see :ref:`composability-docs`).
15 | When acceptances are combined, their criteria are applied in an order
16 | determined by their scope. Element-wise criteria are applied first,
17 | group-wise criteria are applied second, and whole-error criteria are
18 | applied last (see :ref:`order-of-operations-docs`).
19 |
20 |
21 | Implicit Ordering
22 | -----------------
23 |
24 | In this first example, we have a combined acceptance made from a
25 | whole-error acceptance, :func:`accepted.count`, and a group-wise
26 | acceptance, :func:`accepted([...]) <accepted>`:
27 |
28 | .. code-block:: python
29 | :linenos:
30 | :lineno-start: 21
31 |
32 | with accepted.count(4) | accepted([Missing('A'), Missing('B')]):
33 | ...
34 |
35 | Since the :ref:`order-of-operations-docs` specifies that whole-error
36 | acceptances are applied *after* group-wise acceptances, the
37 | ``accepted.count(4)`` criteria is applied last even though it's
38 | defined first.
39 |
40 |
41 | Explicit Ordering
42 | -----------------
43 |
44 | If you want to control this order explicitly, you can use nested
45 | ``with`` statements to change the default behavior:
46 |
47 | .. code-block:: python
48 | :linenos:
49 | :lineno-start: 21
50 |
51 | with accepted([Missing('A'), Missing('B')]):
52 | with accepted.count(4):
53 | ...
54 |
55 | Using nested ``with`` statements, the inner-most block is applied
56 | first and outer blocks are applied in order until the outer-most
57 | block is applied last. In this example, the ``accepted.count(4)``
58 | is applied first because it's declared in the inner-most block.
59 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/test_intro2.py:
--------------------------------------------------------------------------------
1 | """Example of failing tests using pytest-style conventions."""
2 |
3 | import re
4 | from datatest import validate
5 | from datatest import accepted
6 |
7 |
def test_using_set():
    """Set requirement: each element must be a member of the set.
    ('C' and 'D' are intentionally extra, so this test fails.)
    """
    data = ['A', 'B', 'C', 'D']
    validate(data, {'A', 'B'})


def test_using_function():
    """Function requirement: the function must return True for each
    element.  (9 is intentionally odd, so this test fails.)
    """
    data = [2, 4, 6, 9]

    def is_even(x):
        return x % 2 == 0

    validate(data, is_even)


def test_using_type():
    """Type requirement: each element must be a float.  (The final
    int is intentional, so this test fails.)
    """
    data = [0.0, 1.0, 2]
    validate(data, float)


def test_using_regex():
    """Regex requirement: each element must match the pattern.
    ('fake' intentionally does not, so this test fails.)
    """
    data = ['bake', 'cake', 'fake']
    validate(data, re.compile('[bc]ake'))


def test_using_string():
    """String requirement: each element must equal the string.
    ('bar' intentionally differs, so this test fails.)
    """
    data = ['foo', 'foo', 'bar']
    validate(data, 'foo')


def test_using_tuple():
    """Tuple requirement: positions are checked pairwise.  (Two of
    the tuples intentionally violate it, so this test fails.)
    """
    data = [('A', 1.0), ('A', 2), ('B', 3.0)]
    validate(data, ('A', float))


def test_using_dict():
    """Dict requirement: values checked under matching keys.  ('C'
    and 'D' intentionally deviate, so this test fails.)
    """
    data = {
        'A': 100,
        'B': 200,
        'C': 299,
        'D': 405,
    }
    requirement = {
        'A': 100,
        'B': 200,
        'C': 300,
        'D': 400,
    }
    validate(data, requirement)


def test_using_list():
    """List requirement: values must appear in the required order.
    (Several values intentionally differ, so this test fails.)
    """
    data = ['A', 'D', 'XXX', 'YYY', 'E', 'ZZZ', 'G']
    validate(data, list('ABCDEFG'))
85 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/modified_country_of_birth.csv:
--------------------------------------------------------------------------------
1 | state/territory,country_of_birth,population
2 | Australian Capital Territory,Australia,270033
3 | Australian Capital Territory,China,11351
4 | Australian Capital Territory,England,12757
5 | Australian Capital Territory,India,10414
6 | Australian Capital Territory,New Zealand,4734
7 | Australian Capital Territory,other/unknown,84310
8 | Australian Capital Territory,Philippines,3798
9 | New South Wales,Australia,4899090
10 | New South Wales,China,234508
11 | New South Wales,England,226564
12 | New South Wales,India,143459
13 | New South Wales,New Zealand,117136
14 | New South Wales,other/unknown,1772722
15 | New South Wales,Philippines,86749
16 | Northern Territory,Australia,157531
17 | Northern Territory,England,5583
18 | Northern Territory,Greece,1268
19 | Northern Territory,India,3598
20 | Northern Territory,New Zealand,4636
21 | Northern Territory,other/unknown,50303
22 | Northern Territory,Philippines,5914
23 | Queensland,Australia,3343657
24 | Queensland,China,47114
25 | Queensland,England,180775
26 | Queensland,India,49145
27 | Queensland,New Zealand,201206
28 | Queensland,other/unknown,841165
29 | Queensland,South Africa,40131
30 | South Australia,Australia,1192546
31 | South Australia,China,24610
32 | South Australia,England,97392
33 | South Australia,India,27594
34 | South Australia,Italy,18544
35 | South Australia,other/unknown,301630
36 | South Australia,Vietnam,14337
37 | Tasmania,Australia,411490
38 | Tasmania,China,3036
39 | Tasmania,England,18776
40 | Tasmania,Netherlands,2193
41 | Tasmania,New Zealand,4977
42 | Tasmania,other/unknown,67210
43 | Tasmania,Scotland,2283
44 | Victoria,Australia,3845493
45 | Victoria,China,160652
46 | Victoria,England,171443
47 | Victoria,India,169802
48 | Victoria,New Zealand,93253
49 | Victoria,other/unknown,1405194
50 | Victoria,Vietnam,80787
51 | Western Australia,Australia,1492842
52 | Western Australia,England,194163
53 | Western Australia,India,49385
54 | Western Australia,New Zealand,79221
55 | Western Australia,other/unknown,586956
56 | Western Australia,Philippines,30835
57 | Western Australia,South Africa,41008
58 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/country_of_birth.csv:
--------------------------------------------------------------------------------
1 | state/territory,country_of_birth,pop
2 | Australian Capital Territory,Australia,270033
3 | Australian Capital Territory,China,11351
4 | Australian Capital Territory,England,12757
5 | Australian Capital Territory,India,10414
6 | Australian Capital Territory,New Zealand,4734
7 | Australian Capital Territory,other/unknown,84310
8 | Australian Capital Territory,Philippines,3798
9 | New South Wales,Australia,4899090
10 | New South Wales,China,234508
11 | New South Wales,England,226564
12 | New South Wales,India,143459
13 | New South Wales,New Zealand,117136
14 | New South Wales,other/unknown,1772722
15 | New South Wales,Philippines,86749
16 | Northern Territory,Australia,157531
17 | Northern Territory,England,5583
18 | Northern Territory,Greece,1268
19 | Northern Territory,India,3598
20 | Northern Territory,New Zealand,4636
21 | Northern Territory,other/unknown,50303
22 | Northern Territory,Philippines,5914
23 | Queensland,Australia,3343657
24 | Queensland,China,47114
25 | Queensland,England,180775
26 | Queensland,India,49145
27 | Queensland,New Zealand,201206
28 | Queensland,other/unknown,841165
29 | Queensland,South Africa,40131
30 | South Australia,Australia,1192546
31 | South Australia,China,24610
32 | South Australia,England,"England,97392"
33 | South Australia,India,27594
34 | South Australia,Italy,18544
35 | South Australia,other/unknown,301630
36 | South Australia,Vietnam,14337
37 | Tasmania,Australia,411490
38 | Tasmania,China,3036
39 | Tasmania,England,18776
40 | Tasmania,Netherlands,2193
41 | Tasmania,New Zealand,4977
42 | Tasmania,other/unknown,67210
43 | Tasmania,Scotland,2283
44 | Tasmania,SUBTOTAL,509965
45 | Victoria,Australia,3845493
46 | Victoria,China,160652
47 | Victoria,England,171443
48 | Victoria,India,169802
49 | Victoria,New Zealand,93253
50 | Victoria,other/unknown,1405194
51 | Victoria,Vietnam,80787
52 | Western Australia,Australia,1492842
53 | Western Australia,England,194163
54 | Western Australia,India,49385
55 | Western Australia,New Zealand,79221
56 | Western Australia,other/unknown,586956
57 | Western Australia,Philippines,30835
58 | Western Australia,South Africa,41008
59 |
--------------------------------------------------------------------------------
/run-tests.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 | REM ********************************************************************
3 | REM File: run-tests.bat
4 | REM Description: Runs test suite under all supported versions of Python
5 | REM and displays failures when encountered.
6 | REM ********************************************************************
7 |
8 | GOTO:mainProgram
9 |
10 | REM ********************************************************************
11 | REM Define function (takes command to run as a single argument).
12 | REM ********************************************************************
13 | :runCommand
14 | SETLOCAL & IF %GLOBAL_ERRORLEVEL% NEQ 0 ENDLOCAL & GOTO:EOF
15 | ECHO.
16 | ECHO ======================================================================
17 | ECHO %~1
18 | ECHO ======================================================================
19 | CALL %~fs1
20 | IF %ERRORLEVEL% NEQ 0 (
21 | ECHO.
22 | ECHO Failed Command: %~1
23 | )
24 | ENDLOCAL & SET GLOBAL_ERRORLEVEL=%ERRORLEVEL%
25 | GOTO:EOF
26 |
27 |
28 | REM ********************************************************************
29 | REM Run test suite in all supported versions of Python.
30 | REM ********************************************************************
31 | :mainProgram
32 |
33 | SET GLOBAL_ERRORLEVEL=0
34 |
35 | CALL :runCommand "C:\Program Files\Python37\python.exe -B -m unittest %*"
36 | CALL :runCommand "C:\Program Files\Python 3.6\python.exe -B -m unittest %*"
37 | CALL :runCommand "C:\Program Files\Python 3.5\python.exe -B -m unittest %*"
38 | CALL :runCommand "C:\Python34\python.exe -B -m unittest %*"
39 | CALL :runCommand "C:\Python33\python.exe -B -m unittest %*"
40 | CALL :runCommand "C:\Python32\python.exe -B -m unittest %*"
41 | CALL :runCommand "C:\Python31\python.exe -B tests/discover.py %*"
42 | CALL :runCommand "C:\Python27\python.exe -B -m unittest discover %*"
43 | CALL :runCommand "C:\Python26\python.exe -B tests/discover.py %*"
44 |
45 | IF %GLOBAL_ERRORLEVEL% EQU 0 (
46 | ECHO.
47 | ECHO All commands successful.
48 | )
49 |
--------------------------------------------------------------------------------
/tests/past_api07_sources_excel.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | import os
4 | from . import _unittest as unittest
5 | from .mixins import OtherTests
6 | from .mixins import CountTests
7 |
8 | try:
9 | import xlrd
10 | except ImportError:
11 | xlrd = None
12 |
13 | from datatest.__past__.api07_sources import ExcelSource
14 |
15 | workbook_path = os.path.join(
16 | os.path.dirname(__file__),
17 | 'sample_files',
18 | 'test_sources_excel.xlsx',
19 | )
20 |
21 |
@unittest.skipUnless(xlrd, 'requires xlrd')
class TestExcelSource(OtherTests, unittest.TestCase):
    """Run the shared OtherTests mixin against an ExcelSource."""
    def setUp(self):
        # Reading the module-level *workbook_path* needs no `global`
        # declaration (removed as unnecessary).
        self.datasource = ExcelSource(workbook_path)  # <- Defaults to "Sheet 1"
27 |
28 |
@unittest.skipUnless(xlrd, 'requires xlrd')
class TestExcelSourceCount(unittest.TestCase):
    #class TestExcelSourceCount(CountTests, unittest.TestCase):
    """Check `count` behavior against the 'count_data' worksheet."""
    def setUp(self):
        # Reading the module-level *workbook_path* needs no `global`
        # declaration (removed as unnecessary).
        self.datasource = ExcelSource(workbook_path, 'count_data')

    def test_count(self):
        """Count grouped by zero, one, and two columns, and with a
        keyword filter.
        """
        count = self.datasource.count

        self.assertEqual(9, count('label1'))

        expected = {'a': 4, 'b': 5}
        result = count('label1', ['label1'])
        self.assertEqual(expected, result)

        expected = {'a': 3, 'b': 3}  # Counts only truthy values (not '' or None).
        result = count('label2', ['label1'])
        self.assertEqual(expected, result)

        expected = {
            ('a', 'x'): 2,
            ('a', 'y'): 1,
            ('a', ''): 1,
            ('b', 'z'): 1,
            ('b', 'y'): 1,
            ('b', 'x'): 1,
            #('b', None): 1, # <- None value has no equivalent in XLSX file.
            #('b', ''): 1,
            ('b', ''): 2,
        }
        result = count('label1', ['label1', 'label2'])
        self.assertEqual(expected, result)

        expected = {'x': 2, 'y': 1, '': 1}
        result = count('label1', 'label2', label1='a')
        self.assertEqual(expected, result)
66 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/test_intro1_unit.py:
--------------------------------------------------------------------------------
1 | """Example tests using unittest-style conventions."""
2 |
3 | import re
4 | import datatest
5 |
6 |
class ExampleTests(datatest.DataTestCase):
    """Passing examples of each requirement type (unittest-style)."""

    def test_using_set(self):
        """Set requirement: each element must be a member of the set."""
        data = ['A', 'B', 'A']
        self.assertValid(data, {'A', 'B'})

    def test_using_function(self):
        """Function requirement: the function must return True for
        each element.
        """
        data = [2, 4, 6, 8]

        def is_even(x):
            return x % 2 == 0

        self.assertValid(data, is_even)

    def test_using_type(self):
        """Type requirement: each element must be a float."""
        data = [0.0, 1.0, 2.0]
        self.assertValid(data, float)

    def test_using_regex(self):
        """Regex requirement: each element must match the pattern."""
        data = ['bake', 'cake', 'bake']
        self.assertValid(data, re.compile('[bc]ake'))

    def test_using_string(self):
        """String requirement: each element must equal the string."""
        data = ['foo', 'foo', 'foo']
        self.assertValid(data, 'foo')

    def test_using_tuple(self):
        """Tuple requirement: each position in a data tuple is checked
        against the requirement at the same position.
        """
        data = [('A', 0.0), ('A', 1.0), ('A', 2.0)]
        self.assertValid(data, ('A', float))

    def test_using_dict(self):
        """Dict requirement: each value is checked against the
        requirement under the matching key.
        """
        data = {'A': 100, 'B': 200, 'C': 300}
        self.assertValid(data, {'A': 100, 'B': 200, 'C': 300})

    def test_using_list(self):
        """List requirement: values must appear in the required order."""
        data = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
        self.assertValid(data, list('ABCDEFG'))


if __name__ == '__main__':
    datatest.main()
81 |
--------------------------------------------------------------------------------
/docs/_static/tutorial/test_intro2_unit.py:
--------------------------------------------------------------------------------
1 | """Example of failing tests using unittest-style conventions."""
2 |
3 | import re
4 | import datatest
5 |
6 |
class ExampleTests(datatest.DataTestCase):
    """Failing examples of each requirement type (unittest-style).
    The invalid data values are intentional.
    """

    def test_using_set(self):
        """Set requirement: fails on the extra 'C' and 'D'."""
        data = ['A', 'B', 'C', 'D']
        self.assertValid(data, {'A', 'B'})

    def test_using_function(self):
        """Function requirement: fails on the odd 9."""
        data = [2, 4, 6, 9]

        def is_even(x):
            return x % 2 == 0

        self.assertValid(data, is_even)

    def test_using_type(self):
        """Type requirement: fails on the trailing int."""
        data = [0.0, 1.0, 2]
        self.assertValid(data, float)

    def test_using_regex(self):
        """Regex requirement: fails on 'fake'."""
        data = ['bake', 'cake', 'fake']
        self.assertValid(data, re.compile('[bc]ake'))

    def test_using_string(self):
        """String requirement: fails on 'bar'."""
        data = ['foo', 'foo', 'bar']
        self.assertValid(data, 'foo')

    def test_using_tuple(self):
        """Tuple requirement: fails on ('A', 2) and ('B', 3.0)."""
        data = [('A', 1.0), ('A', 2), ('B', 3.0)]
        self.assertValid(data, ('A', float))

    def test_using_dict(self):
        """Dict requirement: fails on the deviations at 'C' and 'D'."""
        data = {
            'A': 100,
            'B': 200,
            'C': 299,
            'D': 405,
        }
        requirement = {
            'A': 100,
            'B': 200,
            'C': 300,
            'D': 400,
        }
        self.assertValid(data, requirement)

    def test_using_list(self):
        """List requirement: fails where the sequence diverges."""
        data = ['A', 'D', 'XXX', 'YYY', 'E', 'ZZZ', 'G']
        self.assertValid(data, list('ABCDEFG'))


if __name__ == '__main__':
    datatest.main()
82 |
--------------------------------------------------------------------------------
/tests/test_past_subprocesses.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Test backwards compatibility modules using separate subprocesses."""
3 | import subprocess
4 | import sys
5 |
6 | from datatest._compatibility import textwrap
7 | from . import _unittest as unittest
8 | from .common import ignore_deprecations
9 |
10 |
@ignore_deprecations
class TestBackwardsCompatibility(unittest.TestCase):
    """Exercise each backward-compatibility API module in a separate
    child process so one API cannot contaminate another.
    """
    def assertSubprocess(self, module):
        """Run given *module* in separate process--fails if return code
        indicates an error.
        """
        command = [sys.executable, '-B', '-O', '-m', module]
        proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out_bytes, err_bytes = proc.communicate()  # Also closes the pipe objects.

        if proc.returncode == 0:
            return  # Zero return code: command succeeded.

        # Build a readable, indented failure message from all output.
        text = (out_bytes + err_bytes).decode('utf-8')
        wrapped = '\n'.join(textwrap.wrap(text, width=70))
        indented = textwrap.indent(wrapped, '    ')

        self.fail('\n'.join([
            'Subprocess failed:',
            indented,
            '',
            'To run this test directly, use the following command:',
            ' '.join(command),
        ]))

    def test_api00(self):
        """Test compatibility with pre-release alpha API."""
        self.assertSubprocess('tests.past_api00')

    def test_api06(self):
        """Test compatibility with first development-release API."""
        self.assertSubprocess('tests.past_api06')

    def test_api07(self):
        """Test compatibility with second development-release API."""
        self.assertSubprocess('tests.past_api07')

    def test_api08(self):
        """Test compatibility with version 0.8 API."""
        self.assertSubprocess('tests.past_api08')

    def test_api09(self):
        """Test compatibility with version 0.9 API."""
        self.assertSubprocess('tests.past_api09')


if __name__ == '__main__':
    unittest.main()
61 |
--------------------------------------------------------------------------------
/tests/past_api07_error.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from . import _unittest as unittest
3 |
4 | from datatest.__past__.api07_diffs import xMissing
5 | from datatest.__past__.api07_error import DataError
6 |
7 |
class TestDataError(unittest.TestCase):
    """Behavior of the deprecated api07 DataError exception."""

    def test_subclass(self):
        # Must stay catchable as AssertionError for old callers.
        self.assertTrue(issubclass(DataError, AssertionError))

    def test_instantiation(self):
        # Accepts a bare difference, a list, and dicts keyed by
        # strings or tuples.
        for differences in (xMissing('foo'),
                            [xMissing('foo')],
                            {'foo': xMissing('bar')},
                            {('foo', 'bar'): xMissing('baz')}):
            DataError('column names', differences)

        with self.assertRaises(ValueError, msg='Empty error should raise exception.'):
            DataError(msg='', differences={})

    def test_repr(self):
        expected = "DataError: different columns:\n xMissing('foo')"
        # A bare difference and a one-item list must render identically.
        self.assertEqual(repr(DataError('different columns', [xMissing('foo')])), expected)
        self.assertEqual(repr(DataError('different columns', xMissing('foo'))), expected)

        # Lists are pretty-printed one difference per line.
        error = DataError('different columns', [xMissing('foo'), xMissing('bar')])
        expected = ("DataError: different columns:\n"
                    " xMissing('foo'),\n"
                    " xMissing('bar')")
        self.assertEqual(repr(error), expected)

        # Dict differences render as 'KEY': difference.
        error = DataError('different columns', {'FOO': xMissing('bar')})
        expected = ("DataError: different columns:\n"
                    " 'FOO': xMissing('bar')")
        self.assertEqual(repr(error), expected)

    def test_verbose_repr(self):
        error = DataError('different columns', [xMissing('foo')],
                          'subject-data-source', 'reference-data-source')
        error._verbose = True  # <- Set verbose flag, here!

        expected = ("DataError: different columns:\n"
                    " xMissing('foo')\n"
                    "\n"
                    "SUBJECT:\n"
                    "subject-data-source\n"
                    "REQUIRED:\n"
                    "reference-data-source")
        self.assertEqual(repr(error), expected)
58 |
--------------------------------------------------------------------------------
/docs/how-to/get-started.rst:
--------------------------------------------------------------------------------
1 |
2 | .. currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: How to get started.
6 | :keywords: datatest, example, getting started
7 |
8 |
9 | ###############################
10 | How to Get Started With Testing
11 | ###############################
12 |
13 | Once you have reviewed the tutorials and have a basic understanding
14 | of datatest, you should be ready to start testing your own data.
15 |
16 |
17 | =========================================
18 | 1. Create a File and Add Some Sample Code
19 | =========================================
20 |
21 | A simple way to get started is to create a **.py** file in the same folder
22 | as the data you want to test. It's a good idea to follow established testing
23 | conventions and make sure your filename starts with "**test\_**".
24 |
25 | Then, copy one of following the **pytest** or **unittest** code samples
26 | to use as a template for writing your own tests:
27 |
28 | .. raw:: html
29 |
30 |     <details>
31 |     <summary>Pytest Samples</summary>
32 |
33 | .. include:: ../intro/automated-testing.rst
34 |     :start-after: start-inclusion-marker-pytestsamples
35 |     :end-before: end-inclusion-marker-pytestsamples
36 |
37 | .. raw:: html
38 |
39 |     </details>
40 |
41 |
42 | .. raw:: html
43 |
44 |     <details>
45 |     <summary>Unittest Samples</summary>
46 |
47 | .. include:: ../intro/automated-testing.rst
48 |     :start-after: start-inclusion-marker-unittestsamples
49 |     :end-before: end-inclusion-marker-unittestsamples
50 |
51 | .. raw:: html
52 |
53 |     </details>
54 |
55 |
56 | ==========================================
57 | 2. Adapt the Sample Code to Suit Your Data
58 | ==========================================
59 |
60 | After copying the sample code into your own file, begin adapting
61 | it to suit your data:
62 |
63 | 1. Change the fixture to use your data (instead of "example.csv").
64 | 2. Update the set in ``test_column_names()`` to require the names your
65 | data should contain (instead of "A", "B", and "C").
66 | 3. Rename ``test_a()`` and change it to check values in one of the
67 | columns in your data.
68 | 4. Add more tests appropriate for your own data requirements.
69 |
70 |
71 | ===================================
72 | 3. Refactor Your Tests as They Grow
73 | ===================================
74 |
75 | As your tests grow, look to structure them into related groups. Start
76 | by creating separate classes to contain groups of related test cases.
77 | And as you develop more and more classes, create separate modules to
78 | hold groups of related classes. If you are using ``pytest``, move your
79 | fixtures into a ``conftest.py`` file.
80 |
--------------------------------------------------------------------------------
/tests/common.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import glob
3 | import os
4 | import shutil
5 | import sys
6 | import tempfile
7 | import warnings
8 | from functools import wraps
9 |
10 | from . import _io as io
11 | from . import _unittest as unittest
12 |
13 |
class MkdtempTestCase(unittest.TestCase):
    """TestCase that runs each test inside a temporary directory.

    Class-level setup creates the temp directory once; each test runs
    with cwd changed to it, and per-test teardown deletes anything the
    test created before restoring the original cwd.
    """
    @classmethod
    def setUpClass(cls):
        cls._orig_dir = os.getcwd()
        # mkdtemp: the directory persists until we remove it ourselves.
        cls._temp_dir = tempfile.mkdtemp()

    @classmethod
    def tearDownClass(cls):
        os.rmdir(cls._temp_dir)  # Succeeds only if tearDown emptied it.

    def setUp(self):
        os.chdir(self._temp_dir)

    def tearDown(self):
        # Remove every entry a test left behind, then restore cwd.
        for entry in glob.glob(os.path.join(self._temp_dir, '*')):
            remover = shutil.rmtree if os.path.isdir(entry) else os.remove
            remover(entry)
        os.chdir(self._orig_dir)
36 |
37 |
def ignore_deprecations(obj):
    """A class and function decorator to ignore DeprecationWarnings.

    Given a function, returns a wrapped version that suppresses
    DeprecationWarning while it runs.  Given a class, wraps every
    callable defined directly on the class in place and returns the
    same class object.
    """
    def decorate(func):
        @wraps(func)
        def wrapper(*args, **kwds):
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', DeprecationWarning)
                return func(*args, **kwds)
        return wrapper

    if not isinstance(obj, type):
        return decorate(obj)  # Plain function or method.

    # Class: decorate each callable attribute defined on the class itself.
    for name, attr in obj.__dict__.items():
        if callable(attr):
            setattr(obj, name, decorate(attr))
    return obj
58 |
59 |
try:
    unittest.TestCase.setUpClass  # New in 2.7
except AttributeError:
    # Pre-2.7 unittest has no class-level fixtures, so redefine the
    # class to run setUpClass/tearDownClass around every single test.
    _MkdtempTestCase = MkdtempTestCase
    class MkdtempTestCase(_MkdtempTestCase):
        def setUp(self):
            # `.__func__` unwraps the classmethod so the instance can
            # stand in for the class argument.
            self.setUpClass.__func__(self)
            _MkdtempTestCase.setUp(self)

        def tearDown(self):
            _MkdtempTestCase.tearDown(self)
            self.tearDownClass.__func__(self)
72 |
73 |
def make_csv_file(fieldnames, datarows):
    """Helper function to make CSV file-like object using *fieldnames*
    (a list of field names) and *datarows* (a list of lists containing
    the row values).
    """
    lines = [','.join(fieldnames)]  # Header row first.
    lines.extend(','.join(str(cell) for cell in row) for row in datarows)
    return io.StringIO('\n'.join(lines))
86 |
--------------------------------------------------------------------------------
/tests/test_pandas_integration.py:
--------------------------------------------------------------------------------
1 | """Tests for Pandas accessor extensions."""
2 | from . import _unittest as unittest
3 |
4 | try:
5 | import pandas
6 | except ImportError:
7 | pandas = None
8 |
9 | from datatest import Invalid
10 | from datatest import ValidationError
11 | from datatest import register_accessors
12 |
13 |
@unittest.skipUnless(pandas, 'requires pandas')
class TestAccessorExtensions(unittest.TestCase):
    """Test Pandas accessors."""

    @staticmethod
    def _is_odd(x):
        # Predicate used by the failure-path tests below.
        return x % 2 == 1

    def setUp(self):  # Change to `setUpClass` when dropping
        register_accessors()  # support for Python 2.6 and 3.1.
        self.df = pandas.DataFrame(
            data=[(1, 'x'), (2, 'y'), (3, 'z')],
            columns=['A', 'B'],
        )

    def _make_multiindex(self):
        # Small two-level index shared by the MultiIndex tests.
        return pandas.MultiIndex.from_arrays(
            [[1, 1, 2], ['A', 'B', 'C']],
            names=('number', 'letter'),
        )

    def test_dataframe_success(self):
        self.df.validate((int, str))  # Should pass without error.

    def test_dataframe_failure(self):
        with self.assertRaises(ValidationError) as caught:
            self.df.validate((self._is_odd, str))

        self.assertEqual(caught.exception.differences, [Invalid((2, 'y'))])

    def test_series_success(self):
        # Should pass without error on success.
        self.df.columns.validate.order(['A', 'B'])  # Columns are a Series
        self.df['A'].validate(int)  # A selected Series of column values.

    def test_series_failure(self):
        with self.assertRaises(ValidationError) as caught:
            self.df['A'].validate(self._is_odd)

        self.assertEqual(caught.exception.differences, [Invalid(2)])

    def test_index_success(self):
        self.df.index.validate(int)  # Should pass without error.

    def test_index_failure(self):
        with self.assertRaises(ValidationError) as caught:
            self.df.index.validate(self._is_odd)

        self.assertEqual(
            caught.exception.differences, [Invalid(0), Invalid(2)])

    def test_multiindex_success(self):
        self._make_multiindex().validate((int, str))  # Should pass.

    def test_multiindex_failure(self):
        with self.assertRaises(ValidationError) as caught:
            self._make_multiindex().validate((self._is_odd, str))

        self.assertEqual(caught.exception.differences, [Invalid((2, 'C'))])
80 |
--------------------------------------------------------------------------------
/docs/_ext/autodoc_classinstance.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | """Sphinx Extension: autodoc_classinstance (written by Shawn Brown)."""
4 | from sphinx.domains.python import PyClasslike
5 | from sphinx.ext.autodoc import ClassDocumenter
6 | from sphinx.ext.autodoc import MethodDocumenter
7 | from sphinx.util import inspect
8 |
9 |
class PyClassInstance(PyClasslike):
    """
    Description of a class-instance object.

    Same rendering as the standard class-like directive except that
    the signature is not prefixed (see get_signature_prefix below).
    """
    def get_signature_prefix(self, sig):
        return '' # Omit "class" prefix for instances.
16 |
17 |
class ClassInstanceDocumenter(ClassDocumenter):
    """
    Specialized Documenter subclass for class instances.

    Documents an instance object with class-style markup, taking its
    signature from the instance's ``__call__`` method when it has one.
    """
    objtype = 'classinstance'

    @classmethod
    def can_document_member(cls, member, membername, isattr, parent):
        # Handle anything that is NOT a class (i.e., instance objects).
        return not isinstance(member, type)

    def import_object(self):
        ret = super().import_object()
        self.doc_as_attr = False # never document as a data/attribute
        return ret

    def format_args(self):
        # for instances, the relevant signature is the __call__ method's
        callmeth = self.get_attr(self.object, '__call__', None)
        if callmeth:
            # NOTE(review): sphinx.util.inspect.Signature with the
            # bound_method/has_retval arguments is an older Sphinx
            # API--confirm it exists on the pinned Sphinx version.
            sig = inspect.Signature(callmeth, bound_method=True, has_retval=True)
            return sig.format_args()
        return None
40 |
41 |
class AlternateMethodDocumenter(MethodDocumenter):
    """
    Alternative documenter for methods of classes and class instances.

    Behaves exactly like MethodDocumenter when the parent is a class.
    When the parent is an instance, it bypasses MethodDocumenter's own
    header logic and re-adds only the ``:async:`` tag itself.
    """
    def add_directive_header(self, sig):
        if isinstance(self.parent, type):
            # If parent is a class definition, then add header as normal.
            super(AlternateMethodDocumenter, self).add_directive_header(sig)
        else:
            # When parent is an instance, then add a special header
            # (calls superclass' superclass method).
            super(MethodDocumenter, self).add_directive_header(sig)

            # Tag async methods but do not tag abstract, class, or
            # static methods.
            # Look the attribute up on the instance's class so wrapped
            # descriptors are seen; fall back to the bound object.
            parentclass = self.parent.__class__
            obj = parentclass.__dict__.get(self.object_name, self.object)
            if inspect.iscoroutinefunction(obj):
                sourcename = self.get_sourcename()
                self.add_line(' :async:', sourcename)
62 |
63 |
def setup(app):
    """Register the directive and documenters with the Sphinx *app*."""
    for directive_name in ('classinstance', 'py:classinstance'):
        app.add_directive(directive_name, PyClassInstance)
    app.add_autodocumenter(ClassInstanceDocumenter)

    # If sphinx.ext.autosummary is used, it overrides the existing
    # autodocumenters on the 'builder-inited' event. Registering
    # AlternateMethodDocumenter on the later 'env-before-read-docs'
    # event guarantees it survives that override.
    def register_method_documenter(app, env, docnames):
        app.add_autodocumenter(AlternateMethodDocumenter)
    app.connect('env-before-read-docs', register_method_documenter)
76 |
--------------------------------------------------------------------------------
/datatest/__past__/api09.py:
--------------------------------------------------------------------------------
1 | """Backward compatibility for version 0.9 API."""
2 | from __future__ import absolute_import
3 |
4 | import datatest
5 | from datatest._compatibility.collections.abc import Mapping
6 | from datatest._compatibility.collections.abc import Set
7 | from datatest._normalize import normalize
8 | from datatest._utils import IterItems
9 |
10 |
class RequiredSubset_090(datatest.requirements.GroupRequirement):
    """Implements inverted subset behavior from 0.9.x API."""
    def __init__(self, requirement):
        # Coerce arbitrary iterables to a set for O(1) membership.
        if not isinstance(requirement, Set):
            requirement = set(requirement)
        self._set = requirement

    def check_group(self, group):
        """Return ``(differences, description)`` for *group*: one
        Missing difference for each required element that does not
        appear in *group*.
        """
        missing = self._set.copy()
        for element in group:
            if not missing:
                break  # Short-circuit once every element is found.
            missing.discard(element)

        # Fix: `Missing` is not imported at module level--qualify it
        # through the datatest namespace to avoid a NameError when
        # the generator is consumed.
        differences = (datatest.Missing(element) for element in missing)
        description = 'must contain all elements of given requirement'
        return differences, description
28 |
29 |
class RequiredSuperset_090(datatest.requirements.GroupRequirement):
    """Implements inverted superset behavior from 0.9.x API."""

    def __init__(self, requirement):
        # Coerce arbitrary iterables to a set for O(1) membership.
        if not isinstance(requirement, Set):
            requirement = set(requirement)
        self._set = requirement

    def check_group(self, group):
        """Return ``(differences, description)`` for *group*: one
        Extra difference for each element of *group* that is not in
        the required superset.
        """
        superset = self._set
        extras = set()
        for element in group:
            if element not in superset:
                extras.add(element)

        # Fix: `Extra` is not imported at module level--qualify it
        # through the datatest namespace to avoid a NameError when
        # the generator is consumed.
        differences = (datatest.Extra(element) for element in extras)
        description = 'may only contain elements of given requirement'
        return differences, description
48 |
49 |
50 |
class ValidateType(datatest.validation.ValidateType):
    def subset(self, data, requirement, msg=None):
        """Implements API 0.9.x subset behavior."""
        __tracebackhide__ = datatest.validation._pytest_tracebackhide

        normalized = normalize(
            requirement, lazy_evaluation=False, default_type=set)

        # Mappings get one RequiredSubset_090 per key; everything
        # else is wrapped directly.
        if isinstance(normalized, (Mapping, IterItems)):
            required = datatest.requirements.RequiredMapping(
                normalized, RequiredSubset_090)
        else:
            required = RequiredSubset_090(normalized)

        self(data, required, msg=msg)

    def superset(self, data, requirement, msg=None):
        """Implements API 0.9.x superset behavior."""
        __tracebackhide__ = datatest.validation._pytest_tracebackhide

        normalized = normalize(
            requirement, lazy_evaluation=False, default_type=set)

        # Mappings get one RequiredSuperset_090 per key; everything
        # else is wrapped directly.
        if isinstance(normalized, (Mapping, IterItems)):
            required = datatest.requirements.RequiredMapping(
                normalized, RequiredSuperset_090)
        else:
            required = RequiredSuperset_090(normalized)

        self(data, required, msg=msg)


datatest.validate = ValidateType()
82 |
--------------------------------------------------------------------------------
/datatest/__past__/api00.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Backwards compatibility for version 0.6.0.dev0 API."""
3 | from __future__ import absolute_import
4 | import datatest
5 | from datatest.__past__ import api08
6 | from datatest.__past__ import api07
7 | from datatest.__past__ import api06
8 | from datatest import DataTestCase
9 |
# Map the 0.6.0.dev0 names onto their later equivalents so old test
# suites keep working unchanged.
datatest.DataAssertionError = datatest.__past__.api07_error.DataError

# Acceptances.
DataTestCase.allowSpecified = DataTestCase.allowOnly
DataTestCase.allowUnspecified = DataTestCase.allowAny
DataTestCase.allowDeviationPercent = DataTestCase.allowPercentDeviation

# Assertions.
from .api06 import _assertDataCount
DataTestCase.assertValueCount = _assertDataCount

DataTestCase.assertColumnSet = DataTestCase.assertSubjectColumns
DataTestCase.assertValueSet = DataTestCase.assertSubjectSet
DataTestCase.assertValueSum = DataTestCase.assertSubjectSum
DataTestCase.assertValueRegex = DataTestCase.assertSubjectRegex
DataTestCase.assertValueNotRegex = DataTestCase.assertSubjectNotRegex
26 |
27 |
def _assertColumnSubset(self, ref=None, msg=None):
    """Test that the set of subject columns is a subset of reference
    columns. If *ref* is provided, it is used in place of the set
    from ``referenceData``.
    """
    try:
        # First try the strict set comparison.
        self.assertColumnSet(ref, msg)
    except datatest.DataAssertionError:
        # A subset may legitimately lack columns, so re-run the
        # comparison while allowing Missing differences.
        with self.allowMissing():
            self.assertColumnSet(ref, msg)

DataTestCase.assertColumnSubset = _assertColumnSubset
40 |
41 |
def _assertColumnSuperset(self, ref=None, msg=None):
    """Test that the set of subject columns is a superset of reference
    columns. If *ref* is provided, it is used in place of the set
    from ``referenceData``.
    """
    try:
        # First try the strict set comparison.
        self.assertColumnSet(ref, msg)
    except datatest.DataAssertionError:
        # A superset may legitimately have extra columns, so re-run
        # the comparison while allowing Extra differences.
        with self.allowExtra():
            self.assertColumnSet(ref, msg)

DataTestCase.assertColumnSuperset = _assertColumnSuperset
54 |
55 |
def _assertValueSubset(self, column, ref=None, msg=None, **filter_by):
    """Test that the set of subject values is a subset of reference
    values for the given *column*. If *ref* is provided, it is used
    in place of the set from ``referenceData``.
    """
    try:
        # First try the strict set comparison.
        self.assertValueSet(column, ref, msg, **filter_by)
    except datatest.DataAssertionError:
        # A subset may legitimately lack values, so re-run the
        # comparison while allowing Missing differences.
        with self.allowMissing():
            self.assertValueSet(column, ref, msg, **filter_by)

DataTestCase.assertValueSubset = _assertValueSubset
68 |
69 |
def _assertValueSuperset(self, column, ref=None, msg=None, **filter_by):
    """Test that the set of subject values is a superset of reference
    values for the given *column*. If *ref* is provided, it is used
    in place of the set from ``referenceData``.
    """
    try:
        # First try the strict set comparison.
        self.assertValueSet(column, ref, msg, **filter_by)
    except datatest.DataAssertionError:
        # A superset may legitimately have extra values, so re-run
        # the comparison while allowing Extra differences.
        with self.allowExtra():
            self.assertValueSet(column, ref, msg, **filter_by)

DataTestCase.assertValueSuperset = _assertValueSuperset
82 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | :tocdepth: 2
2 |
3 | .. meta::
4 | :description: Datatest introduction and table of contents.
5 | :keywords: data cleaning, data quality, etl testing, data validation, data testing, data preparation, python, datatest
6 | :title: Datatest: Test driven data-wrangling and data validation.
7 |
8 | .. module:: datatest
9 | :synopsis: Test driven data-wrangling and data validation.
10 | .. moduleauthor:: Shawn Brown
11 | .. sectionauthor:: Shawn Brown
12 |
13 |
14 | ########################################################
15 | Datatest: Test driven data-wrangling and data validation
16 | ########################################################
17 |
18 |
19 | .. include:: ../README.rst
20 | :start-after: start-inclusion-marker-badge-substitutions
21 | :end-before: end-inclusion-marker-badge-substitutions
22 |
23 | |licensebadge| |pythonbadge| |requiresbadge| |releasebadge| |repobadge|
24 |
25 |
26 | Datatest helps to speed up and formalize data-wrangling and data
27 | validation tasks. It was designed to work with poorly formatted
28 | data by detecting and describing validation failures.
29 |
30 | * |Validate| the format, type, set membership, and more from a variety of data
31 | sources including pandas ``DataFrames`` and ``Series``, NumPy ``ndarrays``,
32 | built-in data structures, etc.
33 | * Smart |comparison behavior| applies the appropriate validation method for
34 | a given data requirement.
35 | * Automatic |data handling| manages the validation of single elements,
36 | sequences, sets, dictionaries, and other containers of elements.
37 | * |Difference objects| characterize the discrepancies and deviations
38 | between a dataset and its requirements.
39 | * |Acceptance managers| distinguish between ideal criteria and acceptable
40 | differences.
41 |
42 | .. |Validate| replace:: :ref:`Validate `
43 | .. |comparison behavior| replace:: :ref:`comparison behavior `
44 | .. |data handling| replace:: :ref:`data handling `
45 | .. |Difference objects| replace:: :ref:`Difference objects `
46 | .. |Acceptance managers| replace:: :ref:`Acceptance managers `
47 |
48 |
49 | **Test driven data-wrangling** is a process for taking data from a source
50 | of unverified quality or format and producing a verified, well-formatted
51 | dataset. It repurposes software testing practices for data preparation
52 | and quality assurance projects. **Pipeline validation** monitors the status
53 | and quality of data as it passes through a pipeline and identifies *where*
54 | in a pipeline an error occurs.
55 |
56 | See the project `README `_ file for
57 | full details regarding supported versions, backward compatibility, and
58 | more.
59 |
60 |
61 | =================
62 | Table of Contents
63 | =================
64 |
65 | .. toctree::
66 | :caption: Documentation
67 | :hidden:
68 |
   Home <self>
70 |
71 |
72 | .. toctree::
73 | :maxdepth: 2
74 |
75 | intro/index
76 | how-to/index
77 | reference/index
78 | discussion/index
79 |
80 | ..
81 | OMIT UNFINISHED PAGES:
82 | tutorial/index
83 |
84 |
--------------------------------------------------------------------------------
/datatest/_working_directory.py:
--------------------------------------------------------------------------------
1 | """working_directory context manager."""
2 |
3 | import os
4 | from ._compatibility import contextlib
5 |
6 |
class working_directory(contextlib.ContextDecorator):
    """A context manager to temporarily set the working directory
    to a given *path*. If *path* specifies a file, the file's
    directory is used. When exiting the with-block, the working
    directory is automatically changed back to its previous
    location.

    **Context Manager:**

    You can use Python's :py:obj:`__file__` constant to load data
    relative to a file's current directory:

    .. code-block:: python
        :emphasize-lines: 4

        from datatest import working_directory
        import pandas as pd

        with working_directory(__file__):
            my_df = pd.read_csv('myfile.csv')

    **Decorator:**

    This context manager can also be used as a decorator:

    .. code-block:: python
        :emphasize-lines: 4

        from datatest import working_directory
        import pandas as pd

        @working_directory(__file__)
        def my_df():
            return pd.read_csv('myfile.csv')

    **Explicit Control:**

    In some cases, you may want to forgo the use of a context manager
    or decorator. You can explicitly control directory switching with
    the ``change()`` and ``revert()`` methods:

    .. code-block:: python
        :emphasize-lines: 4,8

        from datatest import working_directory

        work_dir = working_directory(__file__)
        work_dir.change()

        ...

        work_dir.revert()
    """
    def __init__(self, path):
        if os.path.isfile(path):
            path = os.path.dirname(path)
        self._working_dir = os.path.abspath(path)
        self._original_dir = None  # Assigned on __enter__(), not before.

    def __enter__(self):
        if self._original_dir:
            # A non-None original dir means we are already inside the
            # context--re-entering would lose the true original path.
            msg = 'cannot reenter {0}, already entered from {1!r}'.format(
                self.__class__.__name__,
                self._original_dir,
            )
            raise RuntimeError(msg)

        self._original_dir = os.path.abspath(os.getcwd())
        os.chdir(self._working_dir)
        # Fix: return the manager itself so `with working_directory(p)
        # as wd:` binds the manager instead of None (backward
        # compatible--plain `with` statements are unaffected).
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if self._original_dir:
            os.chdir(self._original_dir)
        self._original_dir = None  # Marks the context as exited.

    def change(self):
        """Change to the defined working directory (enter the context).

        While operating in a working directory context, you cannot
        enter it again. Calling ``change()`` a second time will raise
        a :py:class:`RuntimeError`---you must first call ``revert()``.
        """
        self.__enter__()

    def revert(self):
        """Revert to the original working directory (exit the context).

        If no context has been entered, calling ``revert()`` will do
        nothing and pass without error.
        """
        self.__exit__(None, None, None)
98 |
--------------------------------------------------------------------------------
/docs/discussion/data-preparation.rst:
--------------------------------------------------------------------------------
1 |
2 | .. meta::
3 | :description: A discussion about the need for a structured approach
4 | to data preparation and data-wrangling.
5 | :keywords: data preparation, test driven, data-wrangling, structured,
6 | data science
7 |
8 |
9 | ################
10 | Data Preparation
11 | ################
12 |
13 | In the practice of data science, data preparation is a huge part of
14 | the job. Practitioners often spend 50 to 80 percent of their time
15 | wrangling data [#f1]_ [#f2]_ [#f3]_ [#f4]_. This critically important
16 | phase is time-consuming, unglamorous, and often poorly structured.
17 |
18 | The :mod:`datatest` package was created to support test driven
19 | data-wrangling and provide a disciplined approach to an otherwise
20 | messy process.
21 |
22 | A datatest suite can facilitate quick edit-test cycles to help guide
23 | the selection, cleaning, integration, and formatting of data. Data tests
24 | can also help to automate check-lists, measure progress, and promote
25 | best practices.
26 |
27 |
28 | **************************
29 | Test Driven Data-Wrangling
30 | **************************
31 |
32 | When data is messy, poorly structured, or uses an incompatible format,
33 | it's oftentimes not possible to prepare it using an automated process.
There are a multitude of ways for messy data to confound a processing
35 | system or schema. Dealing with data like this requires a data-wrangling
36 | approach where users are actively involved with making decisions and
37 | judgment calls about cleaning and formatting the data.
38 |
39 | A well-structured suite of data tests can serve as a template to guide
40 | the data-wrangling process. Using a quick edit-test cycle, users can:
41 |
42 | 1. focus on a failing test
2. make a change to the data or the test
44 | 3. re-run the suite to check that the test now passes
45 | 4. then, move on to the next failing test
46 |
47 | The work of cleaning and formatting data takes place outside of the
datatest package itself. Users can work with the tools they find
49 | the most productive (Excel, `pandas `_, R,
50 | sed, etc.).
51 |
52 |
53 | .. rubric:: Footnotes
54 |
55 | .. [#f1] "Data scientists, according to interviews and expert estimates, spend
56 | from 50 percent to 80 percent of their time mired in this more mundane
57 | labor of collecting and preparing unruly digital data..." Steve Lohraug
58 | in *For Big-Data Scientists, 'Janitor Work' Is Key Hurdle to Insights*.
59 | Retrieved from http://www.nytimes.com/2014/08/18/technology/for-big-data-scientists-hurdle-to-insights-is-janitor-work.html
60 |
61 | .. [#f2] "This [data preparation step] has historically taken the largest part
62 | of the overall time in the data mining solution process, which in some
63 | cases can approach 80% of the time." *Dynamic Warehousing: Data Mining
64 | Made Easy* (p. 19)
65 |
66 | .. [#f3] Online poll of data mining practitioners: `See image <../_static/data_prep_poll.png>`_,
67 | *Data preparation (Oct 2003)*.
68 | Retrieved from http://www.kdnuggets.com/polls/2003/data_preparation.htm
69 | [While this poll is quite old, the situation has not changed
70 | drastically.]
71 |
72 | .. [#f4] "As much as 80% of KDD is about preparing data, and the remaining 20%
73 | is about mining." *Data Mining for Design and Manufacturing* (p. 44)
74 |
--------------------------------------------------------------------------------
/datatest/__past__/load_csv.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import warnings
3 | from .._utils import exhaustible
4 | from .._utils import seekable
5 | from .._utils import file_types
6 | from .get_reader import get_reader
7 | from .temptable import load_data
8 | from .temptable import savepoint
9 |
10 |
11 | preferred_encoding = 'utf-8'
12 | fallback_encoding = ['latin-1']
13 |
14 |
def load_csv(cursor, table, csvfile, encoding=None, **kwds):
    """Load *csvfile* and insert data into *table*.

    If *encoding* is given, decode with it and let any decode error
    propagate (no fallback recovery). If omitted, first try the
    module-level *preferred_encoding*; on failure, try each entry in
    *fallback_encoding* in turn, warning when a fallback succeeds.
    Every attempt runs inside a savepoint so a failed load leaves
    *table* unchanged.
    """
    # NOTE: preferred_encoding/fallback_encoding are only *read* here,
    # so no `global` declarations are needed.
    default = kwds.get('restval', '')  # Used for default column value.

    if encoding:
        # When an encoding is specified, use it to load *csvfile* or
        # fail if there are errors (no fallback recovery):
        with savepoint(cursor):
            reader = get_reader.from_csv(csvfile, encoding, **kwds)
            load_data(cursor, table, reader, default=default)

        return  # <- EXIT!

    # When the encoding is unspecified, try to load *csvfile* using the
    # preferred encoding and failing that, try the fallback encodings:

    if isinstance(csvfile, file_types) and seekable(csvfile):
        position = csvfile.tell()  # Get current position if
    else:                          # csvfile is file-like and
        position = None            # supports random access.

    try:
        with savepoint(cursor):
            reader = get_reader.from_csv(csvfile, preferred_encoding, **kwds)
            load_data(cursor, table, reader, default=default)

        return  # <- EXIT!

    except UnicodeDecodeError as orig_error:
        # Fix: this whole recovery section must live INSIDE the except
        # clause. Python 3 deletes the `as orig_error` binding when the
        # clause exits, so the previously dedented code raised a
        # NameError instead of performing fallback recovery.
        if exhaustible(csvfile) and position is None:
            # Non-seekable stream already partially consumed--cannot
            # rewind to retry, so fail with an explanatory message.
            encoding, object_, start, end, reason = orig_error.args  # Unpack args.
            reason = (
                '{0}: unable to load {1!r}, cannot attempt fallback with '
                '{2!r} type: must specify an appropriate text encoding'
            ).format(reason, csvfile, csvfile.__class__.__name__)
            raise UnicodeDecodeError(encoding, object_, start, end, reason)

        if isinstance(fallback_encoding, list):
            fallback_list = fallback_encoding
        else:
            fallback_list = [fallback_encoding]

        for fallback in fallback_list:
            if position is not None:
                csvfile.seek(position)  # Rewind before each retry.

            try:
                with savepoint(cursor):
                    reader = get_reader.from_csv(csvfile, fallback, **kwds)
                    load_data(cursor, table, reader, default=default)

                msg = (
                    '{0}: loaded {1!r} using fallback {2!r}: specify an '
                    'appropriate text encoding to assure correct operation'
                ).format(orig_error, csvfile, fallback)
                warnings.warn(msg)

                return  # <- EXIT!

            except UnicodeDecodeError:
                pass  # Try the next fallback encoding.

        # Note: DO NOT refactor this section using a for-else. I swear...
        encoding, object_, start, end, reason = orig_error.args  # Unpack args.
        reason = (
            '{0}: unable to load {1!r}, fallback recovery unsuccessful: '
            'must specify an appropriate text encoding'
        ).format(reason, csvfile)
        raise UnicodeDecodeError(encoding, object_, start, end, reason)
87 |
--------------------------------------------------------------------------------
/tests/past_api07_sources_pandas.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from . import _unittest as unittest
3 | from .mixins import CountTests
4 | from .mixins import OtherTests
5 |
6 | from datatest.__past__.api07_sources import PandasSource
7 | from datatest.__past__.api07_sources import _version_info
8 |
9 |
10 | ########################################################################
11 | # Test version parsing and import ``pandas`` if available.
12 | ########################################################################
class TestVersionInfo(unittest.TestCase):
    def test_public_version(self):
        # A plain public release string becomes a tuple of ints.
        self.assertEqual(_version_info('0.19.2'), (0, 19, 2))

    def test_local_version(self):
        """Version items after a "+" are considered "local" version
        identifiers (see PEP 440).
        """
        result = _version_info('0.19.2+0.g825876c.dirty')
        self.assertEqual(result, (0, 19, 2, 0, 'g825876c', 'dirty'))
26 |
27 |
try:
    import pandas
    # Minimum supported versions: pandas 0.13.0 with numpy 1.7.1.
    if (_version_info(pandas) < (0, 13, 0)
            or _version_info(pandas.np) < (1, 7, 1)):
        raise ImportError
except (ImportError, AttributeError):
    # Fix: `pandas.np` was removed in newer pandas releases and raises
    # AttributeError (not ImportError); treat that the same as pandas
    # being unavailable so the skip decorators below take effect
    # instead of an import-time crash.
    pandas = None
34 | pandas = None
35 |
36 |
37 | ########################################################################
38 | # Test with DataFrame with no specified index (using default indexing).
39 | ########################################################################
@unittest.skipUnless(pandas, 'requires pandas 0.13 or newer')
class TestPandasSource(OtherTests, unittest.TestCase):
    def setUp(self):
        # DataFrame with the default range index.
        frame = pandas.DataFrame(self.testdata, columns=self.fieldnames)
        self.datasource = PandasSource(frame)
45 |
46 |
@unittest.skipUnless(pandas, 'requires pandas 0.13 or newer')
class TestPandasSourceCount(CountTests, unittest.TestCase):
    def setUp(self):
        # DataFrame with the default range index.
        frame = pandas.DataFrame(self.testdata, columns=self.fieldnames)
        self.datasource = PandasSource(frame)
52 |
53 |
54 | ########################################################################
55 | # Test with DataFrame that has a specified index.
56 | ########################################################################
@unittest.skipUnless(pandas, 'requires pandas 0.13 or newer')
class TestPandasSourceWithIndex(OtherTests, unittest.TestCase):
    def setUp(self):
        frame = pandas.DataFrame(self.testdata, columns=self.fieldnames)
        # Use a compound index rather than the default range index.
        self.datasource = PandasSource(frame.set_index(['label1', 'label2']))
63 |
64 |
@unittest.skipUnless(pandas, 'requires pandas 0.13 or newer')
class TestPandasSourceWithIndexCount(CountTests, unittest.TestCase):
    # Count behavior for a DataFrame using a compound (two-column) index.
    def setUp(self):
        df = pandas.DataFrame(self.testdata, columns=self.fieldnames)
        df = df.set_index(['label1', 'label2']) # <- Specify index!
        self.datasource = PandasSource(df)

    def test_compound_keys(self):
        # Expected counts keyed by (label1, label2) tuples.
        expected = {
            ('a', 'x'): 2,
            ('a', 'y'): 1,
            ('a', ''): 1,
            ('b', 'z'): 1,
            ('b', 'y'): 1,
            ('b', 'x'): 1,
            #('b', None): 1,
            # NOTE(review): `pandas.np` was removed in newer pandas
            # releases--confirm this attribute still resolves on the
            # pinned pandas version. The comparison below also relies
            # on the result using this same nan object as its key.
            ('b', pandas.np.nan): 1, # <- Returns nan instead of None (and that's OK!).
            ('b', ''): 1,
        }
        result = self.datasource.count('label1', ['label1', 'label2'])
        self.assertEqual(expected, result)
86 |
--------------------------------------------------------------------------------
/docs/how-to/date-time-str.rst:
--------------------------------------------------------------------------------
1 |
2 | .. currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: How to validate date formats.
6 | :keywords: datatest, date format, validate, validation
7 |
8 |
9 | #####################################
10 | How to Validate Date and Time Strings
11 | #####################################
12 |
13 | To validate date and time formats, we can define a helper function that
14 | uses `strftime codes`_ to check for matching strings.
15 |
16 | In the following example, we use the code ``%Y-%m-%d`` to check for
17 | dates that match the pattern YYYY-MM-DD:
18 |
19 | .. code-block:: python
20 | :emphasize-lines: 17
21 | :linenos:
22 |
23 | from datetime import datetime
24 | from datatest import validate
25 |
26 |
27 | def strftime_format(format):
28 | def func(value):
29 | try:
30 | datetime.strptime(value, format)
31 | except ValueError:
32 | return False
33 | return True
34 | func.__doc__ = f'should use date format {format}'
35 | return func
36 |
37 |
38 | data = ['2020-02-29', '03-17-2021', '2021-02-29', '2021-04-01']
39 | validate(data, strftime_format('%Y-%m-%d'))
40 |
41 |
42 | Date strings that don't match the required format are flagged as
43 | :class:`Invalid`:
44 |
45 | .. code-block:: none
46 |
47 | Traceback (most recent call last):
     File "example.py", line 17, in <module>
49 | validate(data, strftime_format('%Y-%m-%d'))
50 | datatest.ValidationError: should use date format %Y-%m-%d (2 differences): [
51 | Invalid('03-17-2021'),
52 | Invalid('2021-02-29'),
53 | ]
54 |
55 | Above, the date ``03-17-2021`` is invalid because it's not well-formed
56 | and ``2021-02-29`` is invalid because 2021 is not a leap-year so the last
57 | day of February is the 28th---there is no February 29th in that calendar
58 | year.
59 |
60 |
61 | Strftime Codes for Common Formats
62 | =================================
63 |
64 | You can use the following **format codes** with the function
65 | defined earlier to validate many common date and time formats
66 | (e.g., ``strftime_format('%d %B %Y')``):
67 |
68 | ======================== ========================= ========================
69 | format codes description example
70 | ======================== ========================= ========================
71 | ``%Y-%m-%d`` YYYY-MM-DD 2021-03-17
72 | ``%m/%d/%Y`` MM/DD/YYYY 3/17/2021
73 | ``%d/%m/%Y`` DD/MM/YYYY 17/03/2021
74 | ``%d.%m.%Y`` DD.MM.YYYY 17.03.2021
75 | ``%d %B %Y`` DD Month YYYY 17 March 2021
76 | ``%b %d, %Y`` Mnth DD, YYYY Mar 17, 2021
77 | ``%a %b %d %H:%M:%S %Y`` WkDay Mnth DD H:M:S YYYY Wed Mar 17 19:42:50 2021
78 | ``%I:%M %p`` 12-hour time 7:42 PM [1]_
79 | ``%H:%M:%S`` 24-hour time with seconds 19:42:50
80 | ======================== ========================= ========================
81 |
82 | In Python's :py:mod:`datetime` module, see `strftime() and strptime() Format Codes`_
83 | for all supported codes.
84 |
85 | .. _`strftime codes`: https://docs.python.org/library/datetime.html#strftime-and-strptime-format-codes
86 | .. _`strftime() and strptime() Format Codes`: https://docs.python.org/library/datetime.html#strftime-and-strptime-format-codes
87 |
88 |
89 | .. rubric:: Footnotes
90 |
91 | .. [1] The code ``%p`` expects the system locale's equivalent of AM or PM.
92 | For example, the locale ``en_US`` uses "AM" and "PM" while the locale
93 | ``de_DE`` uses "am" and "pm".
94 |
--------------------------------------------------------------------------------
/tests/test_runner.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sys
3 | from . import _unittest as unittest
4 | from datatest import DataTestCase
5 | from datatest import ValidationError
6 | from datatest import Missing
7 |
8 | from datatest.runner import DataTestResult
9 | from datatest.runner import mandatory
10 | from datatest.runner import _sort_key
11 |
12 |
class TestDataTestResult(unittest.TestCase):
    """Tests for DataTestResult's mandatory-test handling."""

    def test_is_mandatory(self):
        result = DataTestResult()

        class _TestClass(DataTestCase):  # Dummy class.
            def test_method(_self):
                pass

            def runTest(_self):
                pass

        # A plain test case is not mandatory.
        self.assertFalse(result._is_mandatory(_TestClass()))

        # An instance flagged with __datatest_mandatory__ is mandatory.
        flagged_case = _TestClass()
        flagged_case.__datatest_mandatory__ = True
        self.assertTrue(result._is_mandatory(flagged_case))

        # Mandatory method.
        #TODO!!!: Need to make this test.

        # Objects that are not test cases are never mandatory.
        self.assertFalse(result._is_mandatory(object()))

    def test_add_mandatory_message(self):
        result = DataTestResult()

        original_tuple = (
            ValidationError,
            ValidationError([Missing('x')], 'example failure'),
            '',
        )

        _, error, _ = result._add_mandatory_message(original_tuple)
        self.assertRegex(str(error), 'mandatory test failed, stopping early')
50 |
51 |
class TestOrdering(unittest.TestCase):
    def test_sort_key(self):
        # NOTE: The "+N" comments inside SampleCase record each line's
        # offset from test_reference()--the assertions below depend on
        # these exact offsets, so do not add or remove lines inside the
        # class body.

        # Define and instantiate sample case.
        class SampleCase(unittest.TestCase):
            def test_reference(self):  # <- This line number used as reference.
                pass                                     # +1
                                                         # +2
            @unittest.skip('Testing skip behavior.')     # +3 (first check)
            def test_skipped(self):                      # +4
                pass                                     # +5
                                                         # +6
            @mandatory                                   # +7 (second check)
            def test_mandatory(self):                    # +8
                pass                                     # +9

        # Get line number of undecorated method--this is used as a
        # reference point from which to determine the required line
        # numbers for the decorated methods.
        reference_case = SampleCase('test_reference')
        _, reference_line_no = _sort_key(reference_case)

        # Starting in Python 3.3, the @functools.wraps() decorator
        # added a greatly needed `__wrapped__` attribute that points
        # to the original wrapped object. After @unittest.skip() is
        # applied, this attribute is needed to get the line number
        # of the original object (instead of the line number of the
        # decorator).
        if sys.version_info >= (3, 3):
            # Test line number of skipped method.
            skipped_case = SampleCase('test_skipped')
            skipped_line_no = reference_line_no + 3
            _, line_no = _sort_key(skipped_case)
            self.assertEqual(skipped_line_no, line_no)

            # Test line number of mandatory method.
            mandatory_case = SampleCase('test_mandatory')
            mandatory_line_no = reference_line_no + 7
            _, line_no = _sort_key(mandatory_case)
            self.assertEqual(mandatory_line_no, line_no)
91 |
--------------------------------------------------------------------------------
/datatest/_compatibility/builtins.py:
--------------------------------------------------------------------------------
1 | """compatibility layer for built-in functions"""
2 | from __future__ import absolute_import
3 |
4 |
try:
    chr = unichr  # Python 2: rebind chr to unichr for full Unicode range.
except NameError:
    pass  # Python 3: unichr is gone; built-in chr already handles Unicode.


from io import open as _open
if open == _open:  # Starting in 3.1
    open = open  # <- Declare in local namespace.
else:
    open = _open  # Python 2: io.open supports encoding/newline arguments.


try:
    callable = callable  # Removed from 3.0 and 3.1, added back in 3.2.
except NameError:
    def callable(obj):
        # Re-implementation for 3.0/3.1: an object is callable when any
        # class in its MRO defines __call__.
        parent_types = type(obj).__mro__
        return any('__call__' in typ.__dict__ for typ in parent_types)


try:
    property.__isabstractmethod__  # New in 3.3.
    property = property
except AttributeError:
    # Older versions: subclass property so that it copies the getter's
    # __isabstractmethod__ flag (lets @property compose with abstract
    # methods).
    _property = property
    class property(_property):
        def __init__(self, fget=None, fset=None, fdel=None, doc=None):
            super(property, self).__init__(fget, fset, fdel, doc)
            self.__isabstractmethod__ = getattr(
                fget, '__isabstractmethod__', False,
            )


# In the move to Python 3.0, map, filter, zip were replaced with their
# iterable equivalents from the itertools module.
try:
    map.__iter__    # Raises AttributeError on Python 2 where these
    filter.__iter__ # built-ins return lists instead of iterators.
    zip.__iter__
    map = map
    filter = filter
    zip = zip
except AttributeError:
    from itertools import imap as map
    from itertools import ifilter as filter
    from itertools import izip as zip
52 |
53 |
try:
    max([0, 1], default=None)  # The default keyword for max()
    min([0, 1], default=None)  # and min() is new in 3.4.
    max = max
    min = min
except TypeError:
    from itertools import chain as _chain

    _max = max
    def max(*iterable, **kwds):
        """
        max(iterable, *[, default, key])
        max(arg1, arg2, *args, *[, key])

        Backport wrapper adding the *default* keyword (returned when
        the iterable is empty) for versions before 3.4.
        """
        # Reject keywords the built-in signature does not define.
        allowed_kwds = ('default', 'key')
        for key in kwds:
            if key not in allowed_kwds:
                msg = "'{0}' is an invalid keyword argument for this function"
                raise TypeError(msg.format(key))

        # One positional argument is the iterable itself; several
        # positional arguments are compared directly.
        if len(iterable) == 1:
            iterable = iterable[0]

        try:
            first_item = next(iter(iterable))
            if iter(iterable) is iterable:
                # Exhaustible iterator--re-attach the consumed item.
                iterable = _chain([first_item], iterable)
        except StopIteration:
            # Empty input: honor *default* if given, else mirror the
            # built-in error message.
            if 'default' not in kwds:
                raise ValueError('max() arg is an empty sequence')
            return kwds['default']

        if 'key' in kwds:
            return _max(iterable, key=kwds['key'])
        return _max(iterable)

    _min = min
    def min(*iterable, **kwds):
        """
        min(iterable, *[, default, key])
        min(arg1, arg2, *args, *[, key])

        Backport wrapper adding the *default* keyword (returned when
        the iterable is empty) for versions before 3.4.
        """
        # Reject keywords the built-in signature does not define.
        allowed_kwds = ('default', 'key')
        for key in kwds:
            if key not in allowed_kwds:
                msg = "'{0}' is an invalid keyword argument for this function"
                raise TypeError(msg.format(key))

        # One positional argument is the iterable itself; several
        # positional arguments are compared directly.
        if len(iterable) == 1:
            iterable = iterable[0]

        try:
            first_item = next(iter(iterable))
            if iter(iterable) is iterable:
                # Exhaustible iterator--re-attach the consumed item.
                iterable = _chain([first_item], iterable)
        except StopIteration:
            # Empty input: honor *default* if given, else mirror the
            # built-in error message.
            if 'default' not in kwds:
                raise ValueError('min() arg is an empty sequence')
            return kwds['default']

        if 'key' in kwds:
            return _min(iterable, key=kwds['key'])
        return _min(iterable)
117 |
--------------------------------------------------------------------------------
/datatest/main.py:
--------------------------------------------------------------------------------
1 | """Datatest main program"""
2 |
3 | import sys as _sys
4 | from unittest import TestProgram as _TestProgram
5 | from unittest import defaultTestLoader as _defaultTestLoader
6 | try:
7 | from unittest.signals import installHandler
8 | except ImportError:
9 | installHandler = None
10 |
11 | from datatest import DataTestRunner
12 |
13 | __unittest = True
14 | __datatest = True
15 |
16 |
class DataTestProgram(_TestProgram):
    """Command-line test program that runs discovered tests with a
    DataTestRunner (see also unittest.TestProgram).
    """
    def __init__(self, module='__main__', defaultTest=None, argv=None,
                 testRunner=DataTestRunner, testLoader=_defaultTestLoader,
                 exit=True, verbosity=1, failfast=None, catchbreak=None,
                 buffer=None, ignore=False):
        # Record the datatest-specific flag, then defer everything
        # else to the standard-library initializer.
        self.ignore = ignore
        _TestProgram.__init__(self,
                              module=module,
                              defaultTest=defaultTest,
                              argv=argv,
                              testRunner=testRunner,
                              testLoader=testLoader,
                              exit=exit,
                              verbosity=verbosity,
                              failfast=failfast,
                              catchbreak=catchbreak,
                              buffer=buffer)

    def runTests(self):
        """Instantiate the runner if necessary, run the loaded tests,
        and--when self.exit is set--exit with a status reflecting the
        result.
        """
        try:
            if self.catchbreak and installHandler:
                installHandler()
        except AttributeError:
            pass  # Object may lack a catchbreak attribute.

        if self.testRunner is None:
            self.testRunner = DataTestRunner

        if isinstance(self.testRunner, type):
            # Collect keyword arguments from whichever of these
            # attributes the program instance actually has.
            candidate_names = ('verbosity', 'failfast', 'buffer',
                               'warnings', 'ignore')
            kwds = dict((name, getattr(self, name))
                            for name in candidate_names
                                if hasattr(self, name))
            try:
                runner = self.testRunner(**kwds)
            except TypeError:
                # Runner class may not accept a 'warnings' argument--
                # drop it (if present) and try once more.
                kwds.pop('warnings', None)
                runner = self.testRunner(**kwds)
        else:
            runner = self.testRunner  # Assumed to be a TestRunner instance.

        self.result = runner.run(self.test)
        if self.exit:
            _sys.exit(not self.result.wasSuccessful())
62 |
63 |
if _sys.version_info[:2] == (3, 1):  # Patch methods for Python 3.1.
    # Replace __init__ with a reduced signature--presumably 3.1's
    # TestProgram does not accept the newer keyword arguments
    # (verbosity, failfast, catchbreak, buffer); confirm against the
    # 3.1 standard library if this branch is ever revisited.
    def __init__(self, module='__main__', defaultTest=None, argv=None,
                 testRunner=DataTestRunner, testLoader=_defaultTestLoader,
                 exit=True, ignore=False):
        self.ignore = ignore
        _TestProgram.__init__(self,
                              module=module,
                              defaultTest=defaultTest,
                              argv=argv,
                              testRunner=testRunner,
                              testLoader=testLoader,
                              exit=exit)
    DataTestProgram.__init__ = __init__

elif _sys.version_info[:2] == (2, 6):  # Patch __init__() for Python 2.6.
    def __init__(self, module='__main__', defaultTest=None, argv=None,
                 testRunner=DataTestRunner, testLoader=_defaultTestLoader,
                 exit=True, ignore=False):
        self.exit = exit  # <- 2.6 does not handle exit argument.
        self.ignore = ignore
        _TestProgram.__init__(self,
                              module=module,
                              defaultTest=defaultTest,
                              argv=argv,
                              testRunner=testRunner,
                              testLoader=testLoader)
    DataTestProgram.__init__ = __init__


main = DataTestProgram  # Entry point alias (mirrors unittest.main).
94 |
--------------------------------------------------------------------------------
/docs/how-to/fuzzy-matching.rst:
--------------------------------------------------------------------------------
1 |
2 | .. currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: How to assert fuzzy matches.
6 | :keywords: approximate string, fuzzy matching, testing, datatest
7 |
8 |
9 | #############################
10 | How to Validate Fuzzy Matches
11 | #############################
12 |
13 | When comparing strings of text, it can sometimes be useful
14 | to check that values are similar instead of asserting that
15 | they are exactly the same. Datatest provides options for
16 | *approximate string matching* (also called "fuzzy
17 | matching").
18 |
19 | When checking mappings or sequences of values, you can accept
20 | approximate matches with the :meth:`accepted.fuzzy` acceptance:
21 |
22 | .. tabs::
23 |
24 | .. tab:: Using Acceptance
25 |
26 | .. code-block:: python
27 | :emphasize-lines: 19
28 | :linenos:
29 |
30 | from datatest import validate, accepted
31 |
32 | linked_record = {
33 | 'id165': 'Saint Louis',
34 | 'id382': 'Raliegh',
35 | 'id592': 'Austin',
36 | 'id720': 'Cincinatti',
37 | 'id826': 'Philadelphia',
38 | }
39 |
40 | master_record = {
41 | 'id165': 'St. Louis',
42 | 'id382': 'Raleigh',
43 | 'id592': 'Austin',
44 | 'id720': 'Cincinnati',
45 | 'id826': 'Philadelphia',
46 | }
47 |
48 | with accepted.fuzzy(cutoff=0.6):
49 | validate(linked_record, master_record)
50 |
51 | .. tab:: No Acceptance
52 |
53 | .. code-block:: python
54 | :linenos:
55 |
56 | from datatest import validate
57 |
58 | linked_record = {
59 | 'id165': 'Saint Louis',
60 | 'id382': 'Raliegh',
61 | 'id592': 'Austin',
62 | 'id720': 'Cincinatti',
63 | 'id826': 'Philadelphia',
64 | }
65 |
66 | master_record = {
67 | 'id165': 'St. Louis',
68 | 'id382': 'Raleigh',
69 | 'id592': 'Austin',
70 | 'id720': 'Cincinnati',
71 | 'id826': 'Philadelphia',
72 | }
73 |
74 | validate(linked_record, master_record)
75 |
76 |
77 | .. code-block:: none
78 | :emphasize-lines: 5-7
79 |
80 | Traceback (most recent call last):
81 | File "example.py", line 19, in
82 | validate(linked_record, master_record)
83 | datatest.ValidationError: does not satisfy mapping requirements (3 differences): {
84 | 'id165': Invalid('Saint Louis', expected='St. Louis'),
85 | 'id382': Invalid('Raliegh', expected='Raleigh'),
86 | 'id720': Invalid('Cincinatti', expected='Cincinnati'),
87 | }
88 |
89 |
90 | If variation is an inherent, natural feature of the data and
91 | does not necessarily represent a defect, it may be appropriate
92 | to use :meth:`validate.fuzzy` instead of the acceptance shown
93 | previously:
94 |
95 | .. code-block:: python
96 | :emphasize-lines: 19
97 | :linenos:
98 |
99 | from datatest import validate
100 |
101 | linked_record = {
102 | 'id165': 'Saint Louis',
103 | 'id382': 'Raliegh',
104 | 'id592': 'Austin',
105 | 'id720': 'Cincinatti',
106 | 'id826': 'Philadelphia',
107 | }
108 |
109 | master_record = {
110 | 'id165': 'St. Louis',
111 | 'id382': 'Raleigh',
112 | 'id592': 'Austin',
113 | 'id720': 'Cincinnati',
114 | 'id826': 'Philadelphia',
115 | }
116 |
117 | validate.fuzzy(linked_record, master_record, cutoff=0.6)
118 |
119 |
120 | That said, it's probably more appropriate to use an acceptance
121 | for this specific example.
122 |
123 |
--------------------------------------------------------------------------------
/datatest/_compatibility/functools.py:
--------------------------------------------------------------------------------
1 | """compatibility layer for functools (Python standard library)"""
2 | from __future__ import absolute_import
3 | from functools import *
4 | from sys import version_info as _version_info
5 |
6 |
if _version_info[:2] <= (2, 7):  # For version 2.7 and earlier.

    def update_wrapper(wrapper,
                       wrapped,
                       assigned=WRAPPER_ASSIGNMENTS,
                       updated=WRAPPER_UPDATES):
        """Patched update_wrapper(): tolerates attributes missing from
        *wrapped* and records the original object on ``__wrapped__``.
        """
        for attr in assigned:
            try:                               # <- This try/except
                value = getattr(wrapped, attr) # fixes issue #3445
            except AttributeError:             # in Python 2.7 and
                pass                           # earlier.
            else:
                setattr(wrapper, attr, value)
        for attr in updated:
            getattr(wrapper, attr).update(getattr(wrapped, attr, {}))
        wrapper.__wrapped__ = wrapped  # Points back to the original object.
        return wrapper


    def wraps(wrapped,
              assigned=WRAPPER_ASSIGNMENTS,
              updated=WRAPPER_UPDATES):
        """Decorator factory applying the patched update_wrapper()."""
        return partial(update_wrapper,  # <- Patched update_wrapper().
                       wrapped=wrapped,
                       assigned=assigned,
                       updated=updated)
33 |
34 |
try:
    partialmethod  # New in version 3.4.
except NameError:
    # Adapted from the Python 3.6 Standard Library.
    class partialmethod(object):
        """Method descriptor with partial application of positional
        and keyword arguments (backport of functools.partialmethod).
        """
        def __init__(self, func, *args, **keywords):
            if not callable(func) and not hasattr(func, "__get__"):
                raise TypeError("{!r} is not callable or a descriptor"
                                .format(func))

            # Collapse nested partialmethods: store the innermost
            # function directly with the accumulated args/keywords.
            if isinstance(func, partialmethod):
                self.func = func.func
                self.args = func.args + args
                self.keywords = func.keywords.copy()
                self.keywords.update(keywords)
            else:
                self.func = func
                self.args = args
                self.keywords = keywords

        def __repr__(self):
            args = ", ".join(map(repr, self.args))
            keywords = ", ".join("{}={!r}".format(k, v)
                                    for k, v in self.keywords.items())
            format_string = "{module}.{cls}({func}, {args}, {keywords})"
            return format_string.format(module=self.__class__.__module__,
                                        cls=self.__class__.__qualname__,
                                        func=self.func,
                                        args=args,
                                        keywords=keywords)

        def _make_unbound_method(self):
            # Fallback used when self.func has no __get__ (e.g. a
            # plain callable): build a method that injects the bound
            # object plus the stored args/keywords on each call.
            def _method(*args, **keywords):
                call_keywords = self.keywords.copy()
                call_keywords.update(keywords)
                #cls_or_self, *rest = args
                cls_or_self, rest = args[0], args[1:]
                call_args = (cls_or_self,) + self.args + tuple(rest)
                return self.func(*call_args, **call_keywords)
            _method.__isabstractmethod__ = self.__isabstractmethod__
            _method._partialmethod = self
            return _method

        def __get__(self, obj, cls):
            # Descriptor protocol: bind the underlying function (when
            # possible) and wrap it in a partial with the stored args.
            get = getattr(self.func, "__get__", None)
            result = None
            if get is not None:
                new_func = get(obj, cls)
                if new_func is not self.func:
                    result = partial(new_func, *self.args, **self.keywords)
                    try:
                        result.__self__ = new_func.__self__
                    except AttributeError:
                        pass
            if result is None:
                result = self._make_unbound_method().__get__(obj, cls)
            return result

        @property
        def __isabstractmethod__(self):
            # Mirror the abstractness of the wrapped function.
            return getattr(self.func, "__isabstractmethod__", False)
96 |
--------------------------------------------------------------------------------
/release-checklist.rst:
--------------------------------------------------------------------------------
1 |
2 | Release Checklist
3 | =================
4 |
5 | #. Make sure correct version number is set in the following files
6 | (remove the ".devN" suffix):
7 |
8 | * ``datatest/__init__.py``
9 | * ``docs/conf.py``
10 |
11 | #. Make sure the *description* argument in ``setup.py`` matches the project
12 | description on GitHub (in the "About" section).
13 |
14 | #. In the call to ``setup()``, check the versions defined by the
15 | *python_requires* argument (see the "Version specifiers" section of
16 | PEP-440 for details).
17 |
18 | #. In the call to ``setup()``, check the trove classifiers in the
19 | *classifiers* argument (see https://pypi.org/classifiers/ for values).
20 |
21 | #. Check that *packages* argument of ``setup()`` is correct. Check that the
22 | value matches what ``setuptools.find_packages()`` returns:
23 |
24 | .. code-block:: python
25 |
26 | >>> import setuptools
27 | >>> sorted(setuptools.find_packages('.', exclude=['tests']))
28 |
29 | Defining this list explicitly (rather than using ``find_packages()``
30 | directly in ``setup.py`` file) is needed when installing on systems
31 | where ``setuptools`` is not available.
32 |
33 | #. Make sure ``__past__`` sub-package includes a stub module for the
34 | current API version.
35 |
36 | #. Update ``README.rst`` (including "Backward Compatibility" section).
37 |
#. Make final edits to ``CHANGELOG`` (double-check release date and version).
39 |
40 | #. Commit and push final changes to upstream repository:
41 |
42 | Prepare version info, README, and CHANGELOG for version N.N.N release.
43 |
44 | #. Perform final checks to make sure there are no CI test failures.
45 |
46 | #. Make sure the packaging tools are up-to-date:
47 |
48 | .. code-block:: console
49 |
50 | pip install -U twine wheel setuptools check-manifest
51 |
52 | #. Check the manifest against the project's root folder:
53 |
54 | .. code-block:: console
55 |
56 | check-manifest .
57 |
58 | #. Remove all existing files in the ``dist/`` folder.
59 |
60 | #. Build new distributions:
61 |
62 | .. code-block:: console
63 |
64 | python setup.py sdist bdist_wheel
65 |
66 | #. Upload distributions to TestPyPI:
67 |
68 | .. code-block:: console
69 |
70 | twine upload --repository testpypi dist/*
71 |
72 | #. View the package's web page on TestPyPI and verify that the information
73 | is correct for the "Project links" and "Meta" sections:
74 |
75 | * https://test.pypi.org/project/datatest
76 |
77 | If you are testing a pre-release version, make sure to use the URL returned
78 | by twine in the previous step (the default URL shows the latest *stable*
79 | version).
80 |
81 | #. Test the installation process from TestPyPI:
82 |
83 | .. code-block:: console
84 |
85 | python -m pip install --index-url https://test.pypi.org/simple/ datatest
86 |
87 | If you're testing a pre-release version, make sure to use the "pip install"
88 | command listed at the top of the project's TestPyPI page.
89 |
90 | #. Upload source and wheel distributions to PyPI:
91 |
92 | .. code-block:: console
93 |
94 | twine upload dist/*
95 |
96 | #. Double check PyPI project page and test installation from PyPI:
97 |
98 | .. code-block:: console
99 |
100 | python -m pip install datatest
101 |
102 | #. Add version tag to upstream repository (also used by readthedocs.org).
103 |
104 | #. Iterate the version number in the development repository to the next
105 | anticipated release and add a "dev" suffix (e.g., N.N.N.dev1). This
106 | version number should conform to the "Version scheme" section of PEP-440.
107 | Make sure these changes are reflected in the following files:
108 |
109 | * ``datatest/__init__.py``
110 | * ``docs/conf.py``
111 |
112 | Commit these changes with a comment like the one below:
113 |
114 | Iterate version number to the next anticipated release.
115 |
116 | This is done so that installations made directly from the development
117 | repository and the "latest" docs are not confused with the just-published
118 | "stable" versions.
119 |
120 | #. Make sure the documentation reflects the new versions:
121 |
122 | * https://datatest.readthedocs.io/ (stable)
123 | * https://datatest.readthedocs.io/en/latest/ (latest)
124 |
125 | If the documentation was not automatically updated, you may need to
126 | login to https://readthedocs.org/ and start the build process manually.
127 |
128 | #. Publish update announcement to relevant mailing lists:
129 |
130 | * python-announce-list@python.org
131 | * testing-in-python@lists.idyll.org
132 |
--------------------------------------------------------------------------------
/tests/test_utils_misc.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import
3 | from datetime import timedelta
4 | from . import _unittest as unittest
5 | from datatest import _utils
6 | from datatest._utils import IterItems
7 | from datatest._utils import pretty_timedelta_repr
8 |
9 |
class TestIterItems(unittest.TestCase):
    """Behavioral tests for the IterItems wrapper."""

    def test_type_error(self):
        regex = "expected iterable or mapping, got 'int'"
        with self.assertRaisesRegex(TypeError, regex):
            IterItems(123)

    def test_non_exhaustible(self):
        pairs = [('a', 1), ('b', 2)]  # <- Non-exhaustible input.

        wrapped = IterItems(pairs)
        self.assertIs(iter(wrapped), iter(wrapped), msg='exhaustible output')
        self.assertEqual(list(wrapped), pairs)
        self.assertEqual(list(wrapped), [], msg='already exhausted')

    def test_exhaustible(self):
        pairs_iter = iter([('a', 1), ('b', 2)])  # <- Exhaustible iterator.

        wrapped = IterItems(pairs_iter)
        self.assertIs(iter(wrapped), iter(wrapped))
        self.assertEqual(list(wrapped), [('a', 1), ('b', 2)])
        self.assertEqual(list(wrapped), [], msg='already exhausted')

    def test_dict(self):
        wrapped = IterItems({'a': 1, 'b': 2})
        self.assertEqual(set(wrapped), set([('a', 1), ('b', 2)]))
        self.assertEqual(set(wrapped), set(), msg='already exhausted')

    def test_dictitems(self):
        dic = {'a': 1}

        if hasattr(dic, 'iteritems'):  # <- Python 2
            wrapped = IterItems(dic.iteritems())
            self.assertEqual(list(wrapped), [('a', 1)])
            self.assertEqual(list(wrapped), [], msg='already exhausted')

        wrapped = IterItems(dic.items())
        self.assertEqual(list(wrapped), [('a', 1)])
        self.assertEqual(list(wrapped), [], msg='already exhausted')

    def test_empty_iterable(self):
        wrapped = IterItems(iter([]))
        self.assertEqual(list(wrapped), [])

    def test_repr(self):
        wrapped = IterItems([1, 2])
        iter_word = repr(iter([])).partition(' ')[0]
        expected_start = 'IterItems({0}'.format(iter_word)
        self.assertTrue(repr(wrapped).startswith(expected_start))

        generator = (x for x in [1, 2])
        wrapped = IterItems(generator)
        self.assertEqual(repr(wrapped), 'IterItems({0!r})'.format(generator))

    def test_subclasshook(self):
        self.assertIsInstance(IterItems(iter([])), IterItems)

        try:
            dict_items = dict([]).iteritems()  # <- For Python 2
        except AttributeError:
            dict_items = dict([]).items()  # <- For Python 3
        self.assertIsInstance(dict_items, IterItems)

        self.assertIsInstance(enumerate([]), IterItems)

    def test_virtual_subclass(self):
        class OtherClass(object):
            pass

        IterItems.register(OtherClass)  # <- Register virtual subclass.
        self.assertIsInstance(OtherClass(), IterItems)
93 |
94 |
class TestMakeSentinel(unittest.TestCase):
    """Tests for the _make_token() helper."""

    def test_basic(self):
        token = _utils._make_token('TheName', '', 'The docstring.')
        self.assertEqual(token.__class__.__name__, 'TheName')
        self.assertEqual(repr(token), '')
        self.assertEqual(token.__doc__, 'The docstring.')
        self.assertTrue(bool(token))  # Tokens are truthy by default.

    def test_falsy(self):
        token = _utils._make_token('TheName', '', 'The docstring.',
                                   truthy=False)
        self.assertFalse(bool(token))
110 |
111 |
class TestPrettyTimedeltaRepr(unittest.TestCase):
    """Tests for pretty_timedelta_repr()."""

    def test_already_normalized_units(self):
        # Components already match timedelta's internal normalization.
        delta = timedelta(days=6, seconds=27, microseconds=100)
        self.assertEqual(
            pretty_timedelta_repr(delta),
            'timedelta(days=+6, seconds=+27, microseconds=+100)',
        )

    def test_negative_delta(self):
        # The built-in repr for this value is the less readable
        # "timedelta(days=-1, seconds=86398)".
        delta = timedelta(seconds=-2)
        self.assertEqual(pretty_timedelta_repr(delta), 'timedelta(seconds=-2)')
127 |
--------------------------------------------------------------------------------
/datatest/__past__/api06.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Backwards compatibility for version 0.6.0.dev1 API."""
3 | from __future__ import absolute_import
4 | import inspect
5 | import datatest
6 | from datatest.__past__ import api08
7 | from datatest.__past__ import api07
8 | from datatest._compatibility import itertools
9 | from datatest import DataTestCase
10 |
11 |
# Re-expose renamed DataTestCase members under their 0.6.0.dev1 names:
# "subjectData"/"referenceData" are property aliases of the newer
# "subject"/"reference" properties.
DataTestCase.subjectData = property(fget=DataTestCase.subject.fget,
                                    fset=DataTestCase.subject.fset)
DataTestCase.referenceData = property(fget=DataTestCase.reference.fget,
                                      fset=DataTestCase.reference.fset)
# The assertData* methods were renamed assertSubject*--alias them back.
DataTestCase.assertDataColumns = DataTestCase.assertSubjectColumns
DataTestCase.assertDataSet = DataTestCase.assertSubjectSet
DataTestCase.assertDataSum = DataTestCase.assertSubjectSum
DataTestCase.assertDataRegex = DataTestCase.assertSubjectRegex
DataTestCase.assertDataNotRegex = DataTestCase.assertSubjectNotRegex
# DataAssertionError was renamed--point it at the old DataError class.
datatest.DataAssertionError = datatest.__past__.api07_error.DataError
22 |
23 |
_wrapped_find_data_source = DataTestCase._find_data_source
@staticmethod
def _find_data_source(name):
    """Resolve *name* by searching the calling stack's module-level
    globals for either the new name or its 0.6.0.dev1 alias, falling
    back to the wrapped implementation for any other name.
    """
    if name in ('subject', 'subjectData'):
        aliases = ('subject', 'subjectData')
    elif name in ('reference', 'referenceData'):
        aliases = ('reference', 'referenceData')
    else:
        return _wrapped_find_data_source(name)

    frame_records = inspect.stack()
    frame_records.pop()  # Skip record of current frame.
    for record in frame_records:
        frame_globals = record[0].f_globals
        for alias in aliases:
            if alias in frame_globals:
                return frame_globals[alias]  # <- EXIT!
    raise NameError('cannot find {0!r}'.format(name))
DataTestCase._find_data_source = _find_data_source
49 |
50 |
def _normalize_required(self, required, method, *args, **kwds):
    """Normalize *required* for comparison: fall back to the old
    ``referenceData`` property when *required* is omitted, and call
    *method* on data-source objects to obtain comparable data.
    """
    if required is None:  # <- Identity check (was ``== None``, which can
                          #    invoke arbitrary __eq__ behavior on
                          #    *required* and give a wrong answer).
        required = self.referenceData  # <- OLD NAME!
    if isinstance(required, datatest.BaseSource):
        fn = getattr(required, method)
        required = fn(*args, **kwds)
    return required
DataTestCase._normalize_required = _normalize_required
59 |
60 |
# This method was removed entirely.
def _assertDataCount(self, column, keys, required=None, msg=None, **kwds_filter):
    """Old-API assertion: row counts must equal the sums of *column*."""
    counted = self.subject.count(column, keys, **kwds_filter)
    required = self._normalize_required(required, 'sum', column, keys, **kwds_filter)
    if not msg:
        msg = 'row counts different than {0!r} sums'.format(column)
    self.assertEqual(counted, required, msg)
DataTestCase.assertDataCount = _assertDataCount
68 |
69 |
# Function signature and behavior was changed.
def _allowAny(self, number=None, msg=None, **kwds_filter):
    """Old-API allowance: allow any difference, optionally capped at
    *number* occurrences.
    """
    if not number:
        return datatest.allow_any(msg, **kwds_filter)
    return datatest.allow_limit(number, msg, **kwds_filter)
DataTestCase.allowAny = _allowAny
76 |
77 |
# Function signature and behavior was changed.
def _allowMissing(self, number=None, msg=None):
    """Old-API allowance: allow Missing differences, optionally capped
    at *number* occurrences (all differences returned if exceeded).
    """
    def function(iterable):
        primary, backup = itertools.tee(iterable)
        rejected = []
        matched = 0
        for element in primary:
            if isinstance(element, datatest.Missing):
                matched += 1
                if number and matched > number:
                    return backup  # <- EXIT! Exceeds limit, return all.
            else:
                rejected.append(element)
        return rejected
    return datatest.allow_iter(function, msg)
DataTestCase.allowMissing = _allowMissing
94 |
95 |
# Function signature and behavior was changed.
def _allowExtra(self, number=None, msg=None):
    """Old-API allowance: allow Extra differences, optionally capped
    at *number* occurrences (all differences returned if exceeded).
    """
    def function(iterable):
        primary, backup = itertools.tee(iterable)
        rejected = []
        matched = 0
        for element in primary:
            if isinstance(element, datatest.Extra):
                matched += 1
                if number and matched > number:
                    return backup  # <- EXIT! Exceeds limit, return all.
            else:
                rejected.append(element)
        return rejected
    return datatest.allow_iter(function, msg)
DataTestCase.allowExtra = _allowExtra
112 |
--------------------------------------------------------------------------------
/docs/how-to/sequences.rst:
--------------------------------------------------------------------------------
1 |
2 | .. py:currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: How to validate sequences.
6 | :keywords: datatest, sequences, order
7 |
8 |
9 | #########################
10 | How to Validate Sequences
11 | #########################
12 |
13 |
14 | Index Position
15 | ==============
16 |
17 | To check for a specific sequence, you can pass a list [1]_ as the
18 | *requirement* argument:
19 |
20 | .. code-block:: python
21 | :emphasize-lines: 4
22 | :linenos:
23 |
24 | from datatest import validate
25 |
26 | data = ['A', 'B', 'X', 'C', 'D']
27 | requirement = ['A', 'B', 'C', 'D'] # <- a list
28 | validate(data, requirement)
29 |
30 |
31 | Elements in the *data* and *requirement* lists are compared by
32 | sequence position. The items at index position 0 are compared to
33 | each other, then items at index position 1 are compared to each
34 | other, and so on:
35 |
36 | .. math::
37 |
38 | \begin{array}{cccc}
39 | \hline
40 | \textbf{index} & \textbf{data} & \textbf{requirement} & \textbf{result} \\
41 | \hline
42 | 0 & \textbf{A} & \textbf{A} & \textrm{matches} \\
43 | 1 & \textbf{B} & \textbf{B} & \textrm{matches} \\
44 | 2 & \textbf{X} & \textbf{C} & \textrm{doesn't match} \\
45 | 3 & \textbf{C} & \textbf{D} & \textrm{doesn't match} \\
46 | 4 & \textbf{D} & no\;value & \textrm{doesn't match} \\
47 | \hline
48 | \end{array}
49 |
50 |
51 | In this example, there are three differences:
52 |
53 | .. code-block:: none
54 |
55 | ValidationError: does not match required sequence (3 differences): [
56 | Invalid('X', expected='C'),
57 | Invalid('C', expected='D'),
58 | Extra('D'),
59 | ]
60 |
61 |
62 | Using enumerate()
63 | -----------------
64 |
65 | While the previous example works well for short lists, the error
66 | does not describe **where** in your sequence the differences occur.
67 | To get the index positions associated with any differences, you
68 | can :py:func:`enumerate` your *data* and *requirement* objects:
69 |
70 | .. code-block:: python
71 | :emphasize-lines: 5
72 | :linenos:
73 |
74 | from datatest import validate
75 |
76 | data = ['A', 'B', 'X', 'C', 'D']
77 | requirement = ['A', 'B', 'C', 'D']
78 | validate(enumerate(data), enumerate(requirement))
79 |
80 |
81 | A required **enumerate object** is treated as a mapping. The keys
82 | for any differences will correspond to their index positions:
83 |
84 | .. code-block:: none
85 |
86 | ValidationError: does not satisfy mapping requirements (3 differences): {
87 | 2: Invalid('X', expected='C'),
88 | 3: Invalid('C', expected='D'),
89 | 4: Extra('D'),
90 | }
91 |
92 |
93 | Relative Order
94 | ==============
95 |
96 | When comparing elements by sequence position, one mis-alignment can
97 | create differences for all following elements. If this behavior is
not desirable, you may want to check for *relative order* instead.
99 |
100 | If you want to check the relative order of elements rather than
101 | their index positions, you can use :meth:`validate.order`:
102 |
103 | .. code-block:: python
104 | :emphasize-lines: 5
105 | :linenos:
106 |
107 | from datatest import validate
108 |
109 | data = ['A', 'B', 'X', 'C', 'D']
110 | requirement = ['A', 'B', 'C', 'D']
111 | validate.order(data, requirement)
112 |
113 |
114 | When checking for relative order, this method tries to align
115 | elements into contiguous matching subsequences. This reduces
116 | the number of non-matches:
117 |
118 | .. math::
119 |
120 | \begin{array}{cccc}
121 | \hline
122 | \textbf{index} & \textbf{data} & \textbf{requirement} & \textbf{result} \\
123 | \hline
124 | 0 & \textbf{A} & \textbf{A} & \textrm{matches} \\
125 | 1 & \textbf{B} & \textbf{B} & \textrm{matches} \\
126 | 2 & \textbf{X} & no\;value & \textrm{doesn't match} \\
127 | 3 & \textbf{C} & \textbf{C} & \textrm{matches} \\
128 | 4 & \textbf{D} & \textbf{D} & \textrm{matches} \\
129 | \hline
130 | \end{array}
131 |
132 | Differences are reported as two-tuples containing the index (in *data*)
133 | where the difference occurs and the non-matching value. In the earlier
134 | examples, we saw that validating by index position produced three
135 | differences. But in this example, validating the same sequences by
136 | relative order produces only one difference:
137 |
138 | .. code-block:: none
139 |
140 | ValidationError: does not match required order (1 difference): [
141 | Extra((2, 'X')),
142 | ]
143 |
144 |
145 | .. rubric:: Footnotes
146 |
147 | .. [1] The validate() function will check *data* by index position when the
148 | *requirement* is any iterable object other than a set, mapping, tuple
149 | or string. See the :ref:`Sequence Validation `
150 | section of the :func:`validate` documentation for full details.
151 |
--------------------------------------------------------------------------------
/tests/past_api00.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """Test backwards compatibility with pre-release API.
3 |
4 | .. note:: Because this sub-module works by monkey-patching the global
5 | ``datatest`` package, these tests should be run in a separate
6 | process.
7 | """
8 | from . import _unittest as unittest
9 |
10 | import datatest
11 | from datatest.__past__ import api00 # <- MONKEY PATCH!!!
12 |
13 | DataTestCase = datatest.DataTestCase
14 | from datatest.__past__.api07_error import DataError
15 | from datatest.__past__.api07_sources import MinimalSource
16 |
17 |
class TestAttributes(unittest.TestCase):
    """The api00 monkey-patch should restore every pre-release name."""

    def test_api_dev0(self):
        # Error class.
        self.assertTrue(hasattr(datatest, 'DataAssertionError'))

        expected_names = [
            # Data source properties.
            'subjectData',
            'referenceData',

            # Acceptance context managers.
            'allowSpecified',
            'allowUnspecified',
            'allowDeviationPercent',

            # Assert methods.
            'assertColumnSet',
            'assertColumnSubset',
            'assertColumnSuperset',
            'assertValueSet',
            'assertValueSubset',
            'assertValueSuperset',
            'assertValueSum',
            'assertValueCount',
            'assertValueRegex',
            'assertValueNotRegex',
        ]
        for name in expected_names:
            self.assertTrue(hasattr(datatest.DataTestCase, name))
43 |
44 |
class TestColumnSubset(datatest.DataTestCase):
    """assertColumnSubset() should pass when the subject's columns are
    a subset of the reference columns and raise DataError otherwise.
    """
    def setUp(self):
        self.subjectData = MinimalSource(data=[['a', '65'], ['b', '70']],
                                         fieldnames=['label1', 'value'])

    def test_is_same(self):
        self.assertColumnSubset(ref=['label1', 'value'])  # Should pass without error.

    def test_is_subset(self):
        self.assertColumnSubset(ref=['label1', 'label2', 'value'])  # Should pass without error.

    def test_is_superset(self):
        # Backslashes are doubled so the pattern matches a literal "("
        # without relying on the invalid escape sequence "\(" (which is
        # a SyntaxWarning since Python 3.12). The runtime regex string
        # is unchanged.
        regex = "different column names:\n xExtra\\(u?'value'\\)"
        with self.assertRaisesRegex(DataError, regex):
            self.assertColumnSubset(ref=['label1'])
61 |
class TestColumnSuperset(datatest.DataTestCase):
    """assertColumnSuperset() should pass when the subject's columns
    are a superset of the reference columns and raise DataError
    otherwise.
    """
    def setUp(self):
        self.subjectData = MinimalSource(data=[['a', '65'], ['b', '70']],
                                         fieldnames=['label1', 'value'])

    def test_is_same(self):
        self.assertColumnSuperset(ref=['label1', 'value'])  # Should pass without error.

    def test_is_superset(self):
        self.assertColumnSuperset(ref=['label1'])  # Should pass without error.

    def test_is_subset(self):
        # Backslashes doubled to avoid the invalid escape sequence "\("
        # (SyntaxWarning since Python 3.12); runtime string unchanged.
        regex = "different column names:\n xMissing\\(u?'label2'\\)"
        with self.assertRaisesRegex(DataError, regex):
            self.assertColumnSuperset(ref=['label1', 'label2', 'value'])
77 |
78 |
class TestValueSubset(DataTestCase):
    """assertValueSubset() should pass when the subject's values are a
    subset of the reference values and raise DataError otherwise.
    """
    def setUp(self):
        self.subjectData = MinimalSource(data=[['a'], ['b'], ['c']],
                                         fieldnames=['label'])

    def test_is_same(self):
        self.assertValueSubset('label', ref=['a', 'b', 'c'])  # Should pass without error.

    def test_is_subset(self):
        self.assertValueSubset('label', ref=['a', 'b', 'c', 'd'])  # Should pass without error.

    def test_is_superset(self):
        # Backslashes doubled to avoid the invalid escape sequence "\("
        # (SyntaxWarning since Python 3.12); runtime string unchanged.
        regex = "different 'label' values:\n xExtra\\(u?'c'\\)"
        with self.assertRaisesRegex(DataError, regex):
            self.assertValueSubset('label', ref=['a', 'b'])
94 |
95 |
class TestValueSuperset(DataTestCase):
    """assertValueSuperset() should pass when the subject's values are
    a superset of the reference values and raise DataError otherwise.
    """
    def setUp(self):
        self.subjectData = MinimalSource(data=[['a'], ['b'], ['c']],
                                         fieldnames=['label'])

    def test_is_same(self):
        self.assertValueSuperset('label', ref=['a', 'b', 'c'])  # Should pass without error.

    def test_is_superset(self):
        self.assertValueSuperset('label', ref=['a', 'b'])  # Should pass without error.

    def test_is_subset(self):
        # Backslashes doubled to avoid the invalid escape sequence "\("
        # (SyntaxWarning since Python 3.12); runtime string unchanged.
        regex = "different 'label' values:\n xMissing\\(u?'d'\\)"
        with self.assertRaisesRegex(DataError, regex):
            self.assertValueSuperset('label', ref=['a', 'b', 'c', 'd'])
111 |
112 |
# Refuse to run when imported: the monkey-patching performed above
# mutates the global datatest package, so this module must execute in
# its own process (see the module docstring).
if __name__ != '__main__':
    raise Exception('This test must be run directly or as a subprocess.')

unittest.main()
117 |
--------------------------------------------------------------------------------
/docs/reference/unittest-support.rst:
--------------------------------------------------------------------------------
1 |
2 | .. currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: datatest API for unittest-style testing
6 | :keywords: datatest, unittest, data-wrangling
7 |
8 |
9 | ################
10 | Unittest Support
11 | ################
12 |
13 | Datatest can be used together with the :mod:`unittest` package
14 | from the Python Standard Library. For a quick introduction, see:
15 |
16 | * :ref:`Automated Data Testing: Unittest `
17 | * :ref:`Unittest Samples `
18 |
19 |
20 | .. _datatestcase-docs:
21 |
22 | ************
23 | DataTestCase
24 | ************
25 |
26 | .. autoclass:: DataTestCase
27 |
28 | **VALIDATION METHODS**
29 |
30 | The assertion methods wrap :func:`validate` and its methods:
31 |
32 | .. code-block:: python
33 | :emphasize-lines: 7
34 |
35 | from datatest import DataTestCase
36 |
37 | class MyTest(DataTestCase):
38 | def test_mydata(self):
39 | data = ...
40 | requirement = ...
41 | self.assertValid(data, requirement)
42 |
43 | .. automethod:: assertValid
44 |
45 | .. automethod:: assertValidPredicate
46 |
47 | .. automethod:: assertValidRegex
48 |
49 | .. automethod:: assertValidApprox
50 |
51 | .. automethod:: assertValidFuzzy
52 |
53 | .. automethod:: assertValidInterval
54 |
55 | .. automethod:: assertValidSet
56 |
57 | .. automethod:: assertValidSubset
58 |
59 | .. automethod:: assertValidSuperset
60 |
61 | .. automethod:: assertValidUnique
62 |
63 | .. automethod:: assertValidOrder
64 |
65 | **ACCEPTANCE METHODS**
66 |
67 | The acceptance methods wrap :func:`accepted` and its methods:
68 |
69 | .. code-block:: python
70 | :emphasize-lines: 7
71 |
72 | from datatest import DataTestCase
73 |
74 | class MyTest(DataTestCase):
75 | def test_mydata(self):
76 | data = ...
77 | requirement = ...
78 | with self.accepted(Missing):
79 | self.assertValid(data, requirement)
80 |
81 | .. automethod:: accepted
82 |
83 | .. automethod:: acceptedKeys
84 |
85 | .. automethod:: acceptedArgs
86 |
87 | .. method:: acceptedTolerance(tolerance, /, msg=None)
88 | acceptedTolerance(lower, upper, msg=None)
89 |
90 | Wrapper for :meth:`accepted.tolerance`.
91 |
92 | .. method:: acceptedPercent(tolerance, /, msg=None)
93 | acceptedPercent(lower, upper, msg=None)
94 |
95 | Wrapper for :meth:`accepted.percent`.
96 |
97 | .. automethod:: acceptedFuzzy
98 |
99 | .. automethod:: acceptedCount
100 |
101 |
102 | .. _unittest-style-invocation:
103 |
104 | **********************
105 | Command-Line Interface
106 | **********************
107 |
108 | The datatest module can be used from the command line just like
109 | unittest. To run the program with `test discovery
<https://docs.python.org/3/library/unittest.html#test-discovery>`_
111 | use the following command::
112 |
113 | python -m datatest
114 |
115 | Run tests from specific modules, classes, or individual methods with::
116 |
117 | python -m datatest test_module1 test_module2
118 | python -m datatest test_module.TestClass
119 | python -m datatest test_module.TestClass.test_method
120 |
121 | The syntax and command-line options (``-f``, ``-v``, etc.) are the
122 | same as unittest---see unittest's `command-line documentation
<https://docs.python.org/3/library/unittest.html#command-line-interface>`_
124 | for full details.
125 |
126 | .. note::
127 |
128 | Tests are ordered by **file name** and then by **line number**
129 | (within each file) when running datatest from the command-line.
130 |
131 | ..
132 | Unlike strict unit testing, data preparation tests are often
dependent on one another---this strict order-by-line-number
134 | behavior lets users design test suites appropriately.
135 | For example, asserting the population of a city will always
136 | fail when the 'city' column is missing. So it's appropriate
137 | to validate column names *before* validating the contents of
138 | each column.
139 |
140 |
141 | *******************
142 | Test Runner Program
143 | *******************
144 |
145 | .. py:decorator:: mandatory
146 |
147 | A decorator to mark whole test cases or individual methods as
148 | mandatory. If a mandatory test fails, DataTestRunner will stop
149 | immediately (this is similar to the ``--failfast`` command line
150 | argument behavior)::
151 |
152 | @datatest.mandatory
153 | class TestFileFormat(datatest.DataTestCase):
154 | def test_columns(self):
155 | ...
156 |
157 | .. autoclass:: DataTestRunner
158 | :members:
159 | :inherited-members:
160 |
161 | .. autoclass:: DataTestProgram(module='__main__', defaultTest=None, argv=None, testRunner=datatest.DataTestRunner, testLoader=unittest.TestLoader, exit=True, verbosity=1, failfast=None, catchbreak=None, buffer=None, warnings=None)
162 | :members:
163 | :inherited-members:
164 |
165 | |
166 |
167 | .. autoclass:: main
168 | :members:
169 | :inherited-members:
170 |
--------------------------------------------------------------------------------
/docs/how-to/phone-numbers.rst:
--------------------------------------------------------------------------------
1 |
2 | .. py:currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: How to assert telephone number formats.
6 | :keywords: datatest, phone format, validate phone number
7 |
8 |
9 | #############################
10 | How to Validate Phone Numbers
11 | #############################
12 |
13 | To check that phone numbers are well-formed, you can use a regular
14 | expression.
15 |
16 |
17 | USA and Canada
18 | ==============
19 |
20 |
21 | .. code-block:: python
22 |
23 | from datatest import validate
24 |
25 | pattern = r'^\(\d{3}\)[ ]\d{3}-\d{4}$'
26 |
27 | data = [
28 | '(914) 232-9901',
29 | '(914) 737-9938',
30 | '(213) 888-7636',
31 | '(202) 965-2900',
32 | '(858) 651-5050',
33 | ]
34 |
35 | validate.regex(data, pattern, msg='must use phone number format')
36 |
37 |
For other common US and Canadian formats, you can use the following
regex patterns:
40 |
41 | .. table::
42 | :widths: auto
43 |
44 | +-------------------------------+-------------------+
45 | | pattern | examples |
46 | +===============================+===================+
47 | | ``^\(\d{3}\)[ ]\d{3}-\d{4}$`` | \(914) 232-9901 |
48 | +-------------------------------+-------------------+
49 | | ``^\d{3}-\d{3}-\d{4}$`` | 914-232-9901 |
50 | +-------------------------------+-------------------+
51 | | ``^\+?1-\d{3}-\d{3}-\d{4}$`` | 1-914-232-9901 |
52 | | +-------------------+
53 | | | +1-914-232-9901 |
54 | +-------------------------------+-------------------+
55 |
56 |
57 | ..
58 | THESE PHONE NUMBER PATTERNS ARE INCOMPLETE
59 |
60 | China
61 | =====
62 |
63 | .. code-block:: python
64 |
65 | from datatest import validate
66 |
67 | pattern = r'^\d{3}[ ]\d{3,4}[ ]\d{4}$'
68 |
69 | data = [
70 | '074 7284 5586',
71 | '400 669 5539',
72 | ]
73 |
74 | validate.regex(data, pattern, msg='must use phone number format')
75 |
76 |
77 | For common variants, you can use the following patterns:
78 |
79 | .. table::
80 | :widths: auto
81 |
82 | +--------------------------------------+-------------------+
83 | | ``^\d{3}[ ]\d{3,4}[ ]\d{4}$`` | 074 7284 5586 |
84 | | +-------------------+
85 | | | 400 669 5539 |
86 | +--------------------------------------+-------------------+
87 | | ``^\+86[ ]\d{3}[ ]\d{3,4}[ ]\d{4}$`` | +86 074 7284 5586 |
88 | | +-------------------+
89 | | | +86 400 669 5539 |
90 | +--------------------------------------+-------------------+
91 |
92 |
93 | India
94 | =====
95 |
96 | .. code-block:: python
97 |
98 | import re
99 | from datatest import validate
100 |
101 |
102 | indian_phone_format = re.compile(r'''^
103 | (\+91[ ])? # Optional international code.
104 | (\(0\))? # Optional trunk prefix.
105 | # 10 digit codes with area & number splits.
106 | (
107 | \d{10} # xxxxxxxxxx
108 | | \d{5}[ ]\d{5} # xxxxx xxxxx
109 | | \d{4}[ ]\d{6} # xxxx xxxxxx
110 | | \d{3}[ ]\d{7} # xxx xxxxxxx
111 | | \d{2}[ ]\d{8} # xx xxxxxxxx
112 | )
113 | $''', re.VERBOSE)
114 |
115 | data = [
116 | '+91 (0)99999 99999',
117 | '+91 99999 99999',
118 | '9999999999',
119 | '99999 99999',
120 | '9999 999999',
121 | '999 9999999',
122 | '99 99999999',
123 | ]
124 |
125 | validate(data, indian_phone_format, msg='must use phone number format')
126 |
127 |
128 | United Kingdom
129 | ==============
130 |
131 | .. code-block:: python
132 |
133 | import re
134 | from datatest import validate
135 |
136 |
137 | uk_phone_format = re.compile(r'''^(
138 | # 10 digit NSNs (leading zero doesn't count)
139 | \(01\d{2}[ ]\d{2}\d\)[ ]\d{2}[ ]\d{3} # (01xx xx) xx xxx
140 | | \(01\d{3}\)[ ]\d{3}[ ]\d{3} # (01xxx) xxx xxx
141 | | \(01\d{2}\)[ ]\d{3}[ ]\d{4} # (01xx) xxx xxxx
142 | | \(02\d\)[ ]\d{4}[ ]\d{4} # (02x) xxxx xxxx
143 | | 0\d{3}[ ]\d{3}[ ]\d{4} # 0xxx xxx xxxx
144 | | 0\d{2}[ ]\d{4}[ ]\d{4} # 0xx xxxx xxxx
145 | | 07\d{3}[ ]\d{3}[ ]\d{3} # 07xxx xxx xxx
146 |
147 | # 9 digit NSNs
148 | | \(0169[ ]77\)[ ]\d{4} # (0169 77) xxxx
149 | | \(01\d{3}\)[ ]\d{2}[ ]\d{3} # (01xxx) xx xxx
150 | | 0500[ ]\d{3}[ ]\d{3} # 0500 xxx xxx
151 | | 0800[ ]\d{3}[ ]\d{3} # 0800 xxx xxx
152 | )$''', re.VERBOSE)
153 |
154 | data = [
155 | '(01257) 421 282',
156 | '(01736) 759 307',
157 | '(0169 77) 3452',
158 | '0116 319 5885',
159 | '0191 384 6777',
160 | '020 8399 0617',
161 | ]
162 |
163 | validate(data, uk_phone_format, msg='must use phone number format')
164 |
165 |
166 | ..
167 | TO ADD:
168 | Germany
169 | Japan
170 | France
171 |
172 |
--------------------------------------------------------------------------------
/docs/how-to/excel-auto-formatting.rst:
--------------------------------------------------------------------------------
1 |
2 | .. currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: How to prevent Excel from converting values.
6 | :keywords: datatest, excel, date conversion, scientific notation, leading zeros
7 |
8 |
9 | #######################################
10 | How to Avoid Excel Automatic Formatting
11 | #######################################
12 |
13 | When MS Excel opens CSV files (and many other tabular formats),
14 | its default behavior will reformat certain values as dates,
15 | strip leading zeros, convert long numbers into scientific
16 | notation, and more. There are many cases where these kinds
17 | of changes actually corrupt your data.
18 |
19 | It is possible to control Excel's formatting behavior using its
20 | *Text Import Wizard*. But as long as other users can open and
21 | re-save your CSV files, there may be no good way to guarantee that
22 | someone else won't inadvertently corrupt your data with Excel's
23 | default auto-format behavior. In a situation like this, you can
24 | mitigate problems by avoiding values that Excel likes to auto-format.
25 |
26 | Using the :class:`Predicate` object below, you can check that values
27 | are "Excel safe" and receive a list of differences when values are
28 | vulnerable to inadvertent auto-formatting:
29 |
30 | .. code-block:: python
31 | :emphasize-lines: 44
32 | :linenos:
33 |
34 | import re
35 | from datatest import validate, Predicate
36 |
37 |
38 | # Predicate to check that elements are not subject
39 | # to Excel auto-formatting.
40 | excel_safe = ~Predicate(re.compile(r'''^(
41 | # Date format character combinations.
42 | \d{1,2}-(?:\d{1,2}|\d{4})
43 | | (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[ \-]\d{1,2}
44 | | [01]?[0-9]-(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)
45 |
46 | # Time conversions.
47 | | [01]?[0-9][ ]?(AM?|PM?) # Twelve-hour clock.
48 | | \d?\d[ ]*: # HH (hours).
49 | | \d?\d[ ]*(:[ ]*\d\d?){1,2} # HH:MM and HH:MM:SS
50 |
51 | # Numeric conversions.
52 | | 0\d+\.?\d* # Number with leading zeros.
53 | | \d*\.\d*0 # Decimal point with trailing zeros.
54 | | \d*\. # Trailing decimal point.
55 | | \d.?\d*E[+-]?\d+ # Scientific notation.
56 | | \d{16,} # Numbers of 16+ digits get approximated.
57 |
58 | # Whitespace normalization.
59 | | \s.* # Leading whitespace.
60 | | .*\s # Trailing whitespace.
61 | | .*\s\s.* # Irregular whitespace (new in Office 365).
62 |
63 | # Other conversions
64 | | =.+ # Spreadsheet formula.
65 |
66 | )$''', re.VERBOSE | re.IGNORECASE), name='excel_safe')
67 |
68 |
69 | data = [
70 | 'AOX-18',
71 | 'APR-23',
72 | 'DBB-01',
73 | 'DEC-20',
74 | 'DNZ-33',
75 | 'DVH-50',
76 | ]
77 | validate(data, excel_safe)
78 |
79 | In the example above, we use ``excel_safe`` as our *requirement*.
80 | The validation fails because our *data* contains two codes that
81 | Excel would auto-convert into date types:
82 |
83 | .. code-block:: none
84 |
85 | ValidationError: does not satisfy excel_safe() (2 differences): [
86 | Invalid('APR-23'),
87 | Invalid('DEC-20'),
88 | ]
89 |
90 |
91 | Fixing the Data
92 | ---------------
93 |
94 | To address the failure, we need to change the values in *data* so
95 | they are no longer subject to Excel's auto-formatting behavior.
96 | There are a few ways to do this.
97 |
98 | We can prefix the failing values with apostrophes (``'APR-23``
99 | and ``'DEC-20``). This causes Excel to treat them as text instead
100 | of dates or numbers:
101 |
102 | .. code-block:: python
103 | :emphasize-lines: 5,7
104 | :linenos:
105 | :lineno-start: 34
106 |
107 | ...
108 |
109 | data = [
110 | "AOX-18",
111 | "'APR-23",
112 | "DBB-01",
113 | "'DEC-20",
114 | "DNZ-33",
115 | "DVH-50",
116 | ]
117 | validate(data, excel_safe)
118 |
119 |
Another approach would be to change the formatting for all of
121 | the values. Below, the hyphens in *data* have been replaced with
122 | underscores (``_``):
123 |
124 | .. code-block:: python
125 | :emphasize-lines: 4-9
126 | :linenos:
127 | :lineno-start: 34
128 |
129 | ...
130 |
131 | data = [
132 | 'AOX_18',
133 | 'APR_23',
134 | 'DBB_01',
135 | 'DEC_20',
136 | 'DNZ_33',
137 | 'DVH_50',
138 | ]
139 | validate(data, excel_safe)
140 |
141 |
142 | After making the needed changes, the validation will now pass without
143 | error.
144 |
145 |
146 | .. caution::
147 |
148 | The ``excel_safe`` predicate implements a blacklist approach
149 | to detect values that Excel will automatically convert. It is
150 | not guaranteed to catch everything and future versions of Excel
151 | could introduce new behaviors. If you discover auto-formatted
152 | values that are not handled by this helper function (or if you
153 | have an idea regarding a workable whitelist approach), please
154 | `file an issue`_ and we will try to improve it.
155 |
156 |
157 | .. _`file an issue`: https://github.com/shawnbrown/datatest/issues
158 |
--------------------------------------------------------------------------------
/tests/past_api07_sources_sqlite.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import sqlite3
3 | from . import _unittest as unittest
4 |
5 | from .mixins import CountTests
6 | from .mixins import OtherTests
7 |
8 | from datatest.__past__.api07_sources import SqliteSource
9 |
10 |
class TestSqliteSourceCount(CountTests, unittest.TestCase):
    def setUp(self):
        """Load self.testdata into an in-memory SQLite table and wrap
        it in a SqliteSource.
        """
        connection = sqlite3.connect(':memory:')
        cursor = connection.cursor()
        cursor.execute("CREATE TABLE testtable (label1, label2, value)")
        for row in self.testdata:
            cursor.execute("INSERT INTO testtable VALUES (?, ?, ?)", row)
        connection.commit()
        self.datasource = SqliteSource(connection, 'testtable')
22 |
23 |
class TestSqliteSource(OtherTests, unittest.TestCase):
    def setUp(self):
        """Load self.testdata into an in-memory SQLite table and wrap
        it in a SqliteSource.
        """
        connection = sqlite3.connect(':memory:')
        cursor = connection.cursor()
        cursor.execute("CREATE TABLE testtable (label1, label2, value)")
        for row in self.testdata:
            cursor.execute("INSERT INTO testtable VALUES (?, ?, ?)", row)
        connection.commit()
        self.datasource = SqliteSource(connection, 'testtable')

    def test_where_clause(self):
        """_build_where_clause() turns keyword args into a SQL WHERE
        clause string and a list of bound parameters.
        """
        # No keyword arguments at all.
        clause, params = SqliteSource._build_where_clause()
        self.assertEqual('', clause)
        self.assertEqual([], params)

        # One equality condition (label1 equals 'a').
        clause, params = SqliteSource._build_where_clause(label1='a')
        self.assertEqual('label1=?', clause)
        self.assertEqual(['a'], params)

        # Two conditions joined with AND (label1 equals 'a' AND label2 equals 'x').
        clause, params = SqliteSource._build_where_clause(label1='a', label2='x')
        self.assertEqual('label1=? AND label2=?', clause)
        self.assertEqual(['a', 'x'], params)

        # A tuple value becomes an IN membership test ('a' or 'b').
        clause, params = SqliteSource._build_where_clause(label1=('a', 'b'))
        self.assertEqual('label1 IN (?, ?)', clause)
        self.assertEqual(['a', 'b'], params)

        # Mixed: membership test plus a simple equality.
        clause, params = SqliteSource._build_where_clause(label1=('a', 'b'), label2='x')
        self.assertEqual('label1 IN (?, ?) AND label2=?', clause)
        self.assertEqual(['a', 'b', 'x'], params)

    def test_normalize_column(self):
        """Column names are double-quoted; embedded quotes are doubled."""
        self.assertEqual('"foo"', SqliteSource._normalize_column('foo'))
        self.assertEqual('"foo bar"', SqliteSource._normalize_column('foo bar'))
        self.assertEqual('"foo ""bar"" baz"',
                         SqliteSource._normalize_column('foo "bar" baz'))

    def test_from_records(self):
        """Test from_records method (wrapper for TemporarySqliteTable class)."""
        fieldnames = ['foo', 'bar', 'baz']

        # Records given as tuples.
        tuple_records = [
            ('a', 'x', '1'),
            ('b', 'y', '2'),
            ('c', 'z', '3'),
        ]
        source = SqliteSource.from_records(tuple_records, fieldnames)
        expected = [
            {'foo': 'a', 'bar': 'x', 'baz': '1'},
            {'foo': 'b', 'bar': 'y', 'baz': '2'},
            {'foo': 'c', 'bar': 'z', 'baz': '3'},
        ]
        self.assertEqual(expected, list(source))

        # Records given as dicts.
        dict_records = [
            {'foo': 'a', 'bar': 'x', 'baz': '1'},
            {'foo': 'b', 'bar': 'y', 'baz': '2'},
            {'foo': 'c', 'bar': 'z', 'baz': '3'},
        ]
        source = SqliteSource.from_records(dict_records, fieldnames)
        self.assertEqual(dict_records, list(source))

        # Dict records with the *columns* argument omitted.
        source = SqliteSource.from_records(dict_records)
        self.assertEqual(dict_records, list(source))

    def test_create_index(self):
        cursor = self.datasource._connection.cursor()

        def index_names():
            # Return the sorted names of all indexes on 'testtable'
            # (name is the second column of PRAGMA index_list output).
            cursor.execute("PRAGMA INDEX_LIST('testtable')")
            return sorted(row[1] for row in cursor.fetchall())

        # There should be no indexes initially.
        self.assertEqual([], index_names())

        # Add single-column index.
        self.datasource.create_index('label1')  # <- CREATE INDEX!
        self.assertEqual(['idx_testtable_label1'], index_names())

        # Add multi-column index.
        self.datasource.create_index('label2', 'value')  # <- CREATE INDEX!
        self.assertEqual(['idx_testtable_label1', 'idx_testtable_label2_value'],
                         index_names())

        # Duplicate of first, single-column index should have no effect.
        self.datasource.create_index('label1')  # <- CREATE INDEX!
        self.assertEqual(['idx_testtable_label1', 'idx_testtable_label2_value'],
                         index_names())
133 |
--------------------------------------------------------------------------------
/datatest/_compatibility/decimal.py:
--------------------------------------------------------------------------------
1 | """compatibility layer for decimal (Python standard library)"""
2 | from __future__ import absolute_import
3 | from decimal import *
4 |
5 |
# Decimal.from_float() is new in Python 2.7; on older versions, backport
# the implementation from the 2.7 standard library.
try:
    Decimal.from_float # New in 2.7
except AttributeError:
    import math as _math
    # Bug fix: _dec_from_triple is an underscore-prefixed (private) name,
    # so the module-level `from decimal import *` does not bring it into
    # scope--without this explicit import, _from_float() raised NameError
    # on the very versions this backport targets.
    from decimal import _dec_from_triple

    def _bit_length(integer):
        """Return the number of bits needed to represent *integer*
        (int.bit_length() is itself new in Python 2.7).
        """
        s = bin(integer) # binary representation: bin(-37) --> '-0b100101'
        s = s.lstrip('-0b') # remove leading zeros and minus sign
        return len(s) # len('100101') --> 6

    @classmethod
    def _from_float(cls, f):
        """Convert a float (or int) to a Decimal exactly. Adapted from
        the Python 2.7 standard library.
        """
        if isinstance(f, int): # handle integer inputs
            return cls(f)
        if not isinstance(f, float):
            raise TypeError("argument must be int or float.")
        if _math.isinf(f) or _math.isnan(f):
            return cls(repr(f))
        if _math.copysign(1.0, f) == 1.0:
            sign = 0
        else:
            sign = 1
        n, d = abs(f).as_integer_ratio()
        #k = d.bit_length() - 1
        k = _bit_length(d) - 1
        # f == n / 2**k exactly, and n / 2**k == (n * 5**k) / 10**k,
        # which _dec_from_triple expresses as digits and an exponent.
        result = _dec_from_triple(sign, str(n*5**k), -k)
        if cls is Decimal:
            return result
        else:
            return cls(result)

    Decimal.from_float = _from_float
38 |
39 |
40 | if Decimal('1.0') != 1.0: # Changed in Python 3.2
41 |
42 | import numbers as _numbers
43 | from decimal import _dec_from_triple
44 |
45 |
46 | class FloatOperation(DecimalException, TypeError):
47 | """Enable stricter semantics for mixing floats and Decimals."""
48 | pass
49 |
50 |
51 | # Adapted from Python 3.1 standard library.
52 | _context_init_orig = Context.__init__
53 | def _context_init_new(self, prec=None, rounding=None,
54 | traps=None, flags=None,
55 | Emin=None, Emax=None,
56 | capitals=None, _clamp=0,
57 | _ignored_flags=None):
58 |
59 | # Call original __init__.
60 | _context_init_orig(self, prec=prec, rounding=rounding, traps=traps,
61 | flags=flags, Emin=Emin, Emax=Emax, capitals=capitals,
62 | _clamp=_clamp, _ignored_flags=_ignored_flags)
63 |
64 | # Add FloatOperation to `traps` dict.
65 | self.traps[FloatOperation] = 0
66 |
67 | Context.__init__ = _context_init_new
68 |
69 |
70 | # Adapted from Python 3.4 standard library.
71 | def _convert_for_comparison(self, other, equality_op=False):
72 | if isinstance(other, Decimal):
73 | return self, other
74 | if isinstance(other, _numbers.Rational):
75 | if not self._is_special:
76 | self = _dec_from_triple(self._sign,
77 | str(int(self._int) * other.denominator),
78 | self._exp)
79 | return self, Decimal(other.numerator)
80 | if equality_op and isinstance(other, _numbers.Complex) and other.imag == 0:
81 | other = other.real
82 | if isinstance(other, float):
83 | context = getcontext()
84 | if equality_op:
85 | context.flags[FloatOperation] = 1
86 | else:
87 | context._raise_error(FloatOperation,
88 | "strict semantics for mixing floats and Decimals are enabled")
89 | return self, Decimal.from_float(other)
90 | return NotImplemented, NotImplemented
91 |
92 | def _eq(self, other, context=None):
93 | self, other = _convert_for_comparison(self, other, equality_op=True)
94 | if other is NotImplemented:
95 | return other
96 | if self._check_nans(other, context):
97 | return False
98 | return self._cmp(other) == 0
99 | Decimal.__eq__ = _eq
100 |
101 | def _ne(self, other, context=None):
102 | self, other = _convert_for_comparison(self, other, equality_op=True)
103 | if other is NotImplemented:
104 | return other
105 | if self._check_nans(other, context):
106 | return True
107 | return self._cmp(other) != 0
108 | Decimal.__ne__ = _ne
109 |
110 | def _lt(self, other, context=None):
111 | self, other = _convert_for_comparison(self, other)
112 | if other is NotImplemented:
113 | return other
114 | ans = self._compare_check_nans(other, context)
115 | if ans:
116 | return False
117 | return self._cmp(other) < 0
118 | Decimal.__lt__ = _lt
119 |
120 | def _le(self, other, context=None):
121 | self, other = _convert_for_comparison(self, other)
122 | if other is NotImplemented:
123 | return other
124 | ans = self._compare_check_nans(other, context)
125 | if ans:
126 | return False
127 | return self._cmp(other) <= 0
128 | Decimal.__le__ = _le
129 |
def _gt(self, other, context=None):
    """Greater-than (>) for Decimal, supporting mixed-type operands."""
    a, b = _convert_for_comparison(self, other)
    if b is NotImplemented:
        return b
    if a._compare_check_nans(b, context):
        return False  # Ordered comparisons involving NaN are False.
    return a._cmp(b) > 0
Decimal.__gt__ = _gt
139 |
def _ge(self, other, context=None):
    """Greater-than-or-equal (>=) for Decimal, supporting mixed types."""
    a, b = _convert_for_comparison(self, other)
    if b is NotImplemented:
        return b
    if a._compare_check_nans(b, context):
        return False  # Ordered comparisons involving NaN are False.
    return a._cmp(b) >= 0
Decimal.__ge__ = _ge
149 |
--------------------------------------------------------------------------------
/docs/how-to/customize-differences.rst:
--------------------------------------------------------------------------------
1 |
2 | .. currentmodule:: datatest
3 |
4 | .. meta::
5 | :description: How to customize error differences.
6 | :keywords: datatest, difference, customize
7 |
8 |
9 | ############################
10 | How to Customize Differences
11 | ############################
12 |
13 | When using a helper function for validation, datatest's default
14 | behavior is to produce :class:`Invalid` differences when the
15 | function returns False. But you can customize this behavior
16 | by returning a difference object instead of False. The returned
17 | difference is used in place of an automatically generated one.
18 |
19 |
20 | Default Behavior
21 | ================
22 |
23 | In the following example, the helper function checks that text
24 | values are upper case and have no extra whitespace. If the values
25 | are good, the function returns ``True``; if the values are bad, it
26 | returns ``False``:
27 |
28 | .. code-block:: python
29 | :linenos:
30 | :emphasize-lines: 6
31 |
32 | from datatest import validate
33 |
34 |
35 | def wellformed(x): # <- Helper function.
36 | """Must be uppercase and no extra whitespace."""
37 | return x == ' '.join(x.split()) and x.isupper()
38 |
39 | data = [
40 | 'CAPE GIRARDEAU',
41 | 'GREENE ',
42 | 'JACKSON',
43 | 'St. Louis',
44 | ]
45 |
46 | validate(data, wellformed)
47 |
48 |
49 | Each time the helper function returns ``False``, an :class:`Invalid`
50 | difference is created:
51 |
52 | .. code-block:: none
53 | :emphasize-lines: 5-6
54 |
55 | Traceback (most recent call last):
56 | File "example.py", line 15, in
57 | validate(data, wellformed)
58 | ValidationError: Must be uppercase and no extra whitespace. (2 differences): [
59 | Invalid('GREENE '),
60 | Invalid('St. Louis'),
61 | ]
62 |
63 |
64 | Custom Differences
65 | ==================
66 |
67 | In this example, the helper function returns a custom ``BadWhitespace``
68 | or ``NotUpperCase`` difference for each bad value:
69 |
70 | .. code-block:: python
71 | :linenos:
72 | :emphasize-lines: 15,17
73 |
74 | from datatest import validate, Invalid
75 |
76 |
77 | class BadWhitespace(Invalid):
78 | """For strings with leading, trailing, or irregular whitespace."""
79 |
80 |
81 | class NotUpperCase(Invalid):
82 | """For strings that aren't upper case."""
83 |
84 |
85 | def wellformed(x): # <- Helper function.
86 | """Must be uppercase and no extra whitespace."""
87 | if x != ' '.join(x.split()):
88 | return BadWhitespace(x)
89 | if not x.isupper():
90 | return NotUpperCase(x)
91 | return True
92 |
93 |
94 | data = [
95 | 'CAPE GIRARDEAU',
96 | 'GREENE ',
97 | 'JACKSON',
98 | 'St. Louis',
99 | ]
100 |
101 | validate(data, wellformed)
102 |
103 |
104 | These differences are used in the ValidationError:
105 |
106 | .. code-block:: none
107 | :emphasize-lines: 5-6
108 |
109 | Traceback (most recent call last):
110 | File "example.py", line 15, in
111 | validate(data, wellformed)
112 | ValidationError: Must be uppercase and no extra whitespace. (2 differences): [
113 | BadWhitespace('GREENE '),
114 | NotUpperCase('St. Louis'),
115 | ]
116 |
117 |
118 | .. caution::
119 |
120 | Typically, you should try to **stick with existing differences**
121 | in your data tests. Only create a custom subclass when its meaning
122 | is evident and doing so helps your data preparation workflow.
123 |
124 | Don't add a custom class when it doesn't benefit your testing
125 | process. At best, you're doing extra work for no added benefit.
126 | And at worst, an ambiguous or needlessly complex subclass can
127 | cause more problems than it solves.
128 |
129 | If you need to resolve ambiguity in a validation, you can split
130 | the check into multiple calls. Below, we perform the same check
131 | demonstrated earlier using two :func:`validate` calls:
132 |
133 | .. code-block:: python
134 | :linenos:
135 | :emphasize-lines: 14,21
136 |
137 | from datatest import validate
138 |
139 | data = [
140 | 'CAPE GIRARDEAU',
141 | 'GREENE ',
142 | 'JACKSON',
143 | 'St. Louis',
144 | ]
145 |
146 | def no_irregular_whitespace(x): # <- Helper function.
147 | """Must have no irregular whitespace."""
148 | return x == ' '.join(x.split())
149 |
150 | validate(data, no_irregular_whitespace)
151 |
152 |
153 | def is_upper_case(x): # <- Helper function.
154 | """Must be upper case."""
155 | return x.isupper()
156 |
157 | validate(data, is_upper_case)
158 |
159 |
160 | ..
161 | # In the future, after adding a comparator interface to validate(),
162 | # possibly change the example to something like the following.
163 |
164 | from enum import Enum
165 | from datatest import validate, Invalid
166 |
167 |
168 | # Likert Scale
169 | class response(Enum):
170 | STRONGLY_OPPOSE = 1
171 | OPPOSE = 2
172 | NEUTRAL = 3
173 | SUPPORT = 4
174 | STRONGLY_SUPPORT = 5
175 |
176 |
177 | # 7-Point Likert Scale
178 | #class response(Enum):
179 | # STRONGLY_OPPOSE = 1
180 | # OPPOSE = 2
181 | # SOMEWHAT_OPPOSE = 3
182 | # NEUTRAL = 4
183 | # SOMEWHAT_SUPPORT = 5
184 | # SUPPORT = 6
185 | # STRONGLY_SUPPORT = 7
186 |
187 |
188 | class Change(Invalid):
189 | """For differences of 1 point."""
190 |
191 |
192 | class LargeChange(Invalid):
193 | """For differences of 2 or more points."""
194 |
195 |
196 | latest_survey = {
197 | 'a': response.SUPPORT,
198 | 'b': response.STRONGLY_OPPOSE,
199 | 'c': response.STRONGLY_SUPPORT,
200 | 'd': response.OPPOSE,
201 | }
202 |
203 | previous_survey = {
204 | 'a': response.SUPPORT,
205 | 'b': response.OPPOSE,
206 | 'c': response.STRONGLY_SUPPORT,
207 | 'd': response.SUPPORT,
208 | }
209 |
210 | validate(latest_survey, previous_survey)
211 |
212 |
--------------------------------------------------------------------------------
/tests/past_api09_load_csv.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import os
3 | import sqlite3
4 | import sys
5 | import warnings
6 | from . import _io as io
7 | from . import _unittest as unittest
8 | from datatest._compatibility.builtins import *
9 |
10 | from datatest._vendor.load_csv import load_csv
11 |
12 | try:
13 | from StringIO import StringIO
14 | except ImportError:
15 | StringIO = None
16 |
17 |
class TestLoadCsv(unittest.TestCase):
    """Tests for load_csv(): loading with an explicit encoding,
    detecting an encoding mismatch, and the 'latin-1' fallback
    behavior used when no encoding argument is given.
    """
    def setUp(self):
        # Throw-away in-memory database; synchronous=OFF and
        # isolation_level=None (autocommit) keep loading fast.
        connection = sqlite3.connect(':memory:')
        connection.execute('PRAGMA synchronous=OFF')
        connection.isolation_level = None
        self.cursor = connection.cursor()

        # Run each test from the sample_files directory so tests can
        # refer to sample CSV files by bare filename.
        self.original_cwd = os.path.abspath(os.getcwd())
        os.chdir(os.path.join(os.path.dirname(__file__), 'sample_files'))

    def tearDown(self):              # It would be best to use addCleanup()
        os.chdir(self.original_cwd)  # but it is not available in Python 2.6.

    @staticmethod
    def get_stream(string, encoding=None):
        """Accepts a string and returns a file-like stream object.

        In Python 2, Unicode files should be opened in binary-mode
        but in Python 3, they should be opened in text-mode. This
        function emulates the appropriate opening behavior.
        """
        fh = io.BytesIO(string)
        if sys.version_info[0] == 2:
            return fh
        return io.TextIOWrapper(fh, encoding=encoding)

    def test_encoding_with_stream(self):
        # Stream plus a correct, explicit encoding argument.
        csvfile = self.get_stream((
            b'col1,col2\n'
            b'1,\xe6\n'  # '\xe6' -> æ (ash)
            b'2,\xf0\n'  # '\xf0' -> ð (eth)
            b'3,\xfe\n'  # '\xfe' -> þ (thorn)
        ), encoding='latin-1')
        load_csv(self.cursor, 'testtable1', csvfile, encoding='latin-1')

        expected = [
            ('1', chr(0xe6)),  # chr(0xe6) -> æ
            ('2', chr(0xf0)),  # chr(0xf0) -> ð
            ('3', chr(0xfe)),  # chr(0xfe) -> þ
        ]
        self.cursor.execute('SELECT col1, col2 FROM testtable1')
        self.assertEqual(list(self.cursor), expected)

    def test_encoding_with_file(self):
        # File path (relative to sample_files) plus a correct encoding.
        path = 'sample_text_iso88591.csv'
        load_csv(self.cursor, 'testtable', path, encoding='latin-1')

        expected = [
            ('iso88591', chr(0xe6)),  # chr(0xe6) -> æ
        ]
        self.cursor.execute('SELECT col1, col2 FROM testtable')
        self.assertEqual(list(self.cursor), expected)

    def test_encoding_mismatch(self):
        # A wrong explicit encoding must surface as a decode error
        # rather than silently loading mojibake.
        path = 'sample_text_iso88591.csv'
        wrong_encoding = 'utf-8'  # <- Doesn't match file.

        with self.assertRaises(UnicodeDecodeError):
            load_csv(self.cursor, 'testtable', path, wrong_encoding)

    def test_fallback_with_stream(self):
        with warnings.catch_warnings(record=True):  # Catch warnings issued
            csvfile = self.get_stream((             # when running Python 2.
                b'col1,col2\n'
                b'1,\xe6\n'  # '\xe6' -> æ (ash)
                b'2,\xf0\n'  # '\xf0' -> ð (eth)
                b'3,\xfe\n'  # '\xfe' -> þ (thorn)
            ), encoding='latin-1')
            load_csv(self.cursor, 'testtable1', csvfile)  # <- No encoding arg.

        expected = [
            ('1', chr(0xe6)),  # chr(0xe6) -> æ
            ('2', chr(0xf0)),  # chr(0xf0) -> ð
            ('3', chr(0xfe)),  # chr(0xfe) -> þ
        ]
        self.cursor.execute('SELECT col1, col2 FROM testtable1')
        self.assertEqual(list(self.cursor), expected)

    def test_fallback_with_StringIO(self):
        if not StringIO:  # <- Python 2.x only.
            return

        # Python 2 StringIO holding raw latin-1 bytes.
        csvfile = StringIO(
            b'col1,col2\n'
            b'1,\xe6\n'  # '\xe6' -> æ (ash)
            b'2,\xf0\n'  # '\xf0' -> ð (eth)
            b'3,\xfe\n'  # '\xfe' -> þ (thorn)
        )

        with warnings.catch_warnings(record=True):
            load_csv(self.cursor, 'testtable1', csvfile)

        expected = [
            ('1', chr(0xe6)),  # chr(0xe6) -> æ
            ('2', chr(0xf0)),  # chr(0xf0) -> ð
            ('3', chr(0xfe)),  # chr(0xfe) -> þ
        ]
        self.cursor.execute('SELECT col1, col2 FROM testtable1')
        self.assertEqual(list(self.cursor), expected)

    def test_fallback_with_file(self):
        # With no encoding given, the loader should warn that it fell
        # back to 'latin-1' and still load the data correctly.
        with warnings.catch_warnings(record=True) as warning_list:
            warnings.simplefilter('always')
            path = 'sample_text_iso88591.csv'
            load_csv(self.cursor, 'testtable', path)  # <- No encoding arg.

        self.assertEqual(len(warning_list), 1)
        expected = "using fallback 'latin-1'"
        self.assertIn(expected, str(warning_list[0].message))

        expected = [
            ('iso88591', chr(0xe6)),  # chr(0xe6) -> æ
        ]
        self.cursor.execute('SELECT col1, col2 FROM testtable')
        self.assertEqual(list(self.cursor), expected)

    def test_fallback_with_exhaustible_object(self):
        """Exhaustible iterators and unseekable file-like objects
        can only be iterated over once. This means that the usual
        fallback behavior can not be applied and the function must
        raise an exception.
        """
        if not sys.version_info[0] == 2:  # <- Python 2.x only.
            return

        csvfile = self.get_stream((
            b'col1,col2\n'
            b'1,\xe6\n'  # '\xe6' -> æ (ash)
            b'2,\xf0\n'  # '\xf0' -> ð (eth)
            b'3,\xfe\n'  # '\xfe' -> þ (thorn)
        ), encoding='latin-1')
        generator = (x for x in csvfile)  # <- Make stream unseekable.

        with self.assertRaises(UnicodeDecodeError) as cm:
            load_csv(self.cursor, 'testtable', generator)

        error_message = str(cm.exception)
        self.assertIn('cannot attempt fallback', error_message.lower())
156 |
--------------------------------------------------------------------------------
/docs/tutorial/testing-pandas.rst:
--------------------------------------------------------------------------------
1 | :orphan:
2 |
3 | .. meta::
4 | :description: Datatest examples demonstrating use of pandas DataFrame objects.
5 | :keywords: datatest, pandas, DataFrame
6 |
7 |
8 | ###################
9 | Testing With Pandas
10 | ###################
11 |
12 | Datatest can validate :mod:`pandas` objects (:class:`DataFrame
13 | `, :class:`Series `, and
14 | :class:`Index `) the same way it does with
15 | built-in types.
16 |
17 |
18 | =============
19 | Some Examples
20 | =============
21 |
22 | This example uses a :class:`DataFrame ` to
23 | load and inspect data from a CSV file (:download:`movies.csv
24 | `). The CSV file uses the
25 | following format:
26 |
27 | .. csv-table::
28 | :header: title, rating, year, runtime
29 |
30 | Almost Famous, R, 2000, 122
31 | American Pie, R, 1999, 95
32 | Back to the Future, PG, 1985, 116
33 | Blade Runner, R, 1982, 117
34 | ..., ..., ..., ...
35 |
36 |
37 | .. tabs::
38 |
39 | .. group-tab:: Pytest
40 |
41 | The :download:`test_movies_df.py `
42 | script demonstrates pytest-style tests:
43 |
44 | .. literalinclude:: /_static/tutorial/test_movies_df.py
45 | :language: python
46 | :lineno-match:
47 |
48 | .. group-tab:: Unittest
49 |
50 | The :download:`test_movies_df_unit.py `
51 | script demonstrates unittest-style tests:
52 |
53 | .. literalinclude:: /_static/tutorial/test_movies_df_unit.py
54 | :language: python
55 | :lineno-match:
56 |
57 |
58 | To run these tests, use the following command:
59 |
60 | .. tabs::
61 |
62 | .. group-tab:: Pytest
63 |
64 | .. code-block:: none
65 |
66 | pytest test_movies_df.py
67 |
68 | .. group-tab:: Unittest
69 |
70 | .. code-block:: none
71 |
72 | python -m datatest test_movies_df_unit.py
73 |
74 |
75 | ========================
76 | Step by Step Explanation
77 | ========================
78 |
79 |
80 | 1. Define a test fixture
81 | ------------------------
82 |
83 | Define a test fixture that loads the CSV file into a
84 | :class:`DataFrame `:
85 |
86 | .. tabs::
87 |
88 | .. group-tab:: Pytest
89 |
90 | .. literalinclude:: /_static/tutorial/test_movies_df.py
91 | :pyobject: df
92 | :lineno-match:
93 |
94 | .. group-tab:: Unittest
95 |
96 | .. literalinclude:: /_static/tutorial/test_movies_df_unit.py
97 | :pyobject: setUpModule
98 | :lineno-match:
99 |
100 |
101 | 2. Check column names
102 | ---------------------
103 |
104 | Check that the data includes the expected column names:
105 |
106 | .. tabs::
107 |
108 | .. group-tab:: Pytest
109 |
110 | .. literalinclude:: /_static/tutorial/test_movies_df.py
111 | :pyobject: test_columns
112 | :lineno-match:
113 |
114 | .. group-tab:: Unittest
115 |
116 | .. literalinclude:: /_static/tutorial/test_movies_df_unit.py
117 | :pyobject: TestMovies.test_columns
118 | :lineno-match:
119 |
120 | This validation requires that the set of values in ``df.columns``
121 | matches the required :py:class:`set`. The ``df.columns`` attribute is
122 | an :class:`Index ` object---datatest treats this the same
123 | as any other sequence of values.
124 |
125 | This test is marked ``mandatory`` because it's a prerequisite that must
126 | be satisfied before any of the other tests can pass. When a mandatory
127 | test fails, the test suite stops immediately and no more tests are run.
128 |
129 |
130 | 3. Check 'title' values
131 | -----------------------
132 |
133 | Check that values in the **title** column begin with an upper-case letter:
134 |
135 | .. tabs::
136 |
137 | .. group-tab:: Pytest
138 |
139 | .. literalinclude:: /_static/tutorial/test_movies_df.py
140 | :pyobject: test_title
141 | :lineno-match:
142 |
143 | .. group-tab:: Unittest
144 |
145 | .. literalinclude:: /_static/tutorial/test_movies_df_unit.py
146 | :pyobject: TestMovies.test_title
147 | :lineno-match:
148 |
149 | This validation checks that each value in the ``df['title']`` matches
150 | the regular expression ``^[A-Z]``.
151 |
152 |
153 | 4. Check 'rating' values
154 | ------------------------
155 |
156 | Check that values in the **rating** column match one of the allowed codes:
157 |
158 | .. tabs::
159 |
160 | .. group-tab:: Pytest
161 |
162 | .. literalinclude:: /_static/tutorial/test_movies_df.py
163 | :pyobject: test_rating
164 | :lineno-match:
165 |
166 | .. group-tab:: Unittest
167 |
168 | .. literalinclude:: /_static/tutorial/test_movies_df_unit.py
169 | :pyobject: TestMovies.test_rating
170 | :lineno-match:
171 |
172 | This validation checks that the values in ``df['rating']`` are also
173 | contained in the given set.
174 |
175 |
176 | 5. Check 'year' and 'runtime' types
177 | -----------------------------------
178 |
179 | Check that values in the **year** and **runtime** columns are integers:
180 |
181 | .. tabs::
182 |
183 | .. group-tab:: Pytest
184 |
185 | .. literalinclude:: /_static/tutorial/test_movies_df.py
186 | :pyobject: test_year
187 | :lineno-match:
188 |
189 | .. literalinclude:: /_static/tutorial/test_movies_df.py
190 | :pyobject: test_runtime
191 | :lineno-match:
192 |
193 | .. group-tab:: Unittest
194 |
195 | .. literalinclude:: /_static/tutorial/test_movies_df_unit.py
196 | :pyobject: TestMovies.test_year
197 | :lineno-match:
198 |
199 | .. literalinclude:: /_static/tutorial/test_movies_df_unit.py
200 | :pyobject: TestMovies.test_runtime
201 | :lineno-match:
202 |
203 |
204 | ================
205 | More Information
206 | ================
207 |
208 | .. seealso::
209 |
210 | See the :doc:`../intro/validating-pandas` introduction docs
211 | for more information and examples.
212 |
213 | See :ref:`pandas-accessor-docs` to learn about the alternate
214 | validation syntax provided by pandas **accessor extensions**.
215 |
216 |
--------------------------------------------------------------------------------
/datatest/_normalize.py:
--------------------------------------------------------------------------------
1 | """Normalize objects for validation."""
2 |
3 | import sys
4 | from ._compatibility.collections.abc import Collection
5 | from ._compatibility.collections.abc import Iterable
6 | from ._compatibility.collections.abc import Iterator
7 | from ._compatibility.collections.abc import Mapping
8 |
9 | from ._utils import exhaustible
10 | from ._utils import iterpeek
11 | from ._utils import IterItems
12 |
13 |
class TypedIterator(Iterator):
    """An iterator that remembers which container type its values
    are meant to be evaluated to (see :meth:`fetch`).
    """
    def __init__(self, iterable, evaltype):
        self.evaltype = evaltype         # Target container type.
        self._iterator = iter(iterable)  # Underlying value stream.

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterator)

    def next(self):  # Python 2.x support.
        return self.__next__()

    def fetch(self):
        """Consume the remaining values and return them as an
        instance of *evaltype*.
        """
        return self.evaltype(self._iterator)
30 |
31 |
32 | NoneType = type(None)
33 |
34 |
def _normalize_lazy(obj):
    """Return an iterator for lazy evaluation.

    Recognized container types (squint queries/results, pandas
    DataFrame/Series, NumPy arrays, and DBAPI2 cursors) are adapted
    into plain iterators, TypedIterator, or IterItems objects. Any
    unrecognized object is returned unchanged.
    """
    if isinstance(obj, TypedIterator):
        if issubclass(obj.evaltype, Mapping):
            obj = IterItems(obj)
        return obj  # <- EXIT!

    # Separate Squint module.
    squint = sys.modules.get('squint', None)
    if squint:
        if isinstance(obj, squint.Query):
            obj = obj.execute()
            if issubclass(getattr(obj, 'evaltype', NoneType), Mapping):
                obj = IterItems(obj)
            return obj  # <- EXIT!

        if isinstance(obj, squint.Result):
            if issubclass(obj.evaltype, Mapping):
                obj = IterItems(obj)
            return obj  # <- EXIT!

    pandas = sys.modules.get('pandas', None)
    if pandas:
        if isinstance(obj, pandas.DataFrame):
            # Duplicate index labels would make the mapping ambiguous.
            if not obj.index.is_unique:
                msg = '{0} index contains duplicates, must be unique'
                raise ValueError(msg.format(obj.__class__.__name__))

            if isinstance(obj.index, pandas.RangeIndex):
                # DataFrame with RangeIndex is treated as an iterator.
                if len(obj.columns) == 1:
                    obj = (x[0] for x in obj.values)
                else:
                    obj = (tuple(x) for x in obj.values)
                return TypedIterator(obj, evaltype=list)  # <- EXIT!
            else:
                # DataFrame with another index type is treated as a mapping.
                if len(obj.columns) == 1:
                    gen = ((x[0], x[1]) for x in obj.itertuples())
                else:
                    gen = ((x[0], tuple(x[1:])) for x in obj.itertuples())
                return IterItems(gen)  # <- EXIT!
        elif isinstance(obj, pandas.Series):
            # Duplicate index labels would make the mapping ambiguous.
            if not obj.index.is_unique:
                msg = '{0} index contains duplicates, must be unique'
                raise ValueError(msg.format(obj.__class__.__name__))

            if isinstance(obj.index, pandas.RangeIndex):
                # Series with RangeIndex is treated as an iterator.
                return TypedIterator(obj.values, evaltype=list)  # <- EXIT!
            else:
                # Series with another index type is treated as a mapping.
                # Series.iteritems() was removed in pandas 2.0, so prefer
                # items() and keep iteritems() only as a fallback for
                # very old pandas versions.
                if hasattr(obj, 'items'):
                    return IterItems(obj.items())  # <- EXIT!
                return IterItems(obj.iteritems())  # <- EXIT!

    numpy = sys.modules.get('numpy', None)
    if numpy and isinstance(obj, numpy.ndarray):
        # Two-dimensional array, recarray, or structured array.
        if obj.ndim == 2 or (obj.ndim == 1 and len(obj.dtype) > 1):
            obj = (tuple(x) for x in obj)
            return TypedIterator(obj, evaltype=list)  # <- EXIT!

        # One-dimensional array, recarray, or structured array.
        if obj.ndim == 1:
            if len(obj.dtype) == 1:        # Unpack single-valued recarray
                obj = (x[0] for x in obj)  # or structured array.
            else:
                obj = iter(obj)
            return TypedIterator(obj, evaltype=list)  # <- EXIT!

    # Check for cursor-like object (if obj has DBAPI2 cursor attributes).
    if all(hasattr(obj, n) for n in ('fetchone', 'execute',
                                     'rowcount', 'description')):
        if not isinstance(obj, Iterable):
            def cursor_to_gen(cursor):       # While most cursor objects are
                while True:                  # iterable, it is not required
                    row = cursor.fetchone()  # by the DBAPI2 specification.
                    if row is None:
                        break
                    yield row
            obj = cursor_to_gen(obj)

        first, obj = iterpeek(obj)
        if first and len(first) == 1:
            obj = iter(x[0] for x in obj)  # Unwrap single-value records.
        return obj  # <- EXIT!

    return obj
122 |
123 |
def _normalize_eager(obj, default_type=None):
    """Eagerly evaluate *obj* when possible. When *obj* is exhaustible,
    a *default_type* must be specified. When provided, *default_type*
    must be a collection type (a sized iterable container).
    """
    if isinstance(obj, TypedIterator):
        return obj.fetch()  # Evaluate to the iterator's declared type.

    # Separate Squint module.
    squint = sys.modules.get('squint', None)
    if squint and isinstance(obj, squint.Result):
        return obj.fetch()

    if isinstance(obj, IterItems):
        return dict(obj)

    if isinstance(obj, Iterable) and exhaustible(obj):
        # An exhaustible iterable can only be consumed once, so a
        # concrete collection type is required to hold its values.
        valid_default = (isinstance(default_type, type)
                         and issubclass(default_type, Collection))
        if not valid_default:
            msg = ("exhaustible type '{0}' cannot be eagerly evaluated "
                   "without specifying a 'default_type' collection")
            raise TypeError(msg.format(obj.__class__.__name__))
        return default_type(obj)

    return obj
150 |
151 |
def normalize(obj, lazy_evaluation=False, default_type=None):
    """Normalize *obj* for validation, lazily or eagerly.

    With *lazy_evaluation* the normalized iterator is returned as-is;
    otherwise it is eagerly evaluated (see _normalize_eager).
    """
    normalized = _normalize_lazy(obj)
    if not lazy_evaluation:
        normalized = _normalize_eager(normalized, default_type)
    return normalized
157 |
--------------------------------------------------------------------------------