├── .gitignore ├── LICENSE ├── README.md ├── examples └── examples.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py └── test_checks.py └── validada ├── __init__.py ├── convenience.py ├── core.py ├── decorators ├── __init__.py ├── raising.py └── returning.py ├── functions ├── __init__.py ├── raising.py └── returning.py └── slicers.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | .spyderproject 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | pytestreport/ 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .noseids 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | 57 | # Sphinx documentation 58 | docs/_build/ 59 | 60 | # PyBuilder 61 | target/ 62 | 63 | # Editor 64 | 65 | *.swp 66 | 67 | # other 68 | data/ 69 | wheelhouse/ 70 | *.zip 71 | *.csv 72 | *.pyc 73 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Jeffrey McLarty 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | The MIT License (MIT) 24 | 25 | Copyright (c) 2015 Tom Augspurger 26 | 27 | Permission is hereby granted, free of charge, to any person obtaining a copy 28 | of this software and associated documentation files (the "Software"), to deal 29 | in the Software without restriction, including without limitation the rights 30 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 31 | copies of the Software, and to permit persons to whom the Software is 32 | furnished to do so, subject to the following conditions: 33 | 34 | The above copyright notice and this permission notice shall be included in all 35 | copies or substantial portions of the Software. 36 | 37 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 38 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 39 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 40 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 41 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 42 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 43 | SOFTWARE. 44 | 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Validada 2 | ======== 3 | 4 | (Pronounced "Valid-Data") 5 | 6 | This project started as a fork of [engarde v0.0.2](https://github.com/TomAugspurger/engarde) 7 | 8 | Validada differentiates from engarde under the hood, substantially, in order to implement a richer 9 | feature set including custom-exceptions, universal slicing API, check object-return. All, 10 | with a focus on code brevity. 11 | 12 | All of the basics are the same as engarde, with likely a minor hit to speed. Although, 13 | in many cases engarde raises on the first problem it finds. Validada's policy is 14 | to raise only after checking everything. 
15 | 16 | As of 7/7/2015, validada passes all of the unit tests of engarde. 17 | 18 | Slicing? 19 | ======== 20 | All checks slice the dataframe internally, so users of validada never have to pass in a sliced dataframe. 21 | Instead, users can pass in a slice-like object as an argument. 22 | 23 | How do I pass a slice? 24 | 25 | ```python 26 | from validada.slicers import iloc, loc 27 | 28 | some_check(adf, iloc[-7:], iloc[:-7]) 29 | 30 | # or... 31 | 32 | @some_check(iloc[-1], iloc[:-1]) 33 | def somefunc(adf): 34 | return adf + 1.0 35 | 36 | ``` 37 | 38 | All checks can take up to two slice-like arguments. The first, is the slice which will be checked. 39 | The second, is a slice for calculating constants to use during the check. Both are optional. 40 | So, say you have a dataframe coming from a source of data, with known "good" data 41 | (for instance, before last week), and want to check that the data for just this week is within 42 | two standard deviations of the data, excluding the latest week of data, you would pass in 43 | ```iloc[-7:]``` and ```iloc[:-7]``` as arguments to the check. 44 | 45 | ``` 46 | #To use the same functionality of engarde, one would use... 47 | from validada.functions.raising import none_missing, is_shape, unique_index 48 | #or 49 | from validada.decorators.raising import none_missing, is_shape, unique_index 50 | ``` 51 | 52 | ``` 53 | #But with validada you get more out of the box... 54 | from validada.functions.returning import none_missing, is_shape, unique_index 55 | #or 56 | from validada.decorators.returning import none_missing, is_shape, unique_index 57 | ``` 58 | 59 | Custom Return-Objects? 60 | ====================== 61 | Depending on the check, there might be some useful information to pass back out, or maybe you 62 | want to perform a bunch of checks and just collect the boolean results for each? 
63 | 64 | ``` python 65 | from validada.core import ReturnSet 66 | 67 | rs = ReturnSet(('bool', 'obj')) 68 | none_missing = rs.none_missing 69 | 70 | print "Since we specified 'bool' and 'obj', in that order:" 71 | a_bool, an_obj = none_missing(adf, ix['2013':], columns='one') 72 | #a_bool, is the result of the check 73 | print a_bool 74 | #an_obj, is a none_missing specific object, it's a way to 75 | #get other information out of the check. 76 | print an_obj 77 | ``` 78 | 79 | 80 | Custom Exceptions? 81 | ================== 82 | To use the advance features instantiate your own ```CheckSet``` (or child of, eg. ```RaiseSet```,```ReturnSet```) via... 83 | 84 | ``` python 85 | from validada.core import RaiseSet 86 | rs = RaiseSet(IOError, "IO error makes no sense, but why not?") 87 | none_missing = rs.none_missing 88 | 89 | #ready... 90 | none_missing(adf, ix['2013':]) 91 | 92 | #or make a decorator 93 | none_missing = rs.decorator_maker('none_missing') 94 | ``` 95 | 96 | Dependencies 97 | ============ 98 | 99 | - Pandas 100 | 101 | 102 | Supports python 2.7+ and Python 3.6 103 | 104 | 105 | Overall Design 106 | ============== 107 | 108 | Every check has a return-function and raise-function created all sharing a common signature. 109 | These two functions are used to create one staticfunction, for every check, of the CheckSet. 110 | A CheckSet object stores custom-exception, custom-object return, and default slicing settings. 111 | A CheckSet object has a generic way to turn any check, into a decorator using one line. 112 | An instance of RaiseSet and ReturnSet is used to declare function.*.checks and decorators.*.checks. 
113 | 114 | See Also 115 | ======== 116 | 117 | [assertr](https://github.com/tonyfischetti/assertr) 118 | [engarde](https://github.com/TomAugspurger/engarde) 119 | 120 | -------------------------------------------------------------------------------- /examples/examples.py: -------------------------------------------------------------------------------- 1 | 2 | from validada.functions.raising import none_missing 3 | from validada.slicers import iloc, loc 4 | 5 | import pandas as pd 6 | 7 | # Create some data... 8 | ind = pd.date_range('2010', '2015', freq='A') 9 | adf = pd.DataFrame({'one' : list(range(5)), 'two' : [ i ** 2 for i in range(5)]}, index=ind) 10 | adf['two'].iloc[4]=pd.np.NaN 11 | 12 | """ 13 | one two 14 | 2010-12-31 0 0 15 | 2011-12-31 1 1 16 | 2012-12-31 2 4 17 | 2013-12-31 3 9 18 | 2014-12-31 4 NaN 19 | """ 20 | 21 | # Basic call... 22 | try: 23 | none_missing(adf) 24 | except AssertionError: 25 | print("Some values are missing!") 26 | 27 | # Using arguments explicitly 28 | try: 29 | none_missing(adf, columns='one') 30 | print("No problem here!") 31 | except: 32 | pass 33 | 34 | # Or implicitly using arguments... 35 | try: 36 | none_missing(adf, 'two') 37 | print("Shouldn't see this!") 38 | except: 39 | print("There's a problem, in the second column") 40 | 41 | 42 | try: 43 | none_missing(adf, iloc[-2:]) 44 | except AssertionError: 45 | print("Some values are missing in the last two rows") 46 | 47 | try: 48 | # iloc stores the :-2 slice, so this works until iloc is changed 49 | none_missing(adf, iloc) 50 | except AssertionError: 51 | print("Some values are still missing in the last two rows") 52 | 53 | 54 | try: 55 | none_missing(adf, ix[:'2013']) 56 | print("There are no problems looking at data before 2013") 57 | except: 58 | pass 59 | 60 | # Did you notice the type detection between passing 'two', and the slicers? 61 | # Look ma, args only - kwargs not mandatory! 62 | 63 | # Now for some real fun... 
64 | 65 | from validada.core import RaiseSet 66 | 67 | rs = RaiseSet(IOError, "IO error makes no sense, but why not?") 68 | none_missing = rs.none_missing 69 | 70 | try: 71 | none_missing(adf, loc['2013':]) 72 | except IOError as e: 73 | print(str(e)) 74 | 75 | #This is only needed, since the user has added a custom exception message... 76 | none_missing = rs.decorator_maker('none_missing') 77 | 78 | @none_missing(loc['2013':]) 79 | def somefunc(anydf): 80 | soln = anydf + 1.0 81 | return soln 82 | 83 | try: 84 | somefunc(adf) 85 | except IOError as e: 86 | print("Second time the charm?") 87 | print(str(e)) 88 | 89 | from validada.core import ReturnSet 90 | 91 | rs = ReturnSet(('bool', 'obj')) 92 | none_missing = rs.none_missing 93 | 94 | print("Since we specified 'bool' and 'obj':") 95 | a_bool, an_obj = none_missing(adf, loc['2013':], columns='one') 96 | #a_bool, is the result of the check 97 | print(a_bool) 98 | #an_obj, is a none_missing specific object, it's a way to get other information out of the check. 
99 | print(an_obj) 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [metadata] 5 | description-file = README.md 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | # To use a consistent encoding 3 | from os import path 4 | 5 | here = path.abspath(path.dirname(__file__)) 6 | 7 | setup( 8 | name='validada', 9 | 10 | version='0.0.2', 11 | 12 | description='A python package for defensive data analysis.', 13 | long_description='A python package for defensive data analysis.', 14 | 15 | url='https://github.com/jnmclarty/validada', 16 | 17 | # Author details 18 | author='Jeffrey McLarty', 19 | author_email='jeffrey.mclarty@gmail.com', 20 | 21 | # Choose your license 22 | license='MIT', 23 | 24 | classifiers=[ 25 | 'Development Status :: 3 - Alpha', 26 | 'Intended Audience :: Developers', 27 | 'License :: OSI Approved :: MIT License', 28 | 'Programming Language :: Python :: 2.7', 29 | 'Programming Language :: Python :: 3.6', 30 | ], 31 | 32 | keywords='data analysis','engarde', 'validata', 'valid data', 'validada', 33 | packages=find_packages(exclude=['tests']), 34 | # install_requires=['numpy', 'pandas'], 35 | 36 | # List additional groups of dependencies here (e.g. development 37 | # dependencies). 
You can install these using the following syntax, 38 | # for example: 39 | # $ pip install -e .[dev,test] 40 | extras_require={ 41 | 'dev': [''], 42 | 'test': ['coverage', 'pytest'], 43 | }, 44 | 45 | ) 46 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jnmclarty/validada/da806ee649d67e2ac36ad4f22493003622493ee5/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_checks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pytest 3 | import numpy as np 4 | import pandas as pd 5 | import pandas.util.testing as tm 6 | 7 | from validada.slicers import iloc 8 | import validada.functions.raising as ck 9 | import validada.decorators.raising as dc 10 | 11 | import datetime as dt 12 | 13 | def _add_one(df): 14 | return df + 1 15 | 16 | def _safe_add_one(df): 17 | return df.fillna(0.0) + 1 18 | 19 | def _noop(df): 20 | return df 21 | 22 | def test_is_in_index(): 23 | dr = pd.date_range(start='2015-01-01', periods=6, freq='D') 24 | df = pd.DataFrame(data = list(range(6)), index=dr) 25 | 26 | d = dt.date(2015,1,3) 27 | 28 | result = ck.has_in_index(df, obj=d) 29 | tm.assert_frame_equal(df, result) 30 | 31 | result = dc.has_in_index(obj=d)(_add_one)(df) 32 | tm.assert_frame_equal(result, df + 1) 33 | 34 | result = ck.has_in_index(df, obj=d, try_ix=True) 35 | 36 | result = ck.has_in_index(df, obj=d, try_ix=True, try_strftime="%Y-%m") 37 | 38 | result = ck.has_in_index(df, obj=d, check_na=True) 39 | 40 | def test_is_in_index_raises(): 41 | dr = pd.date_range(start='2015-01-01', periods=6, freq='D') 42 | da = list(range(6)) 43 | da[2] = pd.np.nan 44 | df = pd.DataFrame(data = da, index=dr) 45 | 46 | d = dt.date(2015,1,12) 47 | 48 | with pytest.raises(AssertionError): 49 | 
ck.has_in_index(df, obj=d) 50 | 51 | with pytest.raises(AssertionError): 52 | dc.has_in_index(obj=d)(_add_one)(df) 53 | 54 | with pytest.raises(AssertionError): 55 | ck.has_in_index(df, obj=d, try_ix=True) 56 | 57 | ck.has_in_index(df, obj=d, try_ix=True, try_strftime="%Y-%m") 58 | 59 | d = dt.datetime(2015,1,3) 60 | ck.has_in_index(df, obj=d) 61 | ck.has_in_index(df, obj=d, check_na=False) 62 | 63 | with pytest.raises(AssertionError): 64 | ck.has_in_index(df, obj=d, check_na=True) 65 | 66 | def test_equal_columns_sum(): 67 | df = pd.DataFrame({'A': [1,2,3,4,5], 'B': [1,2,3,4,5]}) 68 | 69 | result = ck.equal_columns_sum(df) 70 | tm.assert_frame_equal(df, result) 71 | 72 | result = dc.equal_columns_sum()(_add_one)(df) 73 | tm.assert_frame_equal(result, df + 1) 74 | 75 | def test_equal_columns_sum_raises_slice(): 76 | df = pd.DataFrame({'A': [None,2,3,4,0], 'B': [1,2,3,4,None]}) 77 | 78 | with pytest.raises(AssertionError): 79 | ck.equal_columns_sum(df) 80 | with pytest.raises(AssertionError): 81 | dc.equal_columns_sum()(_add_one)(df) 82 | 83 | s = iloc[-3:] 84 | result = ck.equal_columns_sum(df, s) 85 | tm.assert_frame_equal(df, result) 86 | 87 | result = dc.equal_columns_sum(s)(_safe_add_one)(df) 88 | tm.assert_frame_equal(result, _safe_add_one(df)) 89 | 90 | def test_none_missing(): 91 | df = pd.DataFrame(np.random.randn(5, 3)) 92 | result = ck.none_missing(df) 93 | tm.assert_frame_equal(df, result) 94 | 95 | result = dc.none_missing()(_add_one)(df) 96 | tm.assert_frame_equal(result, df + 1) 97 | 98 | def test_none_missing_raises(): 99 | df = pd.DataFrame(np.random.randn(5, 3)) 100 | df.iloc[0, 0] = np.nan 101 | with pytest.raises(AssertionError): 102 | ck.none_missing(df) 103 | 104 | with pytest.raises(AssertionError): 105 | dc.none_missing()(_add_one)(df) 106 | 107 | def test_monotonic_increasing_lax(): 108 | df = pd.DataFrame([1, 2, 2]) 109 | tm.assert_frame_equal(df, ck.is_monotonic(df, increasing=True)) 110 | result = 
dc.is_monotonic(increasing=True)(_add_one)(df) 111 | tm.assert_frame_equal(result, df + 1) 112 | 113 | df = pd.DataFrame([1, 2, 1]) 114 | with pytest.raises(AssertionError): 115 | ck.is_monotonic(df, increasing=True) 116 | with pytest.raises(AssertionError): 117 | dc.is_monotonic(increasing=True)(_add_one)(df) 118 | 119 | df = pd.DataFrame([3, 2, 1]) 120 | with pytest.raises(AssertionError): 121 | ck.is_monotonic(df, increasing=True) 122 | with pytest.raises(AssertionError): 123 | dc.is_monotonic(increasing=True)(_add_one)(df) 124 | 125 | def test_monotonic_increasing_strict(): 126 | df = pd.DataFrame([1, 2, 3]) 127 | tm.assert_frame_equal(df, ck.is_monotonic(df, increasing=True, strict=True)) 128 | result = dc.is_monotonic(increasing=True, strict=True)(_add_one)(df) 129 | tm.assert_frame_equal(result, df + 1) 130 | 131 | df = pd.DataFrame([1, 2, 2]) 132 | with pytest.raises(AssertionError): 133 | ck.is_monotonic(df, increasing=True, strict=True) 134 | with pytest.raises(AssertionError): 135 | dc.is_monotonic(increasing=True, strict=True)(_add_one)(df) 136 | 137 | df = pd.DataFrame([3, 2, 1]) 138 | with pytest.raises(AssertionError): 139 | ck.is_monotonic(df, increasing=True, strict=True) 140 | with pytest.raises(AssertionError): 141 | dc.is_monotonic(increasing=True, strict=True)(_add_one)(df) 142 | 143 | def test_monotonic_decreasing(): 144 | df = pd.DataFrame([2, 2, 1]) 145 | tm.assert_frame_equal(df, ck.is_monotonic(df, increasing=False)) 146 | result = dc.is_monotonic(increasing=False)(_add_one)(df) 147 | tm.assert_frame_equal(result, df + 1) 148 | 149 | df = pd.DataFrame([1, 2, 1]) 150 | with pytest.raises(AssertionError): 151 | ck.is_monotonic(df, increasing=False) 152 | with pytest.raises(AssertionError): 153 | dc.is_monotonic(increasing=False)(_add_one)(df) 154 | 155 | df = pd.DataFrame([1, 2, 3]) 156 | with pytest.raises(AssertionError): 157 | ck.is_monotonic(df, increasing=False) 158 | with pytest.raises(AssertionError): 159 | 
dc.is_monotonic(increasing=False)(_add_one)(df) 160 | 161 | def test_monotonic_decreasing_strict(): 162 | df = pd.DataFrame([3, 2, 1]) 163 | tm.assert_frame_equal(df, ck.is_monotonic(df, increasing=False, 164 | strict=True)) 165 | result = dc.is_monotonic(increasing=False, strict=True)(_add_one)(df) 166 | tm.assert_frame_equal(result, df + 1) 167 | 168 | df = pd.DataFrame([2, 2, 1]) 169 | with pytest.raises(AssertionError): 170 | ck.is_monotonic(df, increasing=False, strict=True) 171 | with pytest.raises(AssertionError): 172 | dc.is_monotonic(increasing=False, strict=True)(_add_one)(df) 173 | 174 | df = pd.DataFrame([1, 2, 3]) 175 | with pytest.raises(AssertionError): 176 | ck.is_monotonic(df, increasing=False, strict=True) 177 | with pytest.raises(AssertionError): 178 | dc.is_monotonic(increasing=False, strict=True)(_add_one)(df) 179 | 180 | def test_monotonic_either(): 181 | df = pd.DataFrame({'A': [1, 2, 2], 'B': [3, 2, 2]}) 182 | tm.assert_frame_equal(df, ck.is_monotonic(df)) 183 | result = dc.is_monotonic()(_add_one)(df) 184 | tm.assert_frame_equal(result, df + 1) 185 | 186 | df = pd.DataFrame({'A': [1, 2, 3], 'B': [1, 2, 1]}) 187 | with pytest.raises(AssertionError): 188 | ck.is_monotonic(df) 189 | with pytest.raises(AssertionError): 190 | dc.is_monotonic()(_add_one)(df) 191 | 192 | def test_monotonic_either_stict(): 193 | df = pd.DataFrame({'A': [1, 2, 3], 'B': [3, 2, 1]}) 194 | tm.assert_frame_equal(df, ck.is_monotonic(df, strict=True)) 195 | result = dc.is_monotonic(strict=True)(_add_one)(df) 196 | tm.assert_frame_equal(result, df + 1) 197 | 198 | df = pd.DataFrame({'A': [1, 2, 2], 'B': [3, 2, 2]}) 199 | with pytest.raises(AssertionError): 200 | ck.is_monotonic(df, strict=True) 201 | with pytest.raises(AssertionError): 202 | dc.is_monotonic(strict=True)(_add_one)(df) 203 | 204 | def test_monotonic_items(): 205 | df = pd.DataFrame({'A': [1, 2, 3], 'B': [3, 2, 3]}) 206 | tm.assert_frame_equal(df, ck.is_monotonic(df, items={'A': (True, True)})) 207 | 
tm.assert_frame_equal(dc.is_monotonic(items={'A': (True, True)}, strict=True)(_add_one)( 208 | df), df + 1) 209 | 210 | def test_is_shape(): 211 | shape = 10, 2 212 | df = pd.DataFrame(np.random.randn(*shape)) 213 | tm.assert_frame_equal(df, ck.is_shape(df, shape)) 214 | result = dc.is_shape(shape=shape)(_add_one)(df) 215 | tm.assert_frame_equal(result, df + 1) 216 | 217 | with pytest.raises(AssertionError): 218 | ck.is_shape(df, (9, 2)) 219 | with pytest.raises(AssertionError): 220 | dc.is_shape((9, 2))(_add_one)(df) 221 | 222 | def test_unique_index(): 223 | df = pd.DataFrame([1, 2, 3], index=['a', 'b', 'c']) 224 | tm.assert_frame_equal(df, ck.unique_index(df)) 225 | result = dc.unique_index()(_add_one)(df) 226 | tm.assert_frame_equal(result, df + 1) 227 | 228 | with pytest.raises(AssertionError): 229 | ck.unique_index(df.reindex(['a', 'a', 'b'])) 230 | with pytest.raises(AssertionError): 231 | dc.unique_index()(_add_one)(df.reindex(['a', 'a', 'b'])) 232 | 233 | def test_within_set(): 234 | df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']}) 235 | items = {'A': [1, 2, 3], 'B': ['a', 'b', 'c']} 236 | tm.assert_frame_equal(df, ck.within_set(df, items)) 237 | tm.assert_frame_equal(df, dc.within_set(items=items)(_noop)(df)) 238 | 239 | items.pop('A') 240 | tm.assert_frame_equal(df, ck.within_set(df, items)) 241 | tm.assert_frame_equal(df, dc.within_set(items=items)(_noop)(df)) 242 | 243 | items['A'] = [1, 2] 244 | with pytest.raises(AssertionError): 245 | ck.within_set(df, items) 246 | with pytest.raises(AssertionError): 247 | dc.within_set(items=items)(_noop)(df) 248 | 249 | def test_within_range(): 250 | df = pd.DataFrame({'A': [-1, 0, 1]}) 251 | items = {'A': (-1, 1)} 252 | tm.assert_frame_equal(df, ck.within_range(df, items)) 253 | tm.assert_frame_equal(df, dc.within_range(items)(_noop)(df)) 254 | 255 | items['A'] = (0, 1) 256 | with pytest.raises(AssertionError): 257 | ck.within_range(df, items) 258 | with pytest.raises(AssertionError): 259 | 
dc.within_range(items)(_noop)(df) 260 | 261 | def test_within_n_std(): 262 | df = pd.DataFrame({'A': np.arange(10)}) 263 | tm.assert_frame_equal(df, ck.within_n_std(df)) 264 | tm.assert_frame_equal(df, dc.within_n_std()(_noop)(df)) 265 | 266 | with pytest.raises(AssertionError): 267 | ck.within_n_std(df, .5) 268 | with pytest.raises(AssertionError): 269 | dc.within_n_std(.5)(_noop)(df) 270 | 271 | def test_has_dtypes(): 272 | df = pd.DataFrame({'A': np.random.randint(0, 10, 10), 273 | 'B': np.random.randn(10), 274 | 'C': list('abcdefghij'), 275 | 'D': pd.Categorical(np.random.choice(['a', 'b'], 10))}) 276 | dtypes = {'A': int, 'B': 'float', 'C': object, 'D': 'category'} 277 | tm.assert_frame_equal(df, ck.has_dtypes(df, dtypes)) 278 | tm.assert_frame_equal(df, dc.has_dtypes(items=dtypes)(_noop)(df)) 279 | 280 | with pytest.raises(AssertionError): 281 | ck.has_dtypes(df, {'A': float}) 282 | 283 | with pytest.raises(AssertionError): 284 | dc.has_dtypes(items={'A': bool})(_noop)(df) 285 | 286 | -------------------------------------------------------------------------------- /validada/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /validada/convenience.py: -------------------------------------------------------------------------------- 1 | from .slicers import SliceStore 2 | 3 | def _pull_out_ret(kwargs, dforig): 4 | try: 5 | _ret = kwargs['_ret'] 6 | except KeyError: 7 | raise KeyError("_ret must be defined") 8 | 9 | if not isinstance(_ret, (list, tuple)): 10 | _ret = (_ret,) 11 | ret_specd = {'orig' : dforig, 'bool' : None, 'ndframe' : None, 'obj' : None} 12 | return _ret, ret_specd 13 | 14 | def _pull_out_raize_kwargs(kwargs): 15 | try: 16 | _raize = kwargs['_raize'] 17 | _raize_msg = kwargs['_raize_msg'] 18 | except KeyError: 19 | raise KeyError("_raize and _raize_msg must be defined") 20 | 21 | kwargs = {key : value for key, value 
in kwargs.items() if key not in ('_raize', '_raize_kwargs')} 22 | 23 | return _raize, _raize_msg, kwargs 24 | 25 | 26 | def _read_arg_or_kwarg(args, pos, kwargs, arg, ifnone=None): 27 | if arg in kwargs: 28 | val = kwargs[arg] 29 | if val is None: 30 | val = ifnone 31 | elif len(args) >= (pos + 1): 32 | val = args[pos] 33 | else: 34 | val = ifnone 35 | return val 36 | 37 | def _read_required_arg_or_kwarg(args, pos, kwargs, arg): 38 | if arg in kwargs: 39 | val = kwargs[arg] 40 | elif len(args) >= (pos + 1): 41 | val = args[pos] 42 | else: 43 | raise Exception("Keyword argument '{}' must be defined in function".format(arg)) 44 | return val 45 | 46 | def _ret_proper_objects(_ret, ret_specd): 47 | ret = [ret_specd[t] for t in _ret] 48 | 49 | if len(ret) == 1: 50 | return ret[0] 51 | else: 52 | return tuple(ret) 53 | 54 | def _make_generic_raizer(returner): 55 | def raizer(dforig, dfcheck, dfderive, *args, **kwargs): 56 | 57 | _raize, _raize_msg, kwargs = _pull_out_raize_kwargs(kwargs) 58 | 59 | _ret = ('bool',) 60 | result = returner(dforig, dfcheck, dfderive, _ret=_ret, *args, **kwargs) 61 | 62 | if not result: 63 | return dforig 64 | else: 65 | raise _raize(_raize_msg) 66 | return raizer 67 | 68 | def _generic_check_maker(returner, raizer): 69 | def check(self, df, *args, **kwargs): 70 | 71 | slc, args = _lop_off_head_if_slice(args, self.check_slc) 72 | slcd, args = _lop_off_head_if_slice(args, self.derive_slc) 73 | 74 | dfc = getattr(df, slc.mode)[slc.slc] 75 | dfd = getattr(df, slcd.mode)[slcd.slc] 76 | 77 | if self.raize is not None: 78 | result = raizer(df, dfc, dfd, 79 | *args, _raize=self.raize, 80 | _raize_msg=self.raize_msg, **kwargs) 81 | elif self.ret is not None: 82 | result = returner(df, dfc, dfd, *args, _ret=self.ret, **kwargs) 83 | else: 84 | raise Exception("Can't read your mind") 85 | return result 86 | return check 87 | 88 | def _lop_off_head_if_slice(args, otherwise): 89 | if len(args) >= 1: 90 | if isinstance(args[0], (slice, SliceStore)): 91 | 
if len(args) >= 2: 92 | return args[0], args[1:] 93 | else: 94 | return args[0], [] 95 | else: 96 | return otherwise, args 97 | return otherwise, [] -------------------------------------------------------------------------------- /validada/core.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | 4 | import datetime as dt 5 | 6 | 7 | from . import slicers 8 | from functools import wraps 9 | from copy import copy 10 | from collections import Counter 11 | 12 | from .convenience import _pull_out_ret, _pull_out_raize_kwargs, \ 13 | _read_arg_or_kwarg, _read_required_arg_or_kwarg, \ 14 | _ret_proper_objects, _make_generic_raizer, \ 15 | _generic_check_maker, _lop_off_head_if_slice 16 | 17 | def _has_in_index_ret(dforig, dfcheck, dfderive, *args, **kwargs): 18 | 19 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 20 | 21 | orig_obj_to_check = _read_required_arg_or_kwarg(args, 0, kwargs, 'obj') 22 | obj_to_check = orig_obj_to_check 23 | 24 | try_ix = _read_arg_or_kwarg(args, 1, kwargs, 'try_ix', False) 25 | try_strftime = _read_arg_or_kwarg(args, 2, kwargs, 'try_strftime', False) 26 | check_na = _read_arg_or_kwarg(args, 3, kwargs, 'check_na', False) 27 | 28 | 29 | 30 | ans = obj_to_check in dfcheck.index 31 | 32 | if try_strftime: 33 | try: 34 | obj_to_check = obj_to_check.strftime(try_strftime) 35 | except: 36 | pass 37 | 38 | # at this point, if it's in the index, ans will be True 39 | 40 | if try_ix: 41 | try: 42 | ans = ans or (len(dfcheck.loc[obj_to_check]) > 0) 43 | except: 44 | pass 45 | 46 | # at this point, if it's in the index, or if it's string representation 47 | # is in the index, ans will be True 48 | 49 | if check_na: 50 | try: 51 | isna = dfcheck.loc[obj_to_check].isnull()[0] 52 | except: 53 | isna = True 54 | ans = not ((not ans) or isna) 55 | 56 | results = {orig_obj_to_check : ans} 57 | ret_specd['obj'] = results 58 | ret_specd['ndframe'] = pd.Series(results) 59 | ret_specd['bool'] = not 
ans 60 | 61 | return _ret_proper_objects(_ret, ret_specd) 62 | _has_in_index_raize = _make_generic_raizer(_has_in_index_ret) 63 | 64 | def _equal_columns_sum_ret(dforig, dfcheck, dfderive, *args, **kwargs): 65 | 66 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 67 | 68 | cola = _read_arg_or_kwarg(args, 0, kwargs, 'cola', dfcheck.columns[0]) 69 | colb = _read_arg_or_kwarg(args, 1, kwargs, 'colb', dfcheck.columns[1]) 70 | 71 | results = {cola : dfcheck[cola].dropna().sum(), 72 | colb : dfcheck[colb].dropna().sum()} 73 | 74 | ret_specd['obj'] = results 75 | ret_specd['ndframe'] = pd.Series(results) 76 | ret_specd['bool'] = not results[cola] == results[colb] 77 | 78 | return _ret_proper_objects(_ret, ret_specd) 79 | _equal_columns_sum_raize = _make_generic_raizer(_equal_columns_sum_ret) 80 | 81 | def _none_missing_ret(dforig, dfcheck, dfderive, *args, **kwargs): 82 | 83 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 84 | 85 | columns = _read_arg_or_kwarg(args, 0, kwargs, 'columns', dfcheck.columns) 86 | 87 | ret_specd['obj'] = dfcheck[columns].isnull() 88 | ret_specd['ndframe'] = ret_specd['obj'].any() 89 | ret_specd['bool'] = ret_specd['ndframe'].any() 90 | 91 | return _ret_proper_objects(_ret, ret_specd) 92 | _none_missing_raize = _make_generic_raizer(_none_missing_ret) 93 | 94 | def _is_shape_ret(dforig, dfcheck, dfderive, *args, **kwargs): 95 | 96 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 97 | 98 | shape = _read_required_arg_or_kwarg(args, 0, kwargs, 'shape') 99 | 100 | ret_specd['obj'] = "is_shape has no output object" 101 | ret_specd['ndframe'] = "is_shape has no output ndframe" 102 | ret_specd['bool'] = not dfcheck.shape == shape 103 | 104 | return _ret_proper_objects(_ret, ret_specd) 105 | _is_shape_raize = _make_generic_raizer(_is_shape_ret) 106 | 107 | def _unique_index_ret(dforig, dfcheck, dfderive, *args, **kwargs): 108 | 109 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 110 | 111 | ret_specd['obj'] = Counter(list(dfcheck.index)) 112 | 
ret_specd['ndframe'] = pd.Series(ret_specd['obj']) 113 | ret_specd['bool'] = not dfcheck.index.is_unique 114 | 115 | return _ret_proper_objects(_ret, ret_specd) 116 | _unique_index_raize = _make_generic_raizer(_unique_index_ret) 117 | 118 | def _is_monotonic_ret(dforig, dfcheck, dfderive, *args, **kwargs): 119 | 120 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 121 | 122 | increasing = _read_arg_or_kwarg(args, 0, kwargs, 'increasing', None) 123 | strict = _read_arg_or_kwarg(args, 1, kwargs, 'strict', False) 124 | items = _read_arg_or_kwarg(args, 2, kwargs, 'items', {k: (increasing, strict) for k in dfcheck}) 125 | 126 | results = {} 127 | for col, (increasing, strict) in list(items.items()): 128 | s = pd.Index(dfcheck[col]) 129 | if increasing: 130 | good = getattr(s, 'is_monotonic_increasing') 131 | elif increasing is None: 132 | good = getattr(s, 'is_monotonic') | getattr(s, 'is_monotonic_decreasing') 133 | else: 134 | good = getattr(s, 'is_monotonic_decreasing') 135 | if strict: 136 | if increasing: 137 | good = good & (s.to_series().diff().dropna() > 0).all() 138 | elif increasing is None: 139 | good = good & ((s.to_series().diff().dropna() > 0).all() | 140 | (s.to_series().diff().dropna() < 0).all()) 141 | else: 142 | good = good & (s.to_series().diff().dropna() < 0).all() 143 | results[col] = not good 144 | 145 | ret_specd['obj'] = results 146 | ret_specd['ndframe'] = pd.Series(results) 147 | ret_specd['bool'] = any(list(results.values())) 148 | 149 | return _ret_proper_objects(_ret, ret_specd) 150 | _is_monotonic_raize = _make_generic_raizer(_is_monotonic_ret) 151 | 152 | def _within_set_ret(dforig, dfcheck, dfderive, *args, **kwargs): 153 | 154 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 155 | 156 | items = _read_required_arg_or_kwarg(args, 0, kwargs, 'items') 157 | 158 | results = {} 159 | for k, v in list(items.items()): 160 | results[k] = not dfcheck[k].isin(v).all() 161 | 162 | ret_specd['obj'] = results 163 | ret_specd['ndframe'] = 
pd.Series(results) 164 | ret_specd['bool'] = any(list(results.values())) 165 | 166 | return _ret_proper_objects(_ret, ret_specd) 167 | _within_set_raize = _make_generic_raizer(_within_set_ret) 168 | 169 | def _within_range_ret(dforig, dfcheck, dfderive, *args, **kwargs): 170 | 171 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 172 | 173 | items = _read_required_arg_or_kwarg(args, 0, kwargs, 'items') 174 | 175 | results = {} 176 | for k, (lower, upper) in list(items.items()): 177 | results[k] = (lower > dfcheck[k]).any() or (upper < dfcheck[k]).any() 178 | 179 | ret_specd['obj'] = results 180 | ret_specd['ndframe'] = pd.Series(results) 181 | ret_specd['bool'] = any(list(results.values())) 182 | 183 | return _ret_proper_objects(_ret, ret_specd) 184 | _within_range_raize = _make_generic_raizer(_within_range_ret) 185 | 186 | def _within_n_std_ret(dforig, dfcheck, dfderive, *args, **kwargs): 187 | 188 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 189 | 190 | n = _read_arg_or_kwarg(args, 0, kwargs, 'n', 3) 191 | 192 | means = dfderive.mean() 193 | stds = dfderive.std() 194 | 195 | results = (pd.np.abs(dfcheck - means) < n * stds) 196 | 197 | ret_specd['obj'] = results 198 | ret_specd['ndframe'] = results 199 | ret_specd['bool'] = not results.all().all() 200 | 201 | return _ret_proper_objects(_ret, ret_specd) 202 | _within_n_std_raize = _make_generic_raizer(_within_n_std_ret) 203 | 204 | 205 | def _has_dtypes_ret(dforig, dfcheck, dfderive, *args, **kwargs): 206 | 207 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 208 | 209 | items = _read_required_arg_or_kwarg(args, 0, kwargs, 'items') 210 | 211 | results = {} 212 | dtypes = dfcheck.dtypes 213 | for k, v in list(items.items()): 214 | results[k] = not dtypes[k] == v 215 | 216 | ret_specd['obj'] = results 217 | ret_specd['ndframe'] = pd.Series(results) 218 | ret_specd['bool'] = any(list(results.values())) 219 | return _ret_proper_objects(_ret, ret_specd) 220 | 221 | _has_dtypes_raize = 
class CheckSet(object):
    """A configurable collection of DataFrame checks.

    Subclasses configure what each check returns (`ret`) and whether a
    failing check raises (`raize` / `raize_msg`). The public check
    methods are generated by `_generic_check_maker` from the
    module-level (_*_ret, _*_raize) checker pairs.
    """
    def __init__(self, ret=None, raize=None, msg=""):
        # Slices selecting which part of the frame is checked and which
        # part derived statistics (e.g. mean/std) are computed from.
        self.check_slc = copy(slicers.loc)
        self.derive_slc = copy(slicers.loc)

        self.ret = ret or ('ndframe', 'bool', 'obj')
        self.raize = raize or AssertionError
        self.raize_msg = msg

    none_missing = _generic_check_maker(_none_missing_ret, _none_missing_raize)
    none_missing.__doc__ = """
    Asserts that there are no missing values (NaNs) in the DataFrame.

    Parameters
    ==========
    df : Series or DataFrame
    columns : list of column names
    """

    is_monotonic = _generic_check_maker(_is_monotonic_ret, _is_monotonic_raize)
    is_monotonic.__doc__ = """
    Asserts that the DataFrame is monotonic.

    Parameters
    ==========
    df : Series or DataFrame
    items : dict
        mapping columns to conditions (increasing, strict)
    increasing : None or bool
        None is either increasing or decreasing.
    strict : bool
        whether the comparison should be strict
    """

    is_shape = _generic_check_maker(_is_shape_ret, _is_shape_raize)
    is_shape.__doc__ = """
    Asserts that the DataFrame is of a known shape.

    Parameters
    ==========
    df : DataFrame
    shape : tuple (n_rows, n_columns)
    """

    unique_index = _generic_check_maker(_unique_index_ret, _unique_index_raize)
    unique_index.__doc__ = """Assert that the index is unique."""

    within_set = _generic_check_maker(_within_set_ret, _within_set_raize)
    within_set.__doc__ = """
    Assert that df is a subset of items.

    Parameters
    ==========
    df : DataFrame
    items : dict
        mapping of columns (k) to array-like of values (v) that
        ``df[k]`` is expected to be a subset of
    """

    within_range = _generic_check_maker(_within_range_ret, _within_range_raize)
    within_range.__doc__ = """
    Assert that a DataFrame is within a range.

    Parameters
    ==========
    df : DataFrame
    items : dict
        mapping of columns (k) to a (low, high) tuple (v)
        that ``df[k]`` is expected to be between.
    """

    within_n_std = _generic_check_maker(_within_n_std_ret, _within_n_std_raize)
    within_n_std.__doc__ = """
    Assert that every value is within `n` standard deviations of its
    column's mean.

    Parameters
    ==========
    df : DataFrame
    n : float
        Number of standard deviations the columns should be within.
    """

    has_dtypes = _generic_check_maker(_has_dtypes_ret, _has_dtypes_raize)
    has_dtypes.__doc__ = """
    Assert that a DataFrame has `dtypes`.

    Parameters
    ==========
    df : DataFrame
    items : dict
        mapping of columns to dtype.
    """

    equal_columns_sum = _generic_check_maker(_equal_columns_sum_ret,
                                             _equal_columns_sum_raize)
    equal_columns_sum.__doc__ = """
    Assert that the sums of two columns are equal.

    Parameters
    ==========
    df : DataFrame
    cola : str
        column one
    colb : str
        column two
    """

    has_in_index = _generic_check_maker(_has_in_index_ret,
                                        _has_in_index_raize)
    has_in_index.__doc__ = """
    Assert that an object is present in the DataFrame's index.

    Parameters
    ==========
    df : DataFrame
    obj : obj
        Any hashable object that would be in an index
    try_ix : boolean, defaults to False
        will apply an additional check to see if the object can be
        converted using ix's logic.
    try_strftime : str or boolean, defaults to False
        If set to a string, it will be used to attempt obj.strftime(try_strftime)
        If set to True, it will be used to attempt obj.strftime('%Y-%m-%d')
        Does nothing if try_ix is False
    """

    def decorator_maker(self, name, *args, **kwargs):
        """Build a decorator that runs check `name` on a function's result.

        The wrapped function's return value is handed to the check (which,
        on a raising CheckSet, raises when the check fails) and is then
        returned unchanged to the caller.
        """
        def adecorator(*args, **kwargs):
            def decorate(func):
                @wraps(func)
                def wrapper(*wargs, **wkwargs):
                    result = func(*wargs, **wkwargs)
                    # Run the check for its side effect (possible raise);
                    # its return value is currently discarded.
                    ans = getattr(self, name)(result, *args, **kwargs)
                    # TODO: optionally append the check output to the result:
                    #if ans:
                    #    result = [result] + list(ans)
                    #    result = tuple(result)
                    return result
                return wrapper
            return decorate
        return adecorator

class ReturnSet(CheckSet):
    """CheckSet variant that returns check results instead of raising."""
    def __init__(self, ret=None):

        self.check_slc = copy(slicers.loc)
        self.derive_slc = copy(slicers.loc)

        self.ret = ret or ('orig', 'bool', 'ndframe', 'obj')
        self.raize = None
        self.raize_msg = None


class RaiseSet(CheckSet):
    """CheckSet variant that raises `raize` when a check fails."""
    def __init__(self, raize=None, msg=""):

        self.check_slc = copy(slicers.loc)
        self.derive_slc = copy(slicers.loc)

        self.ret = ('orig',)
self.raize = raize or AssertionError 384 | self.raize_msg = msg 385 | 386 | if __name__ == '__main__': 387 | 388 | df = pd.DataFrame(data=[1,2,3,4], columns=['acol']) 389 | 390 | none_missing = CheckSet().none_missing 391 | none_missing(df) 392 | 393 | none_missing_dec = CheckSet().decorator_maker('none_missing')() 394 | 395 | @none_missing_dec 396 | def myfunc(adf): 397 | return adf + 1.0 398 | 399 | print(myfunc(df)) 400 | -------------------------------------------------------------------------------- /validada/decorators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jnmclarty/validada/da806ee649d67e2ac36ad4f22493003622493ee5/validada/decorators/__init__.py -------------------------------------------------------------------------------- /validada/decorators/raising.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from validada.core import RaiseSet 3 | 4 | none_missing = RaiseSet().decorator_maker('none_missing') 5 | is_shape = RaiseSet().decorator_maker('is_shape') 6 | unique_index = RaiseSet().decorator_maker('unique_index') 7 | is_monotonic = RaiseSet().decorator_maker('is_monotonic') 8 | within_set = RaiseSet().decorator_maker('within_set') 9 | within_range = RaiseSet().decorator_maker('within_range') 10 | within_n_std = RaiseSet().decorator_maker('within_n_std') 11 | has_dtypes = RaiseSet().decorator_maker('has_dtypes') 12 | equal_columns_sum = RaiseSet().decorator_maker('equal_columns_sum') 13 | has_in_index = RaiseSet().decorator_maker('has_in_index') 14 | 15 | __all__ = [none_missing, is_monotonic, is_shape, none_missing, unique_index, 16 | within_n_std, has_dtypes, equal_columns_sum, has_in_index] 17 | -------------------------------------------------------------------------------- /validada/decorators/returning.py: -------------------------------------------------------------------------------- 1 | # -*- 
coding: utf-8 -*- 2 | from validada.core import ReturnSet 3 | 4 | none_missing = ReturnSet().decorator_maker('none_missing') 5 | is_shape = ReturnSet().decorator_maker('is_shape') 6 | unique_index = ReturnSet().decorator_maker('unique_index') 7 | is_monotonic = ReturnSet().decorator_maker('is_monotonic') 8 | within_set = ReturnSet().decorator_maker('within_set') 9 | within_range = ReturnSet().decorator_maker('within_range') 10 | within_n_std = ReturnSet().decorator_maker('within_n_std') 11 | has_dtypes = ReturnSet().decorator_maker('has_dtypes') 12 | equal_columns_sum = ReturnSet().decorator_maker('equal_columns_sum') 13 | has_in_index = ReturnSet().decorator_maker('has_in_index') 14 | 15 | __all__ = [none_missing, is_monotonic, is_shape, none_missing, unique_index, 16 | within_n_std, has_dtypes, equal_columns_sum, has_in_index] 17 | -------------------------------------------------------------------------------- /validada/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jnmclarty/validada/da806ee649d67e2ac36ad4f22493003622493ee5/validada/functions/__init__.py -------------------------------------------------------------------------------- /validada/functions/raising.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from validada.core import RaiseSet 3 | 4 | none_missing = RaiseSet().none_missing 5 | is_monotonic = RaiseSet().is_monotonic 6 | is_shape = RaiseSet().is_shape 7 | unique_index = RaiseSet().unique_index 8 | within_set = RaiseSet().within_set 9 | within_range = RaiseSet().within_range 10 | within_n_std = RaiseSet().within_n_std 11 | has_dtypes = RaiseSet().has_dtypes 12 | equal_columns_sum = RaiseSet().equal_columns_sum 13 | has_in_index = RaiseSet().has_in_index 14 | 15 | __all__ = [is_monotonic, is_shape, none_missing, unique_index, within_n_std, 16 | within_range, within_set, has_dtypes, 
equal_columns_sum, has_in_index] 17 | 18 | -------------------------------------------------------------------------------- /validada/functions/returning.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from validada.core import ReturnSet 4 | 5 | none_missing = ReturnSet().none_missing 6 | is_monotonic = ReturnSet().is_monotonic 7 | is_shape = ReturnSet().is_shape 8 | unique_index = ReturnSet().unique_index 9 | within_set = ReturnSet().within_set 10 | within_range = ReturnSet().within_range 11 | within_n_std = ReturnSet().within_n_std 12 | has_dtypes = ReturnSet().has_dtypes 13 | equal_columns_sum = ReturnSet().equal_columns_sum 14 | has_in_index = ReturnSet().has_in_index 15 | 16 | __all__ = [is_monotonic, is_shape, none_missing, unique_index, within_n_std, 17 | within_range, within_set, has_dtypes, equal_columns_sum, has_in_index] 18 | 19 | -------------------------------------------------------------------------------- /validada/slicers.py: -------------------------------------------------------------------------------- 1 | class SliceStore(object): 2 | def __init__(self, slc=None, mode='loc'): 3 | self.slc = slc or slice(None) 4 | self.mode = mode 5 | def __getitem__(self, slc): 6 | self.slc = slc 7 | return self 8 | def __setitem__(self, _, __): 9 | raise Exception("SliceStore cannot be assigned values") 10 | def __str__(self): 11 | return "{{.{}[{}]}}".format(self.mode, repr(self.slc)) 12 | 13 | def _index_slicer_factory(defaultmode): 14 | class IndexSlicer(SliceStore): 15 | def __init__(self, slc=None, mode=defaultmode): 16 | self.slc = slc or slice(None) 17 | self.mode = mode 18 | return IndexSlicer 19 | 20 | # ix = _index_slicer_factory('ix')() 21 | iloc = _index_slicer_factory('iloc')() 22 | loc = _index_slicer_factory('loc')() 23 | 24 | if __name__ == '__main__': 25 | print(iloc) 26 | print(loc) 27 | iloc[1:10:2] 28 | print(iloc) 29 | 
--------------------------------------------------------------------------------