├── .gitignore ├── LICENSE ├── README.md ├── examples └── examples.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py └── test_checks.py └── validada ├── __init__.py ├── convenience.py ├── core.py ├── decorators ├── __init__.py ├── raising.py └── returning.py ├── functions ├── __init__.py ├── raising.py └── returning.py └── slicers.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | .spyderproject 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | pytestreport/ 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | .noseids 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | 57 | # Sphinx documentation 58 | docs/_build/ 59 | 60 | # PyBuilder 61 | target/ 62 | 63 | # Editor 64 | 65 | *.swp 66 | 67 | # other 68 | data/ 69 | wheelhouse/ 70 | *.zip 71 | *.csv 72 | *.pyc 73 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Jeffrey McLarty 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | The MIT License (MIT) 24 | 25 | Copyright (c) 2015 Tom Augspurger 26 | 27 | Permission is hereby granted, free of charge, to any person obtaining a copy 28 | of this software and associated documentation files (the "Software"), to deal 29 | in the Software without restriction, including without limitation the rights 30 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 31 | copies of the Software, and to permit persons to whom the Software is 32 | furnished to do so, subject to the following conditions: 33 | 34 | The above copyright notice and this permission notice shall be included in all 35 | copies or substantial portions of the Software. 36 | 37 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 38 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 39 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 40 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 41 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 42 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 43 | SOFTWARE. 44 | 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Validada 2 | ======== 3 | 4 | (Pronounced "Valid-Data") 5 | 6 | This project started as a fork of [engarde v0.0.2](https://github.com/TomAugspurger/engarde) 7 | 8 | Validada differentiates from engarde under the hood, substantially, in order to implement a richer 9 | feature set including custom-exceptions, universal slicing API, check object-return. All, 10 | with a focus on code brevity. 11 | 12 | All of the basics are the same as engarde, with likely a minor hit to speed. Although, 13 | in many cases engarde raises on the first problem it finds. Validada's policy is 14 | to raise only after checking everything. 
15 | 16 | As of 7/7/2015, validada passes all of the unit tests of engarde. 17 | 18 | Slicing? 19 | ======== 20 | All checks slice the dataframe internally, so users of validada never have to pass in a sliced dataframe. 21 | Instead, users can pass in a slice-like object as an argument. 22 | 23 | How do I pass a slice? 24 | 25 | ```python 26 | from validada.slicers import iloc, loc 27 | 28 | some_check(adf, iloc[-7:], iloc[:-7]) 29 | 30 | # or... 31 | 32 | @some_check(iloc[-1], iloc[:-1]) 33 | def somefunc(adf): 34 | return adf + 1.0 35 | 36 | ``` 37 | 38 | All checks can take up to two slice-like arguments. The first, is the slice which will be checked. 39 | The second, is a slice for calculating constants to use during the check. Both are optional. 40 | So, say you have a dataframe coming from a source of data, with known "good" data 41 | (for instance, before last week), and want to check that the data for just this week is within 42 | two standard deviations of the data, excluding the latest week of data, you would pass in 43 | ```iloc[-7:]``` and ```iloc[:-7]``` as arguments to the check. 44 | 45 | ``` 46 | #To use the same functionality of engarde, one would use... 47 | from validada.functions.raising import none_missing, is_shape, unique_index 48 | #or 49 | from validada.decorators.raising import none_missing, is_shape, unique_index 50 | ``` 51 | 52 | ``` 53 | #But with validada you get more out of the box... 54 | from validada.functions.returning import none_missing, is_shape, unique_index 55 | #or 56 | from validada.decorators.returning import none_missing, is_shape, unique_index 57 | ``` 58 | 59 | Custom Return-Objects? 60 | ====================== 61 | Depending on the check, there might be some useful information to pass back out, or maybe you 62 | want to perform a bunch of checks and just collect the boolean results for each? 
63 | 64 | ``` python 65 | from validada.core import ReturnSet 66 | 67 | rs = ReturnSet(('bool', 'obj')) 68 | none_missing = rs.none_missing 69 | 70 | print "Since we specified 'bool' and 'obj', in that order:" 71 | a_bool, an_obj = none_missing(adf, ix['2013':], columns='one') 72 | #a_bool, is the result of the check 73 | print a_bool 74 | #an_obj, is a none_missing specific object, it's a way to 75 | #get other information out of the check. 76 | print an_obj 77 | ``` 78 | 79 | 80 | Custom Exceptions? 81 | ================== 82 | To use the advance features instantiate your own ```CheckSet``` (or child of, eg. ```RaiseSet```,```ReturnSet```) via... 83 | 84 | ``` python 85 | from validada.core import RaiseSet 86 | rs = RaiseSet(IOError, "IO error makes no sense, but why not?") 87 | none_missing = rs.none_missing 88 | 89 | #ready... 90 | none_missing(adf, ix['2013':]) 91 | 92 | #or make a decorator 93 | none_missing = rs.decorator_maker('none_missing') 94 | ``` 95 | 96 | Dependencies 97 | ============ 98 | 99 | - Pandas 100 | 101 | 102 | Supports python 2.7+ and Python 3.6 103 | 104 | 105 | Overall Design 106 | ============== 107 | 108 | Every check has a return-function and raise-function created all sharing a common signature. 109 | These two functions are used to create one staticfunction, for every check, of the CheckSet. 110 | A CheckSet object stores custom-exception, custom-object return, and default slicing settings. 111 | A CheckSet object has a generic way to turn any check, into a decorator using one line. 112 | An instance of RaiseSet and ReturnSet is used to declare function.*.checks and decorators.*.checks. 
113 | 114 | See Also 115 | ======== 116 | 117 | [assertr](https://github.com/tonyfischetti/assertr) 118 | [engarde](https://github.com/TomAugspurger/engarde) 119 | 120 | -------------------------------------------------------------------------------- /examples/examples.py: -------------------------------------------------------------------------------- 1 | 2 | from validada.functions.raising import none_missing 3 | from validada.slicers import iloc, loc 4 | 5 | import pandas as pd 6 | 7 | # Create some data... 8 | ind = pd.date_range('2010', '2015', freq='A') 9 | adf = pd.DataFrame({'one' : list(range(5)), 'two' : [ i ** 2 for i in range(5)]}, index=ind) 10 | adf['two'].iloc[4]=pd.np.NaN 11 | 12 | """ 13 | one two 14 | 2010-12-31 0 0 15 | 2011-12-31 1 1 16 | 2012-12-31 2 4 17 | 2013-12-31 3 9 18 | 2014-12-31 4 NaN 19 | """ 20 | 21 | # Basic call... 22 | try: 23 | none_missing(adf) 24 | except AssertionError: 25 | print("Some values are missing!") 26 | 27 | # Using arguments explicitly 28 | try: 29 | none_missing(adf, columns='one') 30 | print("No problem here!") 31 | except: 32 | pass 33 | 34 | # Or implicitly using arguments... 35 | try: 36 | none_missing(adf, 'two') 37 | print("Shouldn't see this!") 38 | except: 39 | print("There's a problem, in the second column") 40 | 41 | 42 | try: 43 | none_missing(adf, iloc[-2:]) 44 | except AssertionError: 45 | print("Some values are missing in the last two rows") 46 | 47 | try: 48 | # iloc stores the :-2 slice, so this works until iloc is changed 49 | none_missing(adf, iloc) 50 | except AssertionError: 51 | print("Some values are still missing in the last two rows") 52 | 53 | 54 | try: 55 | none_missing(adf, ix[:'2013']) 56 | print("There are no problems looking at data before 2013") 57 | except: 58 | pass 59 | 60 | # Did you notice the type detection between passing 'two', and the slicers? 61 | # Look ma, args only - kwargs not mandatory! 62 | 63 | # Now for some real fun... 
64 | 65 | from validada.core import RaiseSet 66 | 67 | rs = RaiseSet(IOError, "IO error makes no sense, but why not?") 68 | none_missing = rs.none_missing 69 | 70 | try: 71 | none_missing(adf, loc['2013':]) 72 | except IOError as e: 73 | print(str(e)) 74 | 75 | #This is only needed, since the user has added a custom exception message... 76 | none_missing = rs.decorator_maker('none_missing') 77 | 78 | @none_missing(loc['2013':]) 79 | def somefunc(anydf): 80 | soln = anydf + 1.0 81 | return soln 82 | 83 | try: 84 | somefunc(adf) 85 | except IOError as e: 86 | print("Second time the charm?") 87 | print(str(e)) 88 | 89 | from validada.core import ReturnSet 90 | 91 | rs = ReturnSet(('bool', 'obj')) 92 | none_missing = rs.none_missing 93 | 94 | print("Since we specified 'bool' and 'obj':") 95 | a_bool, an_obj = none_missing(adf, loc['2013':], columns='one') 96 | #a_bool, is the result of the check 97 | print(a_bool) 98 | #an_obj, is a none_missing specific object, it's a way to get other information out of the check. 
99 | print(an_obj) 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [metadata] 5 | description-file = README.md 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | # To use a consistent encoding 3 | from os import path 4 | 5 | here = path.abspath(path.dirname(__file__)) 6 | 7 | setup( 8 | name='validada', 9 | 10 | version='0.0.2', 11 | 12 | description='A python package for defensive data analysis.', 13 | long_description='A python package for defensive data analysis.', 14 | 15 | url='https://github.com/jnmclarty/validada', 16 | 17 | # Author details 18 | author='Jeffrey McLarty', 19 | author_email='jeffrey.mclarty@gmail.com', 20 | 21 | # Choose your license 22 | license='MIT', 23 | 24 | classifiers=[ 25 | 'Development Status :: 3 - Alpha', 26 | 'Intended Audience :: Developers', 27 | 'License :: OSI Approved :: MIT License', 28 | 'Programming Language :: Python :: 2.7', 29 | 'Programming Language :: Python :: 3.6', 30 | ], 31 | 32 | keywords='data analysis','engarde', 'validata', 'valid data', 'validada', 33 | packages=find_packages(exclude=['tests']), 34 | # install_requires=['numpy', 'pandas'], 35 | 36 | # List additional groups of dependencies here (e.g. development 37 | # dependencies). 
You can install these using the following syntax, 38 | # for example: 39 | # $ pip install -e .[dev,test] 40 | extras_require={ 41 | 'dev': [''], 42 | 'test': ['coverage', 'pytest'], 43 | }, 44 | 45 | ) 46 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jnmclarty/validada/da806ee649d67e2ac36ad4f22493003622493ee5/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_checks.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pytest 3 | import numpy as np 4 | import pandas as pd 5 | import pandas.util.testing as tm 6 | 7 | from validada.slicers import iloc 8 | import validada.functions.raising as ck 9 | import validada.decorators.raising as dc 10 | 11 | import datetime as dt 12 | 13 | def _add_one(df): 14 | return df + 1 15 | 16 | def _safe_add_one(df): 17 | return df.fillna(0.0) + 1 18 | 19 | def _noop(df): 20 | return df 21 | 22 | def test_is_in_index(): 23 | dr = pd.date_range(start='2015-01-01', periods=6, freq='D') 24 | df = pd.DataFrame(data = list(range(6)), index=dr) 25 | 26 | d = dt.date(2015,1,3) 27 | 28 | result = ck.has_in_index(df, obj=d) 29 | tm.assert_frame_equal(df, result) 30 | 31 | result = dc.has_in_index(obj=d)(_add_one)(df) 32 | tm.assert_frame_equal(result, df + 1) 33 | 34 | result = ck.has_in_index(df, obj=d, try_ix=True) 35 | 36 | result = ck.has_in_index(df, obj=d, try_ix=True, try_strftime="%Y-%m") 37 | 38 | result = ck.has_in_index(df, obj=d, check_na=True) 39 | 40 | def test_is_in_index_raises(): 41 | dr = pd.date_range(start='2015-01-01', periods=6, freq='D') 42 | da = list(range(6)) 43 | da[2] = pd.np.nan 44 | df = pd.DataFrame(data = da, index=dr) 45 | 46 | d = dt.date(2015,1,12) 47 | 48 | with pytest.raises(AssertionError): 49 | 
ck.has_in_index(df, obj=d) 50 | 51 | with pytest.raises(AssertionError): 52 | dc.has_in_index(obj=d)(_add_one)(df) 53 | 54 | with pytest.raises(AssertionError): 55 | ck.has_in_index(df, obj=d, try_ix=True) 56 | 57 | ck.has_in_index(df, obj=d, try_ix=True, try_strftime="%Y-%m") 58 | 59 | d = dt.datetime(2015,1,3) 60 | ck.has_in_index(df, obj=d) 61 | ck.has_in_index(df, obj=d, check_na=False) 62 | 63 | with pytest.raises(AssertionError): 64 | ck.has_in_index(df, obj=d, check_na=True) 65 | 66 | def test_equal_columns_sum(): 67 | df = pd.DataFrame({'A': [1,2,3,4,5], 'B': [1,2,3,4,5]}) 68 | 69 | result = ck.equal_columns_sum(df) 70 | tm.assert_frame_equal(df, result) 71 | 72 | result = dc.equal_columns_sum()(_add_one)(df) 73 | tm.assert_frame_equal(result, df + 1) 74 | 75 | def test_equal_columns_sum_raises_slice(): 76 | df = pd.DataFrame({'A': [None,2,3,4,0], 'B': [1,2,3,4,None]}) 77 | 78 | with pytest.raises(AssertionError): 79 | ck.equal_columns_sum(df) 80 | with pytest.raises(AssertionError): 81 | dc.equal_columns_sum()(_add_one)(df) 82 | 83 | s = iloc[-3:] 84 | result = ck.equal_columns_sum(df, s) 85 | tm.assert_frame_equal(df, result) 86 | 87 | result = dc.equal_columns_sum(s)(_safe_add_one)(df) 88 | tm.assert_frame_equal(result, _safe_add_one(df)) 89 | 90 | def test_none_missing(): 91 | df = pd.DataFrame(np.random.randn(5, 3)) 92 | result = ck.none_missing(df) 93 | tm.assert_frame_equal(df, result) 94 | 95 | result = dc.none_missing()(_add_one)(df) 96 | tm.assert_frame_equal(result, df + 1) 97 | 98 | def test_none_missing_raises(): 99 | df = pd.DataFrame(np.random.randn(5, 3)) 100 | df.iloc[0, 0] = np.nan 101 | with pytest.raises(AssertionError): 102 | ck.none_missing(df) 103 | 104 | with pytest.raises(AssertionError): 105 | dc.none_missing()(_add_one)(df) 106 | 107 | def test_monotonic_increasing_lax(): 108 | df = pd.DataFrame([1, 2, 2]) 109 | tm.assert_frame_equal(df, ck.is_monotonic(df, increasing=True)) 110 | result = 
dc.is_monotonic(increasing=True)(_add_one)(df) 111 | tm.assert_frame_equal(result, df + 1) 112 | 113 | df = pd.DataFrame([1, 2, 1]) 114 | with pytest.raises(AssertionError): 115 | ck.is_monotonic(df, increasing=True) 116 | with pytest.raises(AssertionError): 117 | dc.is_monotonic(increasing=True)(_add_one)(df) 118 | 119 | df = pd.DataFrame([3, 2, 1]) 120 | with pytest.raises(AssertionError): 121 | ck.is_monotonic(df, increasing=True) 122 | with pytest.raises(AssertionError): 123 | dc.is_monotonic(increasing=True)(_add_one)(df) 124 | 125 | def test_monotonic_increasing_strict(): 126 | df = pd.DataFrame([1, 2, 3]) 127 | tm.assert_frame_equal(df, ck.is_monotonic(df, increasing=True, strict=True)) 128 | result = dc.is_monotonic(increasing=True, strict=True)(_add_one)(df) 129 | tm.assert_frame_equal(result, df + 1) 130 | 131 | df = pd.DataFrame([1, 2, 2]) 132 | with pytest.raises(AssertionError): 133 | ck.is_monotonic(df, increasing=True, strict=True) 134 | with pytest.raises(AssertionError): 135 | dc.is_monotonic(increasing=True, strict=True)(_add_one)(df) 136 | 137 | df = pd.DataFrame([3, 2, 1]) 138 | with pytest.raises(AssertionError): 139 | ck.is_monotonic(df, increasing=True, strict=True) 140 | with pytest.raises(AssertionError): 141 | dc.is_monotonic(increasing=True, strict=True)(_add_one)(df) 142 | 143 | def test_monotonic_decreasing(): 144 | df = pd.DataFrame([2, 2, 1]) 145 | tm.assert_frame_equal(df, ck.is_monotonic(df, increasing=False)) 146 | result = dc.is_monotonic(increasing=False)(_add_one)(df) 147 | tm.assert_frame_equal(result, df + 1) 148 | 149 | df = pd.DataFrame([1, 2, 1]) 150 | with pytest.raises(AssertionError): 151 | ck.is_monotonic(df, increasing=False) 152 | with pytest.raises(AssertionError): 153 | dc.is_monotonic(increasing=False)(_add_one)(df) 154 | 155 | df = pd.DataFrame([1, 2, 3]) 156 | with pytest.raises(AssertionError): 157 | ck.is_monotonic(df, increasing=False) 158 | with pytest.raises(AssertionError): 159 | 
dc.is_monotonic(increasing=False)(_add_one)(df) 160 | 161 | def test_monotonic_decreasing_strict(): 162 | df = pd.DataFrame([3, 2, 1]) 163 | tm.assert_frame_equal(df, ck.is_monotonic(df, increasing=False, 164 | strict=True)) 165 | result = dc.is_monotonic(increasing=False, strict=True)(_add_one)(df) 166 | tm.assert_frame_equal(result, df + 1) 167 | 168 | df = pd.DataFrame([2, 2, 1]) 169 | with pytest.raises(AssertionError): 170 | ck.is_monotonic(df, increasing=False, strict=True) 171 | with pytest.raises(AssertionError): 172 | dc.is_monotonic(increasing=False, strict=True)(_add_one)(df) 173 | 174 | df = pd.DataFrame([1, 2, 3]) 175 | with pytest.raises(AssertionError): 176 | ck.is_monotonic(df, increasing=False, strict=True) 177 | with pytest.raises(AssertionError): 178 | dc.is_monotonic(increasing=False, strict=True)(_add_one)(df) 179 | 180 | def test_monotonic_either(): 181 | df = pd.DataFrame({'A': [1, 2, 2], 'B': [3, 2, 2]}) 182 | tm.assert_frame_equal(df, ck.is_monotonic(df)) 183 | result = dc.is_monotonic()(_add_one)(df) 184 | tm.assert_frame_equal(result, df + 1) 185 | 186 | df = pd.DataFrame({'A': [1, 2, 3], 'B': [1, 2, 1]}) 187 | with pytest.raises(AssertionError): 188 | ck.is_monotonic(df) 189 | with pytest.raises(AssertionError): 190 | dc.is_monotonic()(_add_one)(df) 191 | 192 | def test_monotonic_either_stict(): 193 | df = pd.DataFrame({'A': [1, 2, 3], 'B': [3, 2, 1]}) 194 | tm.assert_frame_equal(df, ck.is_monotonic(df, strict=True)) 195 | result = dc.is_monotonic(strict=True)(_add_one)(df) 196 | tm.assert_frame_equal(result, df + 1) 197 | 198 | df = pd.DataFrame({'A': [1, 2, 2], 'B': [3, 2, 2]}) 199 | with pytest.raises(AssertionError): 200 | ck.is_monotonic(df, strict=True) 201 | with pytest.raises(AssertionError): 202 | dc.is_monotonic(strict=True)(_add_one)(df) 203 | 204 | def test_monotonic_items(): 205 | df = pd.DataFrame({'A': [1, 2, 3], 'B': [3, 2, 3]}) 206 | tm.assert_frame_equal(df, ck.is_monotonic(df, items={'A': (True, True)})) 207 | 
tm.assert_frame_equal(dc.is_monotonic(items={'A': (True, True)}, strict=True)(_add_one)( 208 | df), df + 1) 209 | 210 | def test_is_shape(): 211 | shape = 10, 2 212 | df = pd.DataFrame(np.random.randn(*shape)) 213 | tm.assert_frame_equal(df, ck.is_shape(df, shape)) 214 | result = dc.is_shape(shape=shape)(_add_one)(df) 215 | tm.assert_frame_equal(result, df + 1) 216 | 217 | with pytest.raises(AssertionError): 218 | ck.is_shape(df, (9, 2)) 219 | with pytest.raises(AssertionError): 220 | dc.is_shape((9, 2))(_add_one)(df) 221 | 222 | def test_unique_index(): 223 | df = pd.DataFrame([1, 2, 3], index=['a', 'b', 'c']) 224 | tm.assert_frame_equal(df, ck.unique_index(df)) 225 | result = dc.unique_index()(_add_one)(df) 226 | tm.assert_frame_equal(result, df + 1) 227 | 228 | with pytest.raises(AssertionError): 229 | ck.unique_index(df.reindex(['a', 'a', 'b'])) 230 | with pytest.raises(AssertionError): 231 | dc.unique_index()(_add_one)(df.reindex(['a', 'a', 'b'])) 232 | 233 | def test_within_set(): 234 | df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']}) 235 | items = {'A': [1, 2, 3], 'B': ['a', 'b', 'c']} 236 | tm.assert_frame_equal(df, ck.within_set(df, items)) 237 | tm.assert_frame_equal(df, dc.within_set(items=items)(_noop)(df)) 238 | 239 | items.pop('A') 240 | tm.assert_frame_equal(df, ck.within_set(df, items)) 241 | tm.assert_frame_equal(df, dc.within_set(items=items)(_noop)(df)) 242 | 243 | items['A'] = [1, 2] 244 | with pytest.raises(AssertionError): 245 | ck.within_set(df, items) 246 | with pytest.raises(AssertionError): 247 | dc.within_set(items=items)(_noop)(df) 248 | 249 | def test_within_range(): 250 | df = pd.DataFrame({'A': [-1, 0, 1]}) 251 | items = {'A': (-1, 1)} 252 | tm.assert_frame_equal(df, ck.within_range(df, items)) 253 | tm.assert_frame_equal(df, dc.within_range(items)(_noop)(df)) 254 | 255 | items['A'] = (0, 1) 256 | with pytest.raises(AssertionError): 257 | ck.within_range(df, items) 258 | with pytest.raises(AssertionError): 259 | 
dc.within_range(items)(_noop)(df) 260 | 261 | def test_within_n_std(): 262 | df = pd.DataFrame({'A': np.arange(10)}) 263 | tm.assert_frame_equal(df, ck.within_n_std(df)) 264 | tm.assert_frame_equal(df, dc.within_n_std()(_noop)(df)) 265 | 266 | with pytest.raises(AssertionError): 267 | ck.within_n_std(df, .5) 268 | with pytest.raises(AssertionError): 269 | dc.within_n_std(.5)(_noop)(df) 270 | 271 | def test_has_dtypes(): 272 | df = pd.DataFrame({'A': np.random.randint(0, 10, 10), 273 | 'B': np.random.randn(10), 274 | 'C': list('abcdefghij'), 275 | 'D': pd.Categorical(np.random.choice(['a', 'b'], 10))}) 276 | dtypes = {'A': int, 'B': 'float', 'C': object, 'D': 'category'} 277 | tm.assert_frame_equal(df, ck.has_dtypes(df, dtypes)) 278 | tm.assert_frame_equal(df, dc.has_dtypes(items=dtypes)(_noop)(df)) 279 | 280 | with pytest.raises(AssertionError): 281 | ck.has_dtypes(df, {'A': float}) 282 | 283 | with pytest.raises(AssertionError): 284 | dc.has_dtypes(items={'A': bool})(_noop)(df) 285 | 286 | -------------------------------------------------------------------------------- /validada/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /validada/convenience.py: -------------------------------------------------------------------------------- 1 | from .slicers import SliceStore 2 | 3 | def _pull_out_ret(kwargs, dforig): 4 | try: 5 | _ret = kwargs['_ret'] 6 | except KeyError: 7 | raise KeyError("_ret must be defined") 8 | 9 | if not isinstance(_ret, (list, tuple)): 10 | _ret = (_ret,) 11 | ret_specd = {'orig' : dforig, 'bool' : None, 'ndframe' : None, 'obj' : None} 12 | return _ret, ret_specd 13 | 14 | def _pull_out_raize_kwargs(kwargs): 15 | try: 16 | _raize = kwargs['_raize'] 17 | _raize_msg = kwargs['_raize_msg'] 18 | except KeyError: 19 | raise KeyError("_raize and _raize_msg must be defined") 20 | 21 | kwargs = {key : value for key, value 
in kwargs.items() if key not in ('_raize', '_raize_kwargs')} 22 | 23 | return _raize, _raize_msg, kwargs 24 | 25 | 26 | def _read_arg_or_kwarg(args, pos, kwargs, arg, ifnone=None): 27 | if arg in kwargs: 28 | val = kwargs[arg] 29 | if val is None: 30 | val = ifnone 31 | elif len(args) >= (pos + 1): 32 | val = args[pos] 33 | else: 34 | val = ifnone 35 | return val 36 | 37 | def _read_required_arg_or_kwarg(args, pos, kwargs, arg): 38 | if arg in kwargs: 39 | val = kwargs[arg] 40 | elif len(args) >= (pos + 1): 41 | val = args[pos] 42 | else: 43 | raise Exception("Keyword argument '{}' must be defined in function".format(arg)) 44 | return val 45 | 46 | def _ret_proper_objects(_ret, ret_specd): 47 | ret = [ret_specd[t] for t in _ret] 48 | 49 | if len(ret) == 1: 50 | return ret[0] 51 | else: 52 | return tuple(ret) 53 | 54 | def _make_generic_raizer(returner): 55 | def raizer(dforig, dfcheck, dfderive, *args, **kwargs): 56 | 57 | _raize, _raize_msg, kwargs = _pull_out_raize_kwargs(kwargs) 58 | 59 | _ret = ('bool',) 60 | result = returner(dforig, dfcheck, dfderive, _ret=_ret, *args, **kwargs) 61 | 62 | if not result: 63 | return dforig 64 | else: 65 | raise _raize(_raize_msg) 66 | return raizer 67 | 68 | def _generic_check_maker(returner, raizer): 69 | def check(self, df, *args, **kwargs): 70 | 71 | slc, args = _lop_off_head_if_slice(args, self.check_slc) 72 | slcd, args = _lop_off_head_if_slice(args, self.derive_slc) 73 | 74 | dfc = getattr(df, slc.mode)[slc.slc] 75 | dfd = getattr(df, slcd.mode)[slcd.slc] 76 | 77 | if self.raize is not None: 78 | result = raizer(df, dfc, dfd, 79 | *args, _raize=self.raize, 80 | _raize_msg=self.raize_msg, **kwargs) 81 | elif self.ret is not None: 82 | result = returner(df, dfc, dfd, *args, _ret=self.ret, **kwargs) 83 | else: 84 | raise Exception("Can't read your mind") 85 | return result 86 | return check 87 | 88 | def _lop_off_head_if_slice(args, otherwise): 89 | if len(args) >= 1: 90 | if isinstance(args[0], (slice, SliceStore)): 91 | 
if len(args) >= 2: 92 | return args[0], args[1:] 93 | else: 94 | return args[0], [] 95 | else: 96 | return otherwise, args 97 | return otherwise, [] -------------------------------------------------------------------------------- /validada/core.py: -------------------------------------------------------------------------------- 1 | 2 | import pandas as pd 3 | 4 | import datetime as dt 5 | 6 | 7 | from . import slicers 8 | from functools import wraps 9 | from copy import copy 10 | from collections import Counter 11 | 12 | from .convenience import _pull_out_ret, _pull_out_raize_kwargs, \ 13 | _read_arg_or_kwarg, _read_required_arg_or_kwarg, \ 14 | _ret_proper_objects, _make_generic_raizer, \ 15 | _generic_check_maker, _lop_off_head_if_slice 16 | 17 | def _has_in_index_ret(dforig, dfcheck, dfderive, *args, **kwargs): 18 | 19 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 20 | 21 | orig_obj_to_check = _read_required_arg_or_kwarg(args, 0, kwargs, 'obj') 22 | obj_to_check = orig_obj_to_check 23 | 24 | try_ix = _read_arg_or_kwarg(args, 1, kwargs, 'try_ix', False) 25 | try_strftime = _read_arg_or_kwarg(args, 2, kwargs, 'try_strftime', False) 26 | check_na = _read_arg_or_kwarg(args, 3, kwargs, 'check_na', False) 27 | 28 | 29 | 30 | ans = obj_to_check in dfcheck.index 31 | 32 | if try_strftime: 33 | try: 34 | obj_to_check = obj_to_check.strftime(try_strftime) 35 | except: 36 | pass 37 | 38 | # at this point, if it's in the index, ans will be True 39 | 40 | if try_ix: 41 | try: 42 | ans = ans or (len(dfcheck.loc[obj_to_check]) > 0) 43 | except: 44 | pass 45 | 46 | # at this point, if it's in the index, or if it's string representation 47 | # is in the index, ans will be True 48 | 49 | if check_na: 50 | try: 51 | isna = dfcheck.loc[obj_to_check].isnull()[0] 52 | except: 53 | isna = True 54 | ans = not ((not ans) or isna) 55 | 56 | results = {orig_obj_to_check : ans} 57 | ret_specd['obj'] = results 58 | ret_specd['ndframe'] = pd.Series(results) 59 | ret_specd['bool'] = not 
ans 60 | 61 | return _ret_proper_objects(_ret, ret_specd) 62 | _has_in_index_raize = _make_generic_raizer(_has_in_index_ret) 63 | 64 | def _equal_columns_sum_ret(dforig, dfcheck, dfderive, *args, **kwargs): 65 | 66 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 67 | 68 | cola = _read_arg_or_kwarg(args, 0, kwargs, 'cola', dfcheck.columns[0]) 69 | colb = _read_arg_or_kwarg(args, 1, kwargs, 'colb', dfcheck.columns[1]) 70 | 71 | results = {cola : dfcheck[cola].dropna().sum(), 72 | colb : dfcheck[colb].dropna().sum()} 73 | 74 | ret_specd['obj'] = results 75 | ret_specd['ndframe'] = pd.Series(results) 76 | ret_specd['bool'] = not results[cola] == results[colb] 77 | 78 | return _ret_proper_objects(_ret, ret_specd) 79 | _equal_columns_sum_raize = _make_generic_raizer(_equal_columns_sum_ret) 80 | 81 | def _none_missing_ret(dforig, dfcheck, dfderive, *args, **kwargs): 82 | 83 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 84 | 85 | columns = _read_arg_or_kwarg(args, 0, kwargs, 'columns', dfcheck.columns) 86 | 87 | ret_specd['obj'] = dfcheck[columns].isnull() 88 | ret_specd['ndframe'] = ret_specd['obj'].any() 89 | ret_specd['bool'] = ret_specd['ndframe'].any() 90 | 91 | return _ret_proper_objects(_ret, ret_specd) 92 | _none_missing_raize = _make_generic_raizer(_none_missing_ret) 93 | 94 | def _is_shape_ret(dforig, dfcheck, dfderive, *args, **kwargs): 95 | 96 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 97 | 98 | shape = _read_required_arg_or_kwarg(args, 0, kwargs, 'shape') 99 | 100 | ret_specd['obj'] = "is_shape has no output object" 101 | ret_specd['ndframe'] = "is_shape has no output ndframe" 102 | ret_specd['bool'] = not dfcheck.shape == shape 103 | 104 | return _ret_proper_objects(_ret, ret_specd) 105 | _is_shape_raize = _make_generic_raizer(_is_shape_ret) 106 | 107 | def _unique_index_ret(dforig, dfcheck, dfderive, *args, **kwargs): 108 | 109 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 110 | 111 | ret_specd['obj'] = Counter(list(dfcheck.index)) 112 | 
ret_specd['ndframe'] = pd.Series(ret_specd['obj']) 113 | ret_specd['bool'] = not dfcheck.index.is_unique 114 | 115 | return _ret_proper_objects(_ret, ret_specd) 116 | _unique_index_raize = _make_generic_raizer(_unique_index_ret) 117 | 118 | def _is_monotonic_ret(dforig, dfcheck, dfderive, *args, **kwargs): 119 | 120 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 121 | 122 | increasing = _read_arg_or_kwarg(args, 0, kwargs, 'increasing', None) 123 | strict = _read_arg_or_kwarg(args, 1, kwargs, 'strict', False) 124 | items = _read_arg_or_kwarg(args, 2, kwargs, 'items', {k: (increasing, strict) for k in dfcheck}) 125 | 126 | results = {} 127 | for col, (increasing, strict) in list(items.items()): 128 | s = pd.Index(dfcheck[col]) 129 | if increasing: 130 | good = getattr(s, 'is_monotonic_increasing') 131 | elif increasing is None: 132 | good = getattr(s, 'is_monotonic') | getattr(s, 'is_monotonic_decreasing') 133 | else: 134 | good = getattr(s, 'is_monotonic_decreasing') 135 | if strict: 136 | if increasing: 137 | good = good & (s.to_series().diff().dropna() > 0).all() 138 | elif increasing is None: 139 | good = good & ((s.to_series().diff().dropna() > 0).all() | 140 | (s.to_series().diff().dropna() < 0).all()) 141 | else: 142 | good = good & (s.to_series().diff().dropna() < 0).all() 143 | results[col] = not good 144 | 145 | ret_specd['obj'] = results 146 | ret_specd['ndframe'] = pd.Series(results) 147 | ret_specd['bool'] = any(list(results.values())) 148 | 149 | return _ret_proper_objects(_ret, ret_specd) 150 | _is_monotonic_raize = _make_generic_raizer(_is_monotonic_ret) 151 | 152 | def _within_set_ret(dforig, dfcheck, dfderive, *args, **kwargs): 153 | 154 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 155 | 156 | items = _read_required_arg_or_kwarg(args, 0, kwargs, 'items') 157 | 158 | results = {} 159 | for k, v in list(items.items()): 160 | results[k] = not dfcheck[k].isin(v).all() 161 | 162 | ret_specd['obj'] = results 163 | ret_specd['ndframe'] = 
pd.Series(results) 164 | ret_specd['bool'] = any(list(results.values())) 165 | 166 | return _ret_proper_objects(_ret, ret_specd) 167 | _within_set_raize = _make_generic_raizer(_within_set_ret) 168 | 169 | def _within_range_ret(dforig, dfcheck, dfderive, *args, **kwargs): 170 | 171 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 172 | 173 | items = _read_required_arg_or_kwarg(args, 0, kwargs, 'items') 174 | 175 | results = {} 176 | for k, (lower, upper) in list(items.items()): 177 | results[k] = (lower > dfcheck[k]).any() or (upper < dfcheck[k]).any() 178 | 179 | ret_specd['obj'] = results 180 | ret_specd['ndframe'] = pd.Series(results) 181 | ret_specd['bool'] = any(list(results.values())) 182 | 183 | return _ret_proper_objects(_ret, ret_specd) 184 | _within_range_raize = _make_generic_raizer(_within_range_ret) 185 | 186 | def _within_n_std_ret(dforig, dfcheck, dfderive, *args, **kwargs): 187 | 188 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 189 | 190 | n = _read_arg_or_kwarg(args, 0, kwargs, 'n', 3) 191 | 192 | means = dfderive.mean() 193 | stds = dfderive.std() 194 | 195 | results = (pd.np.abs(dfcheck - means) < n * stds) 196 | 197 | ret_specd['obj'] = results 198 | ret_specd['ndframe'] = results 199 | ret_specd['bool'] = not results.all().all() 200 | 201 | return _ret_proper_objects(_ret, ret_specd) 202 | _within_n_std_raize = _make_generic_raizer(_within_n_std_ret) 203 | 204 | 205 | def _has_dtypes_ret(dforig, dfcheck, dfderive, *args, **kwargs): 206 | 207 | _ret, ret_specd = _pull_out_ret(kwargs, dforig) 208 | 209 | items = _read_required_arg_or_kwarg(args, 0, kwargs, 'items') 210 | 211 | results = {} 212 | dtypes = dfcheck.dtypes 213 | for k, v in list(items.items()): 214 | results[k] = not dtypes[k] == v 215 | 216 | ret_specd['obj'] = results 217 | ret_specd['ndframe'] = pd.Series(results) 218 | ret_specd['bool'] = any(list(results.values())) 219 | return _ret_proper_objects(_ret, ret_specd) 220 | 221 | _has_dtypes_raize = 
class CheckSet(object):
    """A configurable collection of DataFrame checks.

    Subclasses configure what each check returns (`ret`) and whether a
    failing check raises (`raize` / `raize_msg`). The public check
    methods are generated by `_generic_check_maker` from the
    module-level (_*_ret, _*_raize) checker pairs.
    """
    def __init__(self, ret=None, raize=None, msg=""):
        # Slices selecting which part of the frame is checked and which
        # part derived statistics (e.g. mean/std) are computed from.
        self.check_slc = copy(slicers.loc)
        self.derive_slc = copy(slicers.loc)

        self.ret = ret or ('ndframe', 'bool', 'obj')
        self.raize = raize or AssertionError
        self.raize_msg = msg

    none_missing = _generic_check_maker(_none_missing_ret, _none_missing_raize)
    none_missing.__doc__ = """
    Asserts that there are no missing values (NaNs) in the DataFrame.

    Parameters
    ==========
    df : Series or DataFrame
    columns : list of column names
    """

    is_monotonic = _generic_check_maker(_is_monotonic_ret, _is_monotonic_raize)
    is_monotonic.__doc__ = """
    Asserts that the DataFrame is monotonic.

    Parameters
    ==========
    df : Series or DataFrame
    items : dict
        mapping columns to conditions (increasing, strict)
    increasing : None or bool
        None is either increasing or decreasing.
    strict : bool
        whether the comparison should be strict
    """

    is_shape = _generic_check_maker(_is_shape_ret, _is_shape_raize)
    is_shape.__doc__ = """
    Asserts that the DataFrame is of a known shape.

    Parameters
    ==========
    df : DataFrame
    shape : tuple (n_rows, n_columns)
    """

    unique_index = _generic_check_maker(_unique_index_ret, _unique_index_raize)
    unique_index.__doc__ = """Assert that the index is unique."""

    within_set = _generic_check_maker(_within_set_ret, _within_set_raize)
    within_set.__doc__ = """
    Assert that df is a subset of items.

    Parameters
    ==========
    df : DataFrame
    items : dict
        mapping of columns (k) to array-like of values (v) that
        ``df[k]`` is expected to be a subset of
    """

    within_range = _generic_check_maker(_within_range_ret, _within_range_raize)
    within_range.__doc__ = """
    Assert that a DataFrame is within a range.

    Parameters
    ==========
    df : DataFrame
    items : dict
        mapping of columns (k) to a (low, high) tuple (v)
        that ``df[k]`` is expected to be between.
    """

    within_n_std = _generic_check_maker(_within_n_std_ret, _within_n_std_raize)
    within_n_std.__doc__ = """
    Assert that every value is within `n` standard deviations of its
    column's mean.

    Parameters
    ==========
    df : DataFrame
    n : float
        Number of standard deviations the columns should be within.
    """

    has_dtypes = _generic_check_maker(_has_dtypes_ret, _has_dtypes_raize)
    has_dtypes.__doc__ = """
    Assert that a DataFrame has `dtypes`.

    Parameters
    ==========
    df : DataFrame
    items : dict
        mapping of columns to dtype.
    """

    equal_columns_sum = _generic_check_maker(_equal_columns_sum_ret,
                                             _equal_columns_sum_raize)
    equal_columns_sum.__doc__ = """
    Assert that the sums of two columns are equal.

    Parameters
    ==========
    df : DataFrame
    cola : str
        column one
    colb : str
        column two
    """

    has_in_index = _generic_check_maker(_has_in_index_ret,
                                        _has_in_index_raize)
    has_in_index.__doc__ = """
    Assert that an object is present in the DataFrame's index.

    Parameters
    ==========
    df : DataFrame
    obj : obj
        Any hashable object that would be in an index
    try_ix : boolean, defaults to False
        will apply an additional check to see if the object can be
        converted using ix's logic.
    try_strftime : str or boolean, defaults to False
        If set to a string, it will be used to attempt obj.strftime(try_strftime)
        If set to True, it will be used to attempt obj.strftime('%Y-%m-%d')
        Does nothing if try_ix is False
    """

    def decorator_maker(self, name, *args, **kwargs):
        """Build a decorator that runs check `name` on a function's result.

        The wrapped function's return value is handed to the check (which,
        on a raising CheckSet, raises when the check fails) and is then
        returned unchanged to the caller.
        """
        def adecorator(*args, **kwargs):
            def decorate(func):
                @wraps(func)
                def wrapper(*wargs, **wkwargs):
                    result = func(*wargs, **wkwargs)
                    # Run the check for its side effect (possible raise);
                    # its return value is currently discarded.
                    ans = getattr(self, name)(result, *args, **kwargs)
                    # TODO: optionally append the check output to the result:
                    #if ans:
                    #    result = [result] + list(ans)
                    #    result = tuple(result)
                    return result
                return wrapper
            return decorate
        return adecorator

class ReturnSet(CheckSet):
    """CheckSet variant that returns check results instead of raising."""
    def __init__(self, ret=None):

        self.check_slc = copy(slicers.loc)
        self.derive_slc = copy(slicers.loc)

        self.ret = ret or ('orig', 'bool', 'ndframe', 'obj')
        self.raize = None
        self.raize_msg = None


class RaiseSet(CheckSet):
    """CheckSet variant that raises `raize` when a check fails."""
    def __init__(self, raize=None, msg=""):

        self.check_slc = copy(slicers.loc)
        self.derive_slc = copy(slicers.loc)

        self.ret = ('orig',)
self.raize = raize or AssertionError 384 | self.raize_msg = msg 385 | 386 | if __name__ == '__main__': 387 | 388 | df = pd.DataFrame(data=[1,2,3,4], columns=['acol']) 389 | 390 | none_missing = CheckSet().none_missing 391 | none_missing(df) 392 | 393 | none_missing_dec = CheckSet().decorator_maker('none_missing')() 394 | 395 | @none_missing_dec 396 | def myfunc(adf): 397 | return adf + 1.0 398 | 399 | print(myfunc(df)) 400 | -------------------------------------------------------------------------------- /validada/decorators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jnmclarty/validada/da806ee649d67e2ac36ad4f22493003622493ee5/validada/decorators/__init__.py -------------------------------------------------------------------------------- /validada/decorators/raising.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from validada.core import RaiseSet 3 | 4 | none_missing = RaiseSet().decorator_maker('none_missing') 5 | is_shape = RaiseSet().decorator_maker('is_shape') 6 | unique_index = RaiseSet().decorator_maker('unique_index') 7 | is_monotonic = RaiseSet().decorator_maker('is_monotonic') 8 | within_set = RaiseSet().decorator_maker('within_set') 9 | within_range = RaiseSet().decorator_maker('within_range') 10 | within_n_std = RaiseSet().decorator_maker('within_n_std') 11 | has_dtypes = RaiseSet().decorator_maker('has_dtypes') 12 | equal_columns_sum = RaiseSet().decorator_maker('equal_columns_sum') 13 | has_in_index = RaiseSet().decorator_maker('has_in_index') 14 | 15 | __all__ = [none_missing, is_monotonic, is_shape, none_missing, unique_index, 16 | within_n_std, has_dtypes, equal_columns_sum, has_in_index] 17 | -------------------------------------------------------------------------------- /validada/decorators/returning.py: -------------------------------------------------------------------------------- 1 | # -*- 
coding: utf-8 -*- 2 | from validada.core import ReturnSet 3 | 4 | none_missing = ReturnSet().decorator_maker('none_missing') 5 | is_shape = ReturnSet().decorator_maker('is_shape') 6 | unique_index = ReturnSet().decorator_maker('unique_index') 7 | is_monotonic = ReturnSet().decorator_maker('is_monotonic') 8 | within_set = ReturnSet().decorator_maker('within_set') 9 | within_range = ReturnSet().decorator_maker('within_range') 10 | within_n_std = ReturnSet().decorator_maker('within_n_std') 11 | has_dtypes = ReturnSet().decorator_maker('has_dtypes') 12 | equal_columns_sum = ReturnSet().decorator_maker('equal_columns_sum') 13 | has_in_index = ReturnSet().decorator_maker('has_in_index') 14 | 15 | __all__ = [none_missing, is_monotonic, is_shape, none_missing, unique_index, 16 | within_n_std, has_dtypes, equal_columns_sum, has_in_index] 17 | -------------------------------------------------------------------------------- /validada/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jnmclarty/validada/da806ee649d67e2ac36ad4f22493003622493ee5/validada/functions/__init__.py -------------------------------------------------------------------------------- /validada/functions/raising.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from validada.core import RaiseSet 3 | 4 | none_missing = RaiseSet().none_missing 5 | is_monotonic = RaiseSet().is_monotonic 6 | is_shape = RaiseSet().is_shape 7 | unique_index = RaiseSet().unique_index 8 | within_set = RaiseSet().within_set 9 | within_range = RaiseSet().within_range 10 | within_n_std = RaiseSet().within_n_std 11 | has_dtypes = RaiseSet().has_dtypes 12 | equal_columns_sum = RaiseSet().equal_columns_sum 13 | has_in_index = RaiseSet().has_in_index 14 | 15 | __all__ = [is_monotonic, is_shape, none_missing, unique_index, within_n_std, 16 | within_range, within_set, has_dtypes, 
equal_columns_sum, has_in_index] 17 | 18 | -------------------------------------------------------------------------------- /validada/functions/returning.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from validada.core import ReturnSet 4 | 5 | none_missing = ReturnSet().none_missing 6 | is_monotonic = ReturnSet().is_monotonic 7 | is_shape = ReturnSet().is_shape 8 | unique_index = ReturnSet().unique_index 9 | within_set = ReturnSet().within_set 10 | within_range = ReturnSet().within_range 11 | within_n_std = ReturnSet().within_n_std 12 | has_dtypes = ReturnSet().has_dtypes 13 | equal_columns_sum = ReturnSet().equal_columns_sum 14 | has_in_index = ReturnSet().has_in_index 15 | 16 | __all__ = [is_monotonic, is_shape, none_missing, unique_index, within_n_std, 17 | within_range, within_set, has_dtypes, equal_columns_sum, has_in_index] 18 | 19 | -------------------------------------------------------------------------------- /validada/slicers.py: -------------------------------------------------------------------------------- 1 | class SliceStore(object): 2 | def __init__(self, slc=None, mode='loc'): 3 | self.slc = slc or slice(None) 4 | self.mode = mode 5 | def __getitem__(self, slc): 6 | self.slc = slc 7 | return self 8 | def __setitem__(self, _, __): 9 | raise Exception("SliceStore cannot be assigned values") 10 | def __str__(self): 11 | return "{{.{}[{}]}}".format(self.mode, repr(self.slc)) 12 | 13 | def _index_slicer_factory(defaultmode): 14 | class IndexSlicer(SliceStore): 15 | def __init__(self, slc=None, mode=defaultmode): 16 | self.slc = slc or slice(None) 17 | self.mode = mode 18 | return IndexSlicer 19 | 20 | # ix = _index_slicer_factory('ix')() 21 | iloc = _index_slicer_factory('iloc')() 22 | loc = _index_slicer_factory('loc')() 23 | 24 | if __name__ == '__main__': 25 | print(iloc) 26 | print(loc) 27 | iloc[1:10:2] 28 | print(iloc) 29 | 
--------------------------------------------------------------------------------