├── .codeclimate.yml ├── .coveralls.yml ├── .gitignore ├── .travis.yml ├── AUTHORS ├── CHANGES.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── example ├── pandas_validator_example_en.ipynb └── pandas_validator_example_ja.ipynb ├── pandas_validator ├── __init__.py ├── core │ ├── __init__.py │ └── exceptions.py └── validators │ ├── __init__.py │ ├── columns.py │ ├── dataframe.py │ ├── index.py │ ├── series.py │ └── test │ ├── __init__.py │ ├── test_columns.py │ ├── test_dataframe.py │ ├── test_index.py │ └── test_series.py ├── requirements ├── constraints.txt ├── dev.txt └── general.txt ├── setup.cfg ├── setup.py └── tox.ini /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | languages: 2 | Python: true 3 | exclude_paths: 4 | pandas_validator/validators/test 5 | -------------------------------------------------------------------------------- /.coveralls.yml: -------------------------------------------------------------------------------- 1 | service_name: travis-pro 2 | repo_token: iNMrCrbdQgCEgf4x02Q1zWelEJLmDNKCB 3 | 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # ipython notebook 60 | .ipynb_checkpoints/ 61 | 62 | # IntelliJ 63 | .idea/ 64 | 65 | # pyvenv 66 | venv/ 67 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | python: 3.6 4 | cache: 5 | directories: 6 | - "$HOME/.tox" 7 | before_cache: 8 | - rm -f $HOME/.tox/$TOXENV/log/*.log 9 | env: 10 | - TOXENV=py27-pandas18 11 | - TOXENV=py27-pandas19 12 | - TOXENV=py34-pandas18 13 | - TOXENV=py34-pandas19 14 | - TOXENV=py36-pandas18 15 | - TOXENV=py36-pandas19 16 | - TOXENV=flake8 17 | install: 18 | - travis_retry pip install -q tox coveralls 19 | - travis_retry pip install -q -r requirements/general.txt 20 | script: 21 | - travis_retry tox 22 | after_success: 23 | - coverage run --source=pandas_validator setup.py test 24 | - coveralls 25 | notifications: 26 | email: false 27 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Origin: 2 | 3 | * Masashi Shibata 4 | 5 | Contributors: 6 | 7 | * Takumi Sueda (2016-07-08) 8 | * yubessy (2016-09-14) 9 | -------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | CHANGES 2 | ======= 3 | 4 | 0.5.0 (2017-01-06) 5 | ------------------ 6 | 7 | * Support Python3.6 and pandas 1.8, 1.9 8 | * 
Does not support Python 3.4 and pandas 1.6, 1.7. 9 | * Add LambdaColumnValidator 10 | * Add IndexValidator 11 | * ``.validate(df)`` method is deprecated. Please use ``.is_valid(df, raise_exception=True)`` 12 | 13 | 0.4.0 (2015-10-28) 14 | ------------------ 15 | 16 | * Hot fix: cannot include source file 17 | 18 | 0.3.2 (2015-10-28) 19 | ------------------ 20 | 21 | * Python 2.7, 3.2, 3.3, 3.4, 3.5 support 22 | * pandas 0.14, 0.15, 0.16, 0.17 support 23 | 24 | 0.3.1 (2015-10-28) 25 | ------------------ 26 | 27 | * Update support python version 28 | * Update dependencies library version 29 | 30 | 0.3.0 (2015-07-15) 31 | ------------------ 32 | 33 | * Critical bug fix 34 | 35 | 0.2.0 (2015-05-24) 36 | ------------------ 37 | 38 | * Support char type validation 39 | * flake8 testing 40 | 41 | 0.1.0 (2015-05-22) 42 | ------------------ 43 | 44 | Initial release. 45 | 46 | * Support integer series validator 47 | * Support float series validator 48 | * Support dataframe validator 49 | 50 | * Testing on python2.7 and python 3.4 51 | 52 | 0.0.0 (2015-05-17) 53 | ------------------ 54 | 55 | Create this project. 56 | 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 MASASHI Shibata 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE *.rst -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ================ 2 | pandas-validator 3 | ================ 4 | 5 | .. image:: https://travis-ci.org/c-bata/pandas-validator.svg?branch=master 6 | :target: https://travis-ci.org/c-bata/pandas-validator 7 | 8 | .. image:: https://badge.fury.io/py/pandas_validator.svg 9 | :target: http://badge.fury.io/py/pandas_validator 10 | 11 | .. image:: https://coveralls.io/repos/github/c-bata/pandas-validator/badge.svg?branch=master 12 | :target: https://coveralls.io/github/c-bata/pandas-validator?branch=master 13 | :alt: Coveralls Status 14 | 15 | 16 | 17 | Validates pandas objects such as DataFrame and Series. 18 | Validators are defined declaratively, like Django form classes. 19 | 20 | 21 | Why bugs occur in Data Wrangling with pandas 22 | -------------------------------------------- 23 | 24 | When we wrangle data with pandas, we use `DataFrame` frequently. 25 | `DataFrame` is very powerful and easy to handle. 26 | But a `DataFrame` has no schema of its own, so it accepts irregular values without us being aware of them. 27 | These values cause confusion and affect the results of data wrangling. 
28 | 29 | `pandas-validator` offers the functions for validating `DataFrame` or `Series` objects. 30 | 31 | 32 | Overview 33 | -------- 34 | 35 | .. code-block:: python 36 | 37 | import pandas as pd 38 | import pandas_validator as pv 39 | 40 | class SampleDataFrameValidator(pv.DataFrameValidator): 41 | row_num = 5 42 | column_num = 2 43 | label1 = pv.IntegerColumnValidator('label1', min_value=0, max_value=10) 44 | label2 = pv.FloatColumnValidator('label2', min_value=0, max_value=10) 45 | 46 | validator = SampleDataFrameValidator() 47 | 48 | df = pd.DataFrame({'label1': [0, 1, 2, 3, 4], 'label2': [5.0, 6.0, 7.0, 8.0, 9.0]}) 49 | validator.is_valid(df) # True. 50 | 51 | df = pd.DataFrame({'label1': [11, 12, 13, 14, 15], 'label2': [5.0, 6.0, 7.0, 8.0, 9.0]}) 52 | validator.is_valid(df) # False. 53 | 54 | df = pd.DataFrame({'label1': [0, 1, 2], 'label2': [5.0, 6.0, 7.0]}) 55 | validator.is_valid(df) # False 56 | 57 | 58 | Getting Started 59 | =============== 60 | 61 | Requirements 62 | ------------ 63 | 64 | * Support python version: 2.7, 3.4, 3.5, 3.6 65 | * Support pandas version: 0.18, 0.19 66 | 67 | Installation 68 | ------------ 69 | 70 | .. code-block:: console 71 | 72 | $ pip install pandas_validator 73 | 74 | Usage 75 | ----- 76 | 77 | Please see the following demo written by ipython notebook. 78 | 79 | * `Demo in Japanese `_ 80 | * `Demo in English `_ 81 | 82 | 83 | License 84 | ======= 85 | 86 | This software is licensed under the MIT License. 87 | 88 | 89 | Resources 90 | ========= 91 | 92 | * `Github `_ 93 | * `PyPI `_ 94 | -------------------------------------------------------------------------------- /example/pandas_validator_example_en.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# pandas-validator example\n", 8 | "\n", 9 | "This is example of pandas-validator in English." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# Please install this package using following command.\n", 19 | "# $ pip install pandas-validator\n", 20 | "import pandas_validator as pv" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import pandas as pd\n", 30 | "import numpy as np" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Series Validator" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "# Create validator's instance\n", 47 | "validator = pv.IntegerSeriesValidator(min_value=0, max_value=10)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "name": "stdout", 57 | "output_type": "stream", 58 | "text": [ 59 | "True\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "series = pd.Series([0, 3, 6, 9]) # This series is valid.\n", 65 | "print(validator.is_valid(series))" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 5, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "False\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "series = pd.Series([0, 4, 8, 12]) # This series is invalid. because that includes 12 number.\n", 83 | "print(validator.is_valid(series))" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": {}, 89 | "source": [ 90 | "## DataFrame Validator\n", 91 | "\n", 92 | "DataFrameValidator class can validate panda's dataframe object.\n", 93 | "It can define easily like Django's model definition." 
94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 6, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "# Define validator\n", 103 | "class SampleDataFrameValidator(pv.DataFrameValidator):\n", 104 | " row_num = 5\n", 105 | " column_num = 2\n", 106 | " label1 = pv.IntegerColumnValidator('label1', min_value=0, max_value=10)\n", 107 | " label2 = pv.FloatColumnValidator('label2', min_value=0, max_value=10)\n", 108 | "\n", 109 | "# Create validator's instance\n", 110 | "validator = SampleDataFrameValidator()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 7, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "name": "stdout", 120 | "output_type": "stream", 121 | "text": [ 122 | "True\n" 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "df = pd.DataFrame({'label1': [0, 1, 2, 3, 4], 'label2': [5.0, 6.0, 7.0, 8.0, 9.0]}) # This data frame is valid.\n", 128 | "print(validator.is_valid(df))" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 8, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "False\n" 141 | ] 142 | } 143 | ], 144 | "source": [ 145 | "df = pd.DataFrame({'label1': [11, 12, 13, 14, 15], 'label2': [5.0, 6.0, 7.0, 8.0, 9.0]}) # This data frame is invalid.\n", 146 | "print(validator.is_valid(df))" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 9, 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "False\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "df = pd.DataFrame({'label1': [0, 1, 2], 'label2': [5.0, 6.0, 7.0]}) # This data frame is invalid.\n", 164 | "print(validator.is_valid(df))" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "" 174 | ] 175 | } 176 | ], 177 | "metadata": { 
178 | "kernelspec": { 179 | "display_name": "Python 3", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 | "name": "ipython", 186 | "version": 3.0 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.4.3" 194 | } 195 | }, 196 | "nbformat": 4, 197 | "nbformat_minor": 0 198 | } -------------------------------------------------------------------------------- /example/pandas_validator_example_ja.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# pandas-validator example\n", 8 | "\n", 9 | "pandas-validatorの使用例。" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "# 以下のコマンドでインストールして下さい。\n", 19 | "# $ pip install pandas-validator\n", 20 | "import pandas_validator as pv" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import pandas as pd\n", 30 | "import numpy as np" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Series Validator" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "validator = import pv.IntegerSeriesValidator(min_value=0, max_value=10)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "True\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "# バリデーションを通る例\n", 64 | "series = pd.Series([0, 3, 6, 9])\n", 65 | "print(validator.is_valid(series))" 66 | ] 67 | }, 68 | { 69 | 
"cell_type": "code", 70 | "execution_count": 5, 71 | "metadata": {}, 72 | "outputs": [ 73 | { 74 | "name": "stdout", 75 | "output_type": "stream", 76 | "text": [ 77 | "False\n" 78 | ] 79 | } 80 | ], 81 | "source": [ 82 | "# 12は大きいためこのseriesはバリデーションを通りません\n", 83 | "series = pd.Series([0, 4, 8, 12])\n", 84 | "print(validator.is_valid(series))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "## DataFrame Validator\n", 92 | "\n", 93 | "pandasのデータフレームオブジェクトのバリデーションをdjangoのModelやFormクラスのように定義できます。" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 6, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "# バリデータの定義\n", 103 | "class SampleDataFrameValidator(pv.DataFrameValidator):\n", 104 | " row_num = 5\n", 105 | " column_num = 2\n", 106 | " label1 = pv.IntegerColumnValidator('label1', min_value=0, max_value=10)\n", 107 | " label2 = pv.FloatColumnValidator('label2', min_value=0, max_value=10)\n", 108 | "\n", 109 | "validator = SampleDataFrameValidator()" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 7, 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "name": "stdout", 119 | "output_type": "stream", 120 | "text": [ 121 | "True\n" 122 | ] 123 | } 124 | ], 125 | "source": [ 126 | "# バリデーションを通る例\n", 127 | "df = pd.DataFrame({'label1': [0, 1, 2, 3, 4], 'label2': [5.0, 6.0, 7.0, 8.0, 9.0]})\n", 128 | "print(validator.is_valid(df))" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 8, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "False\n" 141 | ] 142 | } 143 | ], 144 | "source": [ 145 | "# このケースではdf['label1']の値が0~10に収まっていないため、バリデーションを通りません。\n", 146 | "df = pd.DataFrame({'label1': [11, 12, 13, 14, 15], 'label2': [5.0, 6.0, 7.0, 8.0, 9.0]})\n", 147 | "print(validator.is_valid(df))" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 
9, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "False\n" 160 | ] 161 | } 162 | ], 163 | "source": [ 164 | "# DataFrameの行数が5行でないためバリデーションを通りません。\n", 165 | "df = pd.DataFrame({'label1': [0, 1, 2], 'label2': [5.0, 6.0, 7.0]})\n", 166 | "print(validator.is_valid(df))" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "" 176 | ] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "Python 3", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3.0 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.4.3" 196 | } 197 | }, 198 | "nbformat": 4, 199 | "nbformat_minor": 0 200 | } -------------------------------------------------------------------------------- /pandas_validator/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | 3 | from .validators.series import ( 4 | BaseSeriesValidator, 5 | IntegerSeriesValidator, 6 | FloatSeriesValidator, 7 | CharSeriesValidator, 8 | LambdaSeriesValidator, 9 | ) 10 | from .validators.columns import ( 11 | IntegerColumnValidator, 12 | FloatColumnValidator, 13 | CharColumnValidator, 14 | LambdaColumnValidator, 15 | ) 16 | from .validators.dataframe import ( 17 | DataFrameValidator, 18 | ) 19 | from .validators.index import ( 20 | BaseIndexValidator, 21 | IndexValidator, 22 | ColumnsValidator, 23 | ) 24 | -------------------------------------------------------------------------------- /pandas_validator/core/__init__.py: -------------------------------------------------------------------------------- 
class ColumnValidatorMixin(BaseSeriesValidator):
    """Apply a series validator to one labelled column of a DataFrame.

    Combined with a concrete series validator through multiple inheritance
    (the mixin must come first in the bases), it selects
    ``dataframe[label]`` and hands that column to the series validator's
    own ``validate``.

    :param label: Column label looked up in the DataFrame being validated.
    :param args: Positional arguments forwarded to the next ``__init__``
        in the MRO.
    :param kwargs: Keyword arguments forwarded to the next ``__init__``
        in the MRO.
    """

    def __init__(self, label, *args, **kwargs):
        super(ColumnValidatorMixin, self).__init__(*args, **kwargs)
        self.label = label

    def validate(self, dataframe):
        """Validate the column ``self.label`` of *dataframe*.

        :param dataframe: Object indexed with ``dataframe[self.label]``.
        :raises ValidationError: If the column is missing, or if the
            underlying series validator rejects the column.
        """
        # Local import: this module's header only imports from ``.series``.
        from ..core.exceptions import ValidationError

        try:
            column = dataframe[self.label]
        except KeyError:
            # A missing column is a validation failure, not a crash:
            # ``is_valid`` only catches ValidationError, so a bare KeyError
            # would escape instead of producing ``False``.
            raise ValidationError(
                'DataFrame has no column %r.' % (self.label,))

        super(ColumnValidatorMixin, self).validate(column)
class DataFrameValidator(object):
    """Declarative validator for a pandas DataFrame.

    Subclasses describe the expected frame with class attributes:

    * ``row_num`` / ``column_num``: exact number of rows / columns.
    * ``index`` / ``columns``: validator objects whose ``validate`` is
      called with ``df.index`` / ``df.columns``.
    * any attribute holding a ``ColumnValidatorMixin`` instance: its
      ``validate`` is called with the whole frame.
    """

    index = None
    columns = None

    column_num = None  # int The number of column.
    row_num = None  # int The number of row.

    # Size-only validators generated from row_num / column_num.  They are
    # remembered so they can be rebuilt when is_valid() overrides the sizes,
    # without ever replacing a validator supplied by the user.
    _auto_index = None
    _auto_columns = None

    def __init__(self, nullable=False):
        """Create the validator.

        :param nullable: Stored on the instance; nothing in this class
            reads it.
        """
        self.nullable = nullable
        self._setup_index_and_columns_validator()

    def _setup_index_and_columns_validator(self):
        """Derive size-only index/columns validators from the size attributes.

        A validator is generated only where none was supplied explicitly.
        A previously generated one is rebuilt (or dropped) so that it always
        reflects the current ``row_num`` / ``column_num``.
        """
        if self.index is None or self.index is self._auto_index:
            self._auto_index = (IndexValidator(size=self.row_num)
                                if self.row_num is not None else None)
            self.index = self._auto_index

        if self.columns is None or self.columns is self._auto_columns:
            self._auto_columns = (ColumnsValidator(size=self.column_num)
                                  if self.column_num is not None else None)
            self.columns = self._auto_columns

    def _run_column_validator(self, df):
        """Run every ``ColumnValidatorMixin`` attribute against *df*.

        :raises ValidationError: Propagated from the first failing validator.
        """
        fields = [getattr(self, name) for name in dir(self)]
        column_validators = [field for field in fields
                             if isinstance(field, ColumnValidatorMixin)]

        for column_validator in column_validators:
            column_validator.validate(df)
        return True

    def _run_index_and_columns_validator(self, df):
        """Validate ``df.index`` and ``df.columns`` when validators are set.

        :raises ValidationError: Propagated from the failing validator.
        """
        if self.index is not None:
            self.index.validate(df.index)

        if self.columns is not None:
            self.columns.validate(df.columns)

        return True

    def _check_dataframe_size(self, df):
        """Check the exact row/column counts, if they are configured.

        :raises ValidationError: If a configured count does not match.
        """
        if self.column_num is not None and len(df.columns) != self.column_num:
            raise ValidationError('DataFrame columns number is not %s'
                                  % self.column_num)

        if self.row_num is not None and len(df.index) != self.row_num:
            raise ValidationError('DataFrame rows number is not %s'
                                  % self.row_num)

    def is_valid(self, df, raise_exception=False, **kwargs):
        """Return whether *df* satisfies every configured check.

        :param df: The DataFrame to validate.
        :param raise_exception: When true, re-raise the ``ValidationError``
            instead of returning ``False``.
        :param kwargs: Attribute overrides (for example ``row_num=3``).
            They are set on the instance and therefore persist for later
            calls.
        :returns: ``True`` when valid; ``False`` when invalid and
            *raise_exception* is false.
        :raises ValidationError: When invalid and *raise_exception* is true.
        """
        for key, value in kwargs.items():
            setattr(self, key, value)
        if kwargs:
            # The generated size validators were built in __init__; rebuild
            # them so an overridden row_num/column_num is not checked
            # against a stale size.
            self._setup_index_and_columns_validator()

        try:
            self._run_index_and_columns_validator(df)
            self._run_column_validator(df)
            self._check_dataframe_size(df)
        except ValidationError:
            if raise_exception:
                raise
            return False
        return True

    def validate(self, df, **kwargs):
        """Deprecated: use ``is_valid(df, raise_exception=True)``.

        :raises ValidationError: If *df* is invalid.
        """
        # stacklevel=2 attributes the warning to the caller, not this line.
        warnings.warn(
            "DataFrameValidator.validate() is deprecated; "
            "use is_valid(df, raise_exception=True) instead.",
            DeprecationWarning, stacklevel=2)
        self.is_valid(df, raise_exception=True, **kwargs)
class CharSeriesValidator(BaseSeriesValidator):
    """Validate that a series holds only strings within a length range.

    :param min_length: Smallest allowed string length, or ``None`` for no
        lower bound.
    :param max_length: Largest allowed string length, or ``None`` for no
        upper bound.
    :param args: Positional arguments forwarded to ``BaseSeriesValidator``.
    :param kwargs: Keyword arguments forwarded to ``BaseSeriesValidator``.
    """

    # On Python 2 text may be ``str`` or ``unicode``; ``basestring`` covers
    # both.  The name does not exist on Python 3, where ``str`` is enough.
    try:
        _string_types = (basestring,)  # noqa: F821
    except NameError:
        _string_types = (str,)

    def __init__(self, min_length=None, max_length=None, *args, **kwargs):
        super(CharSeriesValidator, self).__init__(*args, **kwargs)

        self.min_length, self.max_length = min_length, max_length

    def _check_type(self, series):
        """Require every element to be a string.

        The check is per element; ``series_type`` is not consulted here.

        :raises ValidationError: If any element is not a string.
        """
        string_types = self._string_types
        is_string = series.map(lambda x: isinstance(x, string_types))
        if not is_string.all():
            raise ValidationError('Series has the different type variables.')

    def validate(self, series):
        """Validate element types, then the configured length bounds.

        :raises ValidationError: If an element is not a string, or a string
            is longer than ``max_length`` / shorter than ``min_length``.
        """
        super(CharSeriesValidator, self).validate(series)

        if self.max_length is None and self.min_length is None:
            return

        # Computed once and shared by both bound checks.
        lengths = series.map(len)

        if (self.max_length is not None and
                lengths.max() > self.max_length):
            raise ValidationError('Series has the value longer than max.')

        if (self.min_length is not None and
                lengths.min() < self.min_length):
            raise ValidationError('Series has the value shorter than min.')
class LambdaColumnValidatorTest(TestCase):
    """LambdaColumnValidator reports whatever its callable returns."""

    def setUp(self):
        # One column mixing an int and a str.
        self.dataframe = pd.DataFrame({'label1': [1, 'a']})

    def test_is_valid_when_lambda_returns_true(self):
        """A callable returning True makes the frame valid."""
        always_true = lambda column: True  # noqa: E731
        validator = pv.LambdaColumnValidator('label1', always_true)
        outcome = validator.is_valid(self.dataframe)
        self.assertTrue(outcome)

    def test_is_invalid_when_lambda_returns_false(self):
        """A callable returning False makes the frame invalid."""
        always_false = lambda column: False  # noqa: E731
        validator = pv.LambdaColumnValidator('label1', always_false)
        outcome = validator.is_valid(self.dataframe)
        self.assertFalse(outcome)
class DataFrameValidatorSizeTest(TestCase):
    """Row/column count checks of DataFrameValidatorFixtureWithSize."""

    def setUp(self):
        self.validator = DataFrameValidatorFixtureWithSize()

    def test_valid_when_matches_row_numbers(self):
        """Three rows and two columns match the fixture."""
        data = {'x': [0, 1, 2], 'y': [1., 2., 3.]}
        frame = pd.DataFrame(data)
        verdict = self.validator.is_valid(frame)
        self.assertTrue(verdict)

    def test_invalid_when_not_matches_row_numbers(self):
        """Two rows instead of three are rejected."""
        data = {'x': [0, 1], 'y': [1., 2.]}
        frame = pd.DataFrame(data)
        verdict = self.validator.is_valid(frame)
        self.assertFalse(verdict)

    def test_invalid_when_not_matches_column_numbers(self):
        """Three columns instead of two are rejected."""
        data = {'x': [0, 1, 2], 'y': [1., 2., 3.], 'z': [1, 2, 3]}
        frame = pd.DataFrame(data)
        verdict = self.validator.is_valid(frame)
        self.assertFalse(verdict)
class DataFrameValidatorFixtureWithColumns(pv.DataFrameValidator):
    """Fixture for testing the validation of columns validator."""
    # A valid frame has exactly two columns whose labels are objects (str).
    columns = pv.ColumnsValidator(size=2, type=np.object_)


class DataFrameValidatorColumnsIndexTest(TestCase):
    """Testing the validation of columns size and type."""

    def setUp(self):
        self.validator = DataFrameValidatorFixtureWithColumns()

    def test_valid_when_matches_columns_size_and_type(self):
        # Two string-labelled columns: size and type both match.
        df = pd.DataFrame({'x': [0, 1, 2], 'y': [1., 2., 3.]})
        self.assertTrue(self.validator.is_valid(df))

    def test_invalid_when_not_matches_columns_size(self):
        # Three string-labelled columns: only the size is wrong.
        df = pd.DataFrame({'x': [0, 1, 2], 'y': [1., 2., 3.], 'z': [1, 2, 3]})
        self.assertFalse(self.validator.is_valid(df))

    def test_invalid_when_not_matches_columns_type(self):
        # Exactly two columns with default integer labels (0, 1), so the
        # size requirement is satisfied and only the label type is wrong.
        # The previous 3-column frame also violated size=2, which meant this
        # test would keep passing even if the type check were broken.
        df = pd.DataFrame([[0, 1.], [1, 2.], [2, 3.]])
        self.assertFalse(self.validator.is_valid(df))
class BaseSeriesValidatorTest(TestCase):
    """Exercise BaseSeriesValidator configured with series_type=np.int64."""

    def setUp(self):
        self.validator = pv.BaseSeriesValidator(series_type=np.int64)

    def test_is_valid_when_given_int64_series(self):
        integers = pd.Series([0, 1])
        outcome = self.validator.is_valid(integers)
        self.assertTrue(outcome)

    def test_is_invalid_when_given_float_series(self):
        floats = pd.Series([0., 1.])
        outcome = self.validator.is_valid(floats)
        self.assertFalse(outcome)

    def test_should_return_true_when_given_int64_series(self):
        # Despite the name, the assertion is that validate() returns None.
        integers = pd.Series([0, 1])
        outcome = self.validator.validate(integers)
        self.assertIsNone(outcome)

    def test_should_return_false_when_given_float_series(self):
        # Despite the name, the assertion is that validate() raises
        # ValidationError.
        floats = pd.Series([0., 1.])
        with self.assertRaises(ValidationError):
            self.validator.validate(floats)


class IntegerSeriesValidatorTest(TestCase):
    """Exercise IntegerSeriesValidator configured with bounds 0 and 2."""

    def setUp(self):
        self.validator = pv.IntegerSeriesValidator(min_value=0, max_value=2)

    def test_is_valid(self):
        # Every value lies within the configured bounds.
        within_bounds = pd.Series([0, 1, 2])
        outcome = self.validator.is_valid(within_bounds)
        self.assertTrue(outcome)

    def test_is_invalid_by_too_low_value(self):
        # -1 is below min_value.
        below_minimum = pd.Series([-1, 0, 1, 2])
        outcome = self.validator.is_valid(below_minimum)
        self.assertFalse(outcome)

    def test_is_invalid_by_too_high_value(self):
        # 3 is above max_value.
        above_maximum = pd.Series([0, 1, 2, 3])
        outcome = self.validator.is_valid(above_maximum)
        self.assertFalse(outcome)
class CharSeriesValidatorTest(TestCase):
    """Exercise CharSeriesValidator configured with lengths 0 to 4."""

    def setUp(self):
        self.validator = pv.CharSeriesValidator(min_length=0, max_length=4)

    def test_is_valid(self):
        # Lengths 0, 2 and 4 all lie within the configured limits.
        strings = pd.Series(['', 'ab', 'abcd'])
        outcome = self.validator.is_valid(strings)
        self.assertTrue(outcome)

    def test_is_invalid_when_given_integer_series(self):
        integers = pd.Series([0, 1, 2])
        outcome = self.validator.is_valid(integers)
        self.assertFalse(outcome)

    def test_is_invalid_by_too_long_length(self):
        # 'abcde' has five characters, one more than max_length.
        strings = pd.Series(['', 'ab', 'abcde'])
        outcome = self.validator.is_valid(strings)
        self.assertFalse(outcome)


class LambdaSeriesValidatorTest(TestCase):
    """Exercise LambdaSeriesValidator with constant-result callables."""

    def setUp(self):
        # One series mixing int, str, bytes and complex values.
        mixed_values = [1, 'a', b'4', 2j]
        self.series = pd.Series(mixed_values)

    def test_is_valid_when_lambda_returns_true(self):
        always_true = pv.LambdaSeriesValidator(lambda s: True)
        outcome = always_true.is_valid(self.series)
        self.assertTrue(outcome)

    def test_is_invalid_when_lambda_returns_false(self):
        always_false = pv.LambdaSeriesValidator(lambda s: False)
        outcome = always_false.is_valid(self.series)
        self.assertFalse(outcome)
pandas==0.19.2 11 | pexpect==4.2.1 12 | pickleshare==0.7.4 13 | pkginfo==1.4.1 14 | pluggy==0.4.0 15 | prompt-toolkit==1.0.9 16 | ptyprocess==0.5.1 17 | py==1.4.32 18 | pycodestyle==2.2.0 19 | pyflakes==1.3.0 20 | Pygments==2.1.3 21 | pytest==3.0.5 22 | python-dateutil==2.6.0 23 | pytz==2016.10 24 | requests==2.12.4 25 | requests-toolbelt==0.7.0 26 | simplegeneric==0.8.1 27 | six==1.10.0 28 | tox==2.5.0 29 | traitlets==4.3.1 30 | twine==1.8.1 31 | virtualenv==15.1.0 32 | wcwidth==0.1.7 33 | -------------------------------------------------------------------------------- /requirements/dev.txt: -------------------------------------------------------------------------------- 1 | ipython 2 | tox 3 | wheel 4 | twine 5 | pytest 6 | flake8 7 | -------------------------------------------------------------------------------- /requirements/general.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = venv/*.py,build/*.py,docs/*.py 3 | 4 | [pytest] 5 | testpaths = pandas_validator 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from setuptools import setup, find_packages 4 | from setuptools.command.test import test as TestCommand 5 | 6 | BASE_PATH = os.path.abspath(os.path.dirname(__file__)) 7 | README = open(os.path.join(BASE_PATH, 'README.rst')).read() 8 | CHANGES = open(os.path.join(BASE_PATH, 'CHANGES.rst')).read() 9 | 10 | __author__ = 'Masashi Shibata ' 11 | __version__ = '0.5.0' 12 | __license__ = 'MIT License' 13 | __author_email__ = 'contact@c-bata.link' 14 | __url__ = 'https://github.com/c-bata/pandas-validator' 15 | __description__ = 'Validate the 
# Trove classifiers published with the package metadata.
__classifiers__ = [
    'Development Status :: 3 - Alpha',
    'Intended Audience :: Science/Research',
    'Topic :: Scientific/Engineering',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python',
    'Programming Language :: Python :: 2',
    'Programming Language :: Python :: 3',
    'Programming Language :: Python :: 2.7',
    'Programming Language :: Python :: 3.4',
    'Programming Language :: Python :: 3.5',
    # The project's tox and Travis configuration also test against py36.
    'Programming Language :: Python :: 3.6',
]


class PyTest(TestCommand):
    """``python setup.py test`` command that runs the suite with pytest.

    Extra pytest arguments can be supplied with ``--pytest-args`` / ``-a``.
    """
    user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")]

    def initialize_options(self):
        TestCommand.initialize_options(self)
        # Default: no extra arguments.
        self.pytest_args = []

    def finalize_options(self):
        TestCommand.finalize_options(self)
        self.test_args = []
        self.test_suite = True

    def run_tests(self):
        """Invoke pytest and exit the process with its return code."""
        # import here, cause outside the eggs aren't loaded
        import shlex
        import pytest

        args = self.pytest_args
        # A value given on the command line arrives as one string; split it
        # into an argument list because pytest.main expects a list.
        if not isinstance(args, (list, tuple)):
            args = shlex.split(args)
        errno = pytest.main(list(args))
        sys.exit(errno)


setup(
    name='pandas_validator',
    version=__version__,
    author=__author__,
    author_email=__author_email__,
    url=__url__,
    description=__description__,
    long_description=README + '\n\n' + CHANGES,
    packages=find_packages(exclude=['test*']),
    install_requires=['pandas'],
    keywords='pandas validator',
    license=__license__,
    # Previously the classifier list was defined but never handed to setup().
    classifiers=__classifiers__,
    include_package_data=True,
    tests_require=['pytest'],
    cmdclass={'test': PyTest},
    test_suite='pandas_validator',
)
pandas>=0.18,<0.19 14 | pandas19: pandas>=0.19,<0.20 15 | commands = python setup.py test 16 | 17 | [testenv:flake8] 18 | basepython = python3.6 19 | deps = flake8 20 | commands = flake8 pandas_validator 21 | 22 | [testenv:check_old_packages] 23 | commands = pip list -o 24 | --------------------------------------------------------------------------------