├── csvs ├── simple2.tsv ├── too-many-columns.csv ├── simple.csv ├── ints.csv ├── invalid-types.csv └── types.csv ├── csv2parquet ├── __main__.py ├── __init__.py ├── test_argparse.py ├── test_write.py └── csv2parquet.py ├── .gitignore ├── csv2parquet.sh ├── Makefile ├── .travis.yml ├── Pipfile ├── MAINTAINERS.md ├── setup.py ├── CHANGELOG.md ├── README.md ├── LICENSE ├── pylintrc └── Pipfile.lock /csvs/simple2.tsv: -------------------------------------------------------------------------------- 1 | a b 2 | 1 b 3 | -------------------------------------------------------------------------------- /csvs/too-many-columns.csv: -------------------------------------------------------------------------------- 1 | a,b 2 | 1,2,3 3 | -------------------------------------------------------------------------------- /csvs/simple.csv: -------------------------------------------------------------------------------- 1 | a,b 2 | 1,a 3 | 2,b 4 | 3,c 5 | -------------------------------------------------------------------------------- /csv2parquet/__main__.py: -------------------------------------------------------------------------------- 1 | from .csv2parquet import main 2 | main() 3 | -------------------------------------------------------------------------------- /csvs/ints.csv: -------------------------------------------------------------------------------- 1 | int8,int16,int32 2 | 1,2,3 3 | -200,-40000,-4000000000 4 | -------------------------------------------------------------------------------- /csv2parquet/__init__.py: -------------------------------------------------------------------------------- 1 | name = "csv2parquet" # pylint: disable=invalid-name 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | 4 | build/ 5 | dist/ 6 | csv2parquet.egg-info/ 7 | .pytest_cache/ 8 | .coverage 9 | csvs/*.parquet 10 | 
-------------------------------------------------------------------------------- /csv2parquet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | here="${BASH_SOURCE[0]}" 5 | here=$(dirname "$here") 6 | cd "$here" 7 | pipenv run python -m csv2parquet "$@" 8 | -------------------------------------------------------------------------------- /csvs/invalid-types.csv: -------------------------------------------------------------------------------- 1 | bool,float32,float64,int8,int16,int32,int64,string,timestamp 2 | 1,0.5,0.75,12,400,132000,6000000000,string,2018-07-09 3 | blah,blah,blah,blah,blah,blah,blah,blah,blah 4 | -------------------------------------------------------------------------------- /csvs/types.csv: -------------------------------------------------------------------------------- 1 | bool,float32,float64,int8,int16,int32,int64,string,timestamp 2 | 1,0.5,0.75,12,400,132000,6000000000,string,2018-07-09 3 | 0,0.6,1.75,13,401,132001,6000000001,string,2018-07-10 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | setup: 2 | pip install pipenv 3 | pipenv install --dev --three 4 | 5 | test: 6 | pipenv run -- pylint csv2parquet 7 | pipenv run -- pytest --cov=csv2parquet 8 | pipenv run -- codecov 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | env: 5 | global: 6 | - PIPENV_VENV_IN_PROJECT=1 7 | - PIPENV_IGNORE_VIRTUALENVS=1 8 | # command to install dependencies 9 | install: make setup 10 | # command to run tests 11 | script: make test 12 | -------------------------------------------------------------------------------- /Pipfile: 
-------------------------------------------------------------------------------- 1 | [[source]] 2 | 3 | verify_ssl = true 4 | name = "pypi" 5 | url = "https://pypi.python.org/simple" 6 | 7 | 8 | [packages] 9 | 10 | pyarrow = "*" 11 | pylint = "*" 12 | pytest = "*" 13 | pytest-cov = "*" 14 | codecov = "*" 15 | pytz = "*" 16 | 17 | 18 | [dev-packages] 19 | 20 | -------------------------------------------------------------------------------- /MAINTAINERS.md: -------------------------------------------------------------------------------- 1 | # Maintainers & Contributors 2 | 3 | * Colin Dellow 4 | 5 | Feedback, issue reports, PRs, etc, welcome. 6 | 7 | ## Doing a release to pypi 8 | 9 | ``` 10 | rm -rf build dist csv2parquet.egg-info 11 | python3 setup.py sdist bdist_wheel 12 | 13 | # Upload to test pypi 14 | twine upload --repository-url https://test.pypi.org/legacy/ dist/* 15 | python3 -m pip install --index-url https://test.pypi.org/simple/ csv2parquet 16 | 17 | # Upload to for reals pypi 18 | twine upload dist/* 19 | ``` 20 | 21 | See also https://packaging.python.org/tutorials/packaging-projects/ 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="csv2parquet", 8 | version="0.0.9", 9 | author="Colin Dellow", 10 | author_email="cldellow@cldellow.com", 11 | description="A tool to convert CSVs to Parquet files", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/cldellow/csv2parquet", 15 | packages=setuptools.find_packages(), 16 | entry_points = { 17 | "console_scripts": ['csv2parquet = csv2parquet.csv2parquet:main'] 18 | }, 19 | classifiers=( 20 | "Programming Language :: Python :: 3", 21 | "License :: OSI Approved :: Apache Software 
License", 22 | "Operating System :: OS Independent", 23 | ), 24 | ) 25 | 26 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changes 2 | 3 | ## 0.0.9 4 | 5 | Better error message when a row in the CSV has too many columns in it, 6 | courtesy [dazzag24](https://github.com/cldellow/csv2parquet/pull/14) 7 | 8 | ## 0.0.8 9 | 10 | Upgrade to be compatible with Arrow 0.15.1, which removed the `Column` type. 11 | 12 | ## 0.0.7 13 | 14 | Add `base64` type to interpret a base64-encoded string and store it as a binary field. 15 | 16 | ## 0.0.6 17 | 18 | Upgrade to pyarrow 0.10.0, which supports zstd. 19 | 20 | ## 0.0.5 21 | 22 | Support `--type`. 23 | 24 | ## 0.0.4 25 | 26 | Support `--include`, `--exclude`, `--rename`. 27 | 28 | ## 0.0.3 29 | 30 | Support `--rows`, sanitize column names 31 | 32 | ## 0.0.2 33 | 34 | Fix regression of `output_file` 35 | 36 | ## 0.0.1 37 | 38 | Initial pypi release. 39 | 40 | # How to do a release 41 | 42 | These are internal notes based on https://packaging.python.org/tutorials/packaging-projects/, 43 | because otherwise I'll forget how to do this. 44 | 45 | ## Package 46 | 47 | ``` 48 | python3 -m pip install --user --upgrade setuptools wheel 49 | python3 setup.py sdist bdist_wheel 50 | ``` 51 | 52 | `dist/` should now have a wheel and a tarball. 
53 | 54 | ## Test upload 55 | 56 | ``` 57 | python3 -m pip install --user --upgrade twine 58 | twine upload --repository-url https://test.pypi.org/legacy/ dist/* 59 | 60 | # Now validate with: 61 | python3 -m pip install --index-url https://test.pypi.org/simple/ example_pkg 62 | ``` 63 | 64 | ## Release upload 65 | 66 | ``` 67 | twine upload dist/* 68 | ``` 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # csv2parquet 2 | 3 | [![Build Status](https://travis-ci.org/cldellow/csv2parquet.svg?branch=master)](https://travis-ci.org/cldellow/csv2parquet) 4 | [![codecov](https://codecov.io/gh/cldellow/csv2parquet/branch/master/graph/badge.svg)](https://codecov.io/gh/cldellow/csv2parquet) 5 | 6 | Convert a CSV to a parquet file. You may also find [sqlite-parquet-vtable](https://github.com/cldellow/sqlite-parquet-vtable) or 7 | [parquet-metadata](https://github.com/cldellow/parquet-metadata) useful. 8 | 9 | ## Installing 10 | 11 | If you just want to use the tool: 12 | 13 | ``` 14 | sudo pip install pyarrow csv2parquet 15 | ``` 16 | 17 | If you want to clone the repo and work on the tool, install its dependencies via pipenv: 18 | 19 | ``` 20 | pipenv install 21 | ``` 22 | 23 | ## Usage 24 | 25 | Next, create some Parquet files. The tool supports CSV and TSV files. 26 | 27 | ``` 28 | usage: csv2parquet [-h] [-n ROWS] [-r ROW_GROUP_SIZE] [-o OUTPUT] [-c CODEC] 29 | [-i INCLUDE [INCLUDE ...] | -x EXCLUDE [EXCLUDE ...]] 30 | [-R RENAME [RENAME ...]] [-t TYPE [TYPE ...]] 31 | csv_file 32 | 33 | positional arguments: 34 | csv_file input file, can be CSV or TSV 35 | 36 | optional arguments: 37 | -h, --help show this help message and exit 38 | -n ROWS, --rows ROWS The number of rows to include, useful for testing. 39 | -r ROW_GROUP_SIZE, --row-group-size ROW_GROUP_SIZE 40 | The number of rows per row group. 
41 | -o OUTPUT, --output OUTPUT 42 | The parquet file 43 | -c CODEC, --codec CODEC 44 | The compression codec to use (brotli, gzip, snappy, 45 | zstd, none) 46 | -i INCLUDE [INCLUDE ...], --include INCLUDE [INCLUDE ...] 47 | Include the given columns (by index or name) 48 | -x EXCLUDE [EXCLUDE ...], --exclude EXCLUDE [EXCLUDE ...] 49 | Exclude the given columns (by index or name) 50 | -R RENAME [RENAME ...], --rename RENAME [RENAME ...] 51 | Rename a column. Specify the column to be renamed and 52 | its new name, eg: 0=age or person_age=age 53 | -t TYPE [TYPE ...], --type TYPE [TYPE ...] 54 | Parse a column as a given type. Specify the column and 55 | its type, eg: 0=bool? or person_age=int8. Parse errors 56 | are fatal unless the type is followed by a question 57 | mark. Valid types are string (default), base64, bool, 58 | float32, float64, int8, int16, int32, int64, timestamp 59 | ``` 60 | 61 | ## Testing 62 | 63 | ``` 64 | pylint csv2parquet 65 | pytest 66 | ``` 67 | -------------------------------------------------------------------------------- /csv2parquet/test_argparse.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pyarrow as pa 3 | from . 
import csv2parquet 4 | 5 | def capture_args(_map): 6 | def func(csv_file, output_file, row_group_size, codec, rows, 7 | rename, include, exclude, raw_types): 8 | _map['csv_file'] = csv_file 9 | _map['output_file'] = output_file 10 | _map['row_group_size'] = row_group_size 11 | _map['codec'] = codec 12 | _map['rows'] = rows 13 | _map['rename'] = rename 14 | _map['include'] = include 15 | _map['exclude'] = exclude 16 | _map['raw_types'] = raw_types 17 | 18 | return func 19 | 20 | def test_argparse_csv(): 21 | _map = {} 22 | csv2parquet.main_with_args(capture_args(_map), ['foo.csv']) 23 | assert _map['csv_file'] == 'foo.csv' 24 | assert _map['output_file'] == 'foo.parquet' 25 | 26 | def test_argparse_tsv(): 27 | _map = {} 28 | csv2parquet.main_with_args(capture_args(_map), ['foo.tsv']) 29 | assert _map['csv_file'] == 'foo.tsv' 30 | assert _map['output_file'] == 'foo.parquet' 31 | assert _map['rows'] is None 32 | assert _map['raw_types'] == [] 33 | 34 | def test_argparse_types(): 35 | _map = {} 36 | csv2parquet.main_with_args(capture_args(_map), ['foo.csv', '--type', '0=string', '0=int8?']) 37 | assert _map['raw_types'] == [('0', pa.string(), False), ('0', pa.int8(), True)] 38 | 39 | def test_argparse_override(): 40 | """Can override the default values.""" 41 | _map = {} 42 | csv2parquet.main_with_args( 43 | capture_args(_map), 44 | ['foo.csv', '-o', 'output', '-c', 'somecodec', '-r', '123', '-n', '234']) 45 | assert _map['row_group_size'] == 123 46 | assert _map['codec'] == 'somecodec' 47 | assert _map['output_file'] == 'output' 48 | assert _map['rows'] == 234 49 | 50 | def test_argparse_rename(): 51 | _map = {} 52 | csv2parquet.main_with_args(capture_args(_map), ['foo.csv', '--rename', '0=foo', 'bar=baz']) 53 | assert _map['rename'] == [('0', 'foo'), ('bar', 'baz')] 54 | 55 | def test_argparse_bad_no_args(): 56 | """No args should be an error.""" 57 | with pytest.raises(SystemExit) as pytest_wrapped_e: 58 | csv2parquet.main_with_args(None, []) 59 | assert 
pytest_wrapped_e.type == SystemExit 60 | assert pytest_wrapped_e.value.code == 2 61 | 62 | def test_argparse_bad_inc_and_exc(): 63 | # Can't do both --include and --exclude 64 | with pytest.raises(SystemExit) as pytest_wrapped_e: 65 | csv2parquet.main_with_args(capture_args({}), ['csvs/simple.csv', '-i', 'foo', '-x', 'bar']) 66 | assert pytest_wrapped_e.type == SystemExit 67 | assert pytest_wrapped_e.value.code == 2 68 | 69 | def test_argparse_bad_rename(): 70 | with pytest.raises(SystemExit) as pytest_wrapped_e: 71 | csv2parquet.main_with_args(capture_args({}), ['csvs/simple.csv', '--rename', 'foo']) 72 | assert pytest_wrapped_e.type == SystemExit 73 | assert pytest_wrapped_e.value.code == 2 74 | 75 | def test_argparse_bad_type(): 76 | with pytest.raises(SystemExit) as pytest_wrapped_e: 77 | csv2parquet.main_with_args(capture_args({}), ['csvs/simple.csv', '--type', 'foo']) 78 | assert pytest_wrapped_e.type == SystemExit 79 | assert pytest_wrapped_e.value.code == 2 80 | 81 | def test_argparse_bad_type2(): 82 | with pytest.raises(SystemExit) as pytest_wrapped_e: 83 | csv2parquet.main_with_args(capture_args({}), ['csvs/simple.csv', '--type', 'foo=bar']) 84 | assert pytest_wrapped_e.type == SystemExit 85 | assert pytest_wrapped_e.value.code == 2 86 | -------------------------------------------------------------------------------- /csv2parquet/test_write.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import pyarrow.parquet as pq 3 | import pytest 4 | from . 
import csv2parquet 5 | 6 | def test_write_from_csv(): 7 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv']) 8 | pqf = pq.ParquetFile('csvs/simple.parquet') 9 | assert pqf.num_row_groups == 1 10 | schema = pqf.schema 11 | assert schema.names == ['a', 'b'] 12 | assert schema.column(0).logical_type.type == 'STRING' 13 | assert schema.column(1).logical_type.type == 'STRING' 14 | row_group = pqf.read_row_group(0) 15 | assert row_group.num_rows == 3 16 | row_group = pqf.read_row_group(0) 17 | assert row_group.num_rows == 3 18 | col_a = row_group.column(0).to_pylist() 19 | assert col_a == ['1', '2', '3'] 20 | col_b = row_group.column(1).to_pylist() 21 | assert col_b == ['a', 'b', 'c'] 22 | 23 | 24 | def test_write_from_tsv(): 25 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple2.tsv']) 26 | pqf = pq.ParquetFile('csvs/simple2.parquet') 27 | assert pqf.num_row_groups == 1 28 | schema = pqf.schema 29 | assert schema.names == ['a', 'b'] 30 | assert schema.column(0).logical_type.type == 'STRING' 31 | assert schema.column(1).logical_type.type == 'STRING' 32 | row_group = pqf.read_row_group(0) 33 | assert row_group.num_rows == 1 34 | col_a = row_group.column(0).to_pylist() 35 | assert col_a == ['1'] 36 | col_b = row_group.column(1).to_pylist() 37 | assert col_b == ['b'] 38 | 39 | def test_write_rename(): 40 | csv2parquet.main_with_args(csv2parquet.convert, 41 | ['csvs/simple.csv', '--rename', '0=alpha', 'b=bee']) 42 | pqf = pq.ParquetFile('csvs/simple.parquet') 43 | schema = pqf.schema 44 | assert schema.names == ['alpha', 'bee'] 45 | 46 | def test_write_row_group_size(): 47 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv', '--row-group-size', '1']) 48 | pqf = pq.ParquetFile('csvs/simple.parquet') 49 | assert pqf.num_row_groups == 3 50 | 51 | def test_write_limit(): 52 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv', '--rows', '1']) 53 | pqf = pq.ParquetFile('csvs/simple.parquet') 54 | row_group = 
pqf.read_row_group(0) 55 | assert row_group.num_rows == 1 56 | 57 | def test_write_include_by_name(): 58 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv', '--include', 'a']) 59 | pqf = pq.ParquetFile('csvs/simple.parquet') 60 | schema = pqf.schema 61 | assert schema.names == ['a'] 62 | row_group = pqf.read_row_group(0) 63 | assert row_group.num_rows == 3 64 | col_a = row_group.column(0).to_pylist() 65 | assert col_a == ['1', '2', '3'] 66 | 67 | def test_write_include_by_index(): 68 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv', '--include', '0']) 69 | pqf = pq.ParquetFile('csvs/simple.parquet') 70 | schema = pqf.schema 71 | assert schema.names == ['a'] 72 | row_group = pqf.read_row_group(0) 73 | assert row_group.num_rows == 3 74 | col_a = row_group.column(0).to_pylist() 75 | assert col_a == ['1', '2', '3'] 76 | 77 | def test_write_exclude_by_name(): 78 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv', '--exclude', 'a']) 79 | pqf = pq.ParquetFile('csvs/simple.parquet') 80 | schema = pqf.schema 81 | assert schema.names == ['b'] 82 | row_group = pqf.read_row_group(0) 83 | assert row_group.num_rows == 3 84 | col_b = row_group.column(0).to_pylist() 85 | assert col_b == ['a', 'b', 'c'] 86 | 87 | def test_write_exclude_by_index(): 88 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv', '--exclude', '0']) 89 | pqf = pq.ParquetFile('csvs/simple.parquet') 90 | schema = pqf.schema 91 | assert schema.names == ['b'] 92 | row_group = pqf.read_row_group(0) 93 | assert row_group.num_rows == 3 94 | col_b = row_group.column(0).to_pylist() 95 | assert col_b == ['a', 'b', 'c'] 96 | 97 | def test_sanitize_column_name(): 98 | assert csv2parquet.sanitize_column_name('foo') == 'foo' 99 | assert csv2parquet.sanitize_column_name(' foo ') == 'foo' 100 | assert csv2parquet.sanitize_column_name('foo bar') == 'foo_bar' 101 | assert csv2parquet.sanitize_column_name('foo bar') == 'foo_bar' 102 | assert 
csv2parquet.sanitize_column_name('PostalCode') == 'postalcode' 103 | 104 | def test_required_types(): 105 | csv2parquet.main_with_args(csv2parquet.convert, 106 | ['csvs/types.csv', '--type', 107 | 'bool=bool', 'float32=float32', 'float64=float64', 'int8=int8', 108 | 'int16=int16', 'int32=int32', 'int64=int64', 'string=string', 109 | 'timestamp=timestamp']) 110 | pqf = pq.ParquetFile('csvs/types.parquet') 111 | schema = pqf.schema 112 | assert schema.names == ['bool', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 113 | 'string', 'timestamp'] 114 | row_group = pqf.read_row_group(0) 115 | assert row_group.num_rows == 2 116 | bools = row_group.column(0).to_pylist() 117 | assert bools == [True, False] 118 | float32 = row_group.column(1).to_pylist() 119 | assert float32 == pytest.approx([0.5, 0.6]) 120 | float64 = row_group.column(2).to_pylist() 121 | assert float64 == [0.75, 1.75] 122 | int8 = row_group.column(3).to_pylist() 123 | assert int8 == [12, 13] 124 | int16 = row_group.column(4).to_pylist() 125 | assert int16 == [400, 401] 126 | int32 = row_group.column(5).to_pylist() 127 | assert int32 == [132000, 132001] 128 | int64 = row_group.column(6).to_pylist() 129 | assert int64 == [6000000000, 6000000001] 130 | string = row_group.column(7).to_pylist() 131 | assert string == ['string', 'string'] 132 | timestamp = row_group.column(8).to_pylist() 133 | assert timestamp == [datetime(2018, 7, 9, 0, 0), datetime(2018, 7, 10, 0, 0)] 134 | 135 | def test_required_invalid_types(): 136 | with pytest.raises(ValueError): 137 | csv2parquet.main_with_args(csv2parquet.convert, 138 | ['csvs/invalid-types.csv', '--type', 139 | 'bool=bool', 'float32=float32', 'float64=float64', 'int8=int8', 140 | 'int16=int16', 'int32=int32', 'int64=int64', 'string=string', 141 | 'timestamp=timestamp']) 142 | 143 | def test_too_many_columns(): 144 | with pytest.raises(IndexError): 145 | csv2parquet.main_with_args(csv2parquet.convert, 146 | ['csvs/too-many-columns.csv']) 147 | 148 | def 
test_opt_invalid_types(): 149 | csv2parquet.main_with_args(csv2parquet.convert, 150 | ['csvs/invalid-types.csv', '--type', 151 | 'bool=bool?', 'float32=float32?', 'float64=float64?', 'int8=int8?', 152 | 'int16=int16?', 'int32=int32?', 'int64=int64?', 'string=string?', 153 | 'timestamp=timestamp?']) 154 | pqf = pq.ParquetFile('csvs/invalid-types.parquet') 155 | schema = pqf.schema 156 | assert schema.names == ['bool', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 157 | 'string', 'timestamp'] 158 | row_group = pqf.read_row_group(0) 159 | assert row_group.num_rows == 2 160 | bools = row_group.column(0).to_pylist() 161 | assert bools == [True, None] 162 | float32 = row_group.column(1).to_pylist() 163 | assert len(float32) == 2 164 | assert float32[0] == pytest.approx(0.5) 165 | assert float32[1] is None 166 | float64 = row_group.column(2).to_pylist() 167 | assert float64 == [0.75, None] 168 | int8 = row_group.column(3).to_pylist() 169 | assert int8 == [12, None] 170 | int16 = row_group.column(4).to_pylist() 171 | assert int16 == [400, None] 172 | int32 = row_group.column(5).to_pylist() 173 | assert int32 == [132000, None] 174 | int64 = row_group.column(6).to_pylist() 175 | assert int64 == [6000000000, None] 176 | string = row_group.column(7).to_pylist() 177 | assert string == ['string', 'blah'] 178 | timestamp = row_group.column(8).to_pylist() 179 | assert timestamp == [datetime(2018, 7, 9, 0, 0), None] 180 | 181 | def test_required_invalid_ints(): 182 | with pytest.raises(ValueError): 183 | csv2parquet.main_with_args(csv2parquet.convert, 184 | ['csvs/ints.csv', '--type', 185 | 'int8=int8', 'int16=int16', 'int32=int32']) 186 | 187 | def test_opt_invalid_ints(): 188 | csv2parquet.main_with_args(csv2parquet.convert, 189 | ['csvs/ints.csv', '--type', 190 | 'int8=int8?', 'int16=int16?', 'int32=int32?']) 191 | pqf = pq.ParquetFile('csvs/ints.parquet') 192 | schema = pqf.schema 193 | assert schema.names == ['int8', 'int16', 'int32'] 194 | row_group = 
"""Convert a CSV or TSV file to a Parquet file.

The public entry points are :func:`convert` (the worker),
:func:`main_with_args` (argument parsing) and :func:`main` (console script).
"""
import argparse
import csv
import re
import sys
from datetime import datetime
from base64 import standard_b64decode
import pyarrow as pa
import pyarrow.parquet as pq

# Singleton pyarrow type instances, created once so the type-dispatch
# comparisons below always compare against the same objects.
PA_BOOL = pa.bool_()
PA_FLOAT32 = pa.float32()
PA_FLOAT64 = pa.float64()
PA_INT8 = pa.int8()
PA_INT16 = pa.int16()
PA_INT32 = pa.int32()
PA_INT64 = pa.int64()
PA_STRING = pa.string()
PA_TIMESTAMP = pa.timestamp('ns')
PA_BINARY = pa.binary()

# Inclusive value ranges for the fixed-width integer types.  int64 is not
# listed: Python ints are arbitrary precision and pyarrow validates them.
_INT_RANGES = {
    PA_INT8: (-128, 127),
    PA_INT16: (-32768, 32767),
    PA_INT32: (-2147483648, 2147483647),
}

# Accepted spellings for boolean CSV cells.
_FALSY = frozenset(('0', 'N', 'F', 'false'))
_TRUTHY = frozenset(('1', 'Y', 'T', 'true'))

# Number of data rows buffered before being frozen into pyarrow arrays.
_CHUNK_ROWS = 10000


def get_delimiter(csv_file):
    """Return the field delimiter implied by the filename: tab for ``.tsv``,
    comma otherwise."""
    return '\t' if csv_file.endswith('.tsv') else ','


def sanitize_column_name(name):
    """Normalize a header cell to a parquet-friendly column name.

    Lowercases *name*, replaces runs of characters outside ``[a-z0-9]`` with a
    single underscore, and strips underscores from both ends.
    """
    cleaned = re.sub(r'[^a-z0-9]', '_', name.lower())
    cleaned = re.sub(r'_+', '_', cleaned)
    return cleaned.strip('_')


def get_column_names(csv_file, rename):
    """Read the header row of *csv_file* and return its sanitized names.

    ``rename`` is a list of ``(colspec, new_name)`` pairs where ``colspec`` is
    either the sanitized original name or a stringified column index.
    Returns ``None`` for a file with no rows at all.
    """
    with open(csv_file) as csvfile:
        reader = csv.reader(csvfile, delimiter=get_delimiter(csv_file))
        for header in reader:
            column_names = []
            for idx, col in enumerate(header):
                clean = sanitize_column_name(col)
                for old, new in rename:
                    if old in (clean, str(idx)):
                        clean = new
                column_names.append(clean)
            # Only the first row is the header.
            return column_names
    return None


def get_pyarrow_types():
    """Map the type names accepted by ``--type`` to pyarrow types."""
    return {
        'bool': PA_BOOL,
        'float32': PA_FLOAT32,
        'float64': PA_FLOAT64,
        'int8': PA_INT8,
        'int16': PA_INT16,
        'int32': PA_INT32,
        'int64': PA_INT64,
        'string': PA_STRING,
        'timestamp': PA_TIMESTAMP,
        'base64': PA_BINARY
    }


def _parse_value(value, expected_type):
    """Convert the raw CSV cell *value* (a str) to a Python object suitable
    for a pyarrow array of *expected_type*.

    Raises ValueError on malformed input (bad bool spelling, out-of-range
    integer, non-numeric float, non-``YYYY-MM-DD`` timestamp).  A base64 cell
    may also raise from the decoder.
    """
    if expected_type == PA_STRING:
        return value
    if expected_type == PA_BOOL:
        if value in _FALSY:
            return False
        if value in _TRUTHY:
            return True
        raise ValueError()
    if expected_type in (PA_FLOAT32, PA_FLOAT64):
        return float(value)
    if expected_type in _INT_RANGES:
        parsed = int(value)
        low, high = _INT_RANGES[expected_type]
        if not low <= parsed <= high:
            raise ValueError()
        return parsed
    if expected_type == PA_INT64:
        return int(value)
    if expected_type == PA_TIMESTAMP:
        # Currently only support YYYY-MM-DD dates.
        comps = value.split('-')
        if len(comps) != 3:
            raise ValueError()
        return datetime(int(comps[0]), int(comps[1]), int(comps[2]))
    if expected_type == PA_BINARY:
        return standard_b64decode(value)
    # Unknown type spec: pass the raw string through unchanged.
    return value


# pylint: disable=too-many-branches,too-many-statements
def convert(csv_file, output_file, row_group_size, codec, max_rows,
            rename, include, exclude, raw_types):
    """Convert *csv_file* to a Parquet file at *output_file*.

    ``raw_types`` is a list of ``(colspec, pyarrow_type, optional)`` tuples;
    when ``optional`` is true, unparseable cells become nulls instead of
    raising.  ``include``/``exclude`` select columns by index or name.
    Raises ValueError on a bad cell in a required-typed column and IndexError
    when a row has more cells than the header.
    """
    column_names = get_column_names(csv_file, rename)
    columns = [[] for _ in column_names]
    arrs = [[] for _ in column_names]
    dropped_values = [0 for _ in column_names]
    # TODO: these examples are collected but never reported to the user.
    dropped_value_examples = [[] for _ in column_names]

    # Resolve the (type, optional) pair for every column; a --type entry may
    # address a column by index or by its (sanitized, possibly renamed) name.
    types = []
    for idx, name in enumerate(column_names):
        opt = False
        column_type = pa.string()  # default to string if unspecified
        for target, new_type, new_opt in raw_types:
            if target in (str(idx), name):
                opt = new_opt
                column_type = new_type
        types.append((column_type, opt))

    if include:
        keep = [value in include or str(idx) in include
                for idx, value in enumerate(column_names)]
    else:
        keep = [not (value in exclude or str(idx) in exclude)
                for idx, value in enumerate(column_names)]

    def add_arrays(cols):
        # Freeze the buffered Python lists into typed pyarrow arrays.
        for colnum, col in enumerate(cols):
            arrs[colnum].append(pa.array(col, type=types[colnum][0]))

    with open(csv_file) as csvfile:
        reader = csv.reader(csvfile, delimiter=get_delimiter(csv_file))
        for rownum, row in enumerate(reader):
            if rownum == 0:
                continue  # header row was already consumed by get_column_names
            for idx, value in enumerate(row):
                try:
                    if not keep[idx]:
                        continue
                    expected_type = types[idx][0]
                    value = _parse_value(value, expected_type)
                except ValueError as err:
                    if types[idx][1]:
                        # Optional ("type?") column: null out the bad cell.
                        dropped_values[idx] += 1
                        if dropped_values[idx] < 10:
                            dropped_value_examples[idx].append(str(value))
                        value = None
                    else:
                        raise ValueError('unexpected value for column {}, type {}: {}'
                                         .format(column_names[idx], expected_type,
                                                 str(value))) from err
                except IndexError as err:
                    # keep[idx] blew up: the row is wider than the header.
                    raise IndexError('Too many columns {} for row {}'
                                     .format(idx, rownum)) from err
                columns[idx].append(value)
            if rownum % _CHUNK_ROWS == 0:
                add_arrays(columns)
                columns = [[] for _ in column_names]
            if rownum == max_rows:
                break

    # Flush any partially filled final chunk.
    if columns and any(columns):
        add_arrays(columns)

    # Concatenate each column's chunked arrays into one array per kept column.
    data = [
        pa.array([item.as_py() for sublist in arr for item in sublist],
                 type=types[idx][0])
        if keep[idx] else None
        for idx, arr in enumerate(arrs)]
    data = [x for x in data if x is not None]
    kept_names = [name for name, kept in zip(column_names, keep) if kept]
    batch = pa.RecordBatch.from_arrays(data, kept_names)
    table = pa.Table.from_batches([batch])

    pq.write_table(table,
                   output_file,
                   version='1.0',
                   compression=codec,
                   use_dictionary=True,
                   row_group_size=row_group_size)


def main_with_args(func, argv):
    """Parse *argv* and invoke ``func(csv_file, output_file, row_group_size,
    codec, rows, rename, include, exclude, raw_types)``.

    Exits with status 2 (matching argparse's own error code) on malformed
    ``--rename`` or ``--type`` arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('csv_file', help="input file, can be CSV or TSV")
    parser.add_argument('-n', '--rows', type=int,
                        help='The number of rows to include, useful for testing.', nargs=1)
    parser.add_argument('-r', '--row-group-size', default=[10000], type=int,
                        help='The number of rows per row group.', nargs=1)
    parser.add_argument('-o', '--output', help='The parquet file', nargs=1)
    parser.add_argument('-c', '--codec', default=['snappy'],
                        help='The compression codec to use (brotli, gzip, snappy, zstd, none)',
                        nargs=1)
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-i', '--include', default=[],
                       help='Include the given columns (by index or name)', nargs='+')
    group.add_argument('-x', '--exclude', default=[],
                       help='Exclude the given columns (by index or name)', nargs='+')

    parser.add_argument('-R', '--rename', default=[], nargs='+',
                        help='Rename a column. Specify the column to be renamed and its new name,' +
                        ' eg: 0=age or person_age=age')
    parser.add_argument('-t', '--type', default=[], nargs='+',
                        help='Parse a column as a given type. Specify the column and its type,' +
                        ' eg: 0=bool? or person_age=int8. Parse errors are fatal unless the type' +
                        ' is followed by a question mark. Valid types are string (default), base64, bool,' +
                        ' float32, float64, int8, int16, int32, int64, timestamp')

    args = parser.parse_args(argv)
    if args.output is None:
        # Derive foo.parquet from foo.csv / foo.tsv.
        output = re.sub(r'\.tsv$|\.csv$', '', args.csv_file) + '.parquet'
    else:
        output = args.output[0]

    # Rewrite each --rename entry "colspec=new-name" into a (colspec, name) pair.
    for i, haystack in enumerate(args.rename):
        needle = haystack.find('=')
        if needle == -1:
            print(haystack + ' is not a valid option for --rename, it must have the form')
            print('colspec=new-name, where colspec is a numeric index or the original name.')
            sys.exit(2)

        args.rename[i] = (haystack[:needle], haystack[needle + 1:])

    # Rewrite each --type entry "colspec=type[?]" into (colspec, type, optional).
    known_types = get_pyarrow_types()  # hoisted: was rebuilt per entry
    for i, haystack in enumerate(args.type):
        needle = haystack.find('=')
        if needle == -1:
            print(haystack + ' is not a valid option for --type, it must have the form')
            print('colspec=type, where colspec is a numeric index or the original name.')
            sys.exit(2)

        # A trailing '?' marks the column as optional (parse errors -> null).
        opt = haystack[-1] == '?'
        if opt:
            haystack = haystack[:-1]

        column_type_raw = haystack[needle + 1:]
        column_type = known_types.get(column_type_raw)
        if column_type is None:
            print(haystack + ' is not a valid option for --type. ' +
                  column_type_raw + ' is unknown.')
            sys.exit(2)

        args.type[i] = (haystack[:needle], column_type, opt)

    # Unwrap the nargs=1 lists into scalars before calling through.
    args.rows = args.rows[0] if args.rows else None
    args.row_group_size = args.row_group_size[0]
    args.codec = args.codec[0]
    func(args.csv_file,
         output,
         args.row_group_size,
         args.codec,
         args.rows,
         args.rename,
         args.include,
         args.exclude,
         args.type)


def main():
    """Console-script entry point."""
    main_with_args(convert, sys.argv[1:])
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # Specify a configuration file. 4 | #rcfile= 5 | 6 | # Python code to execute, usually for sys.path manipulation such as 7 | # pygtk.require(). 8 | #init-hook= 9 | 10 | # Add files or directories to the blacklist. They should be base names, not 11 | # paths. 12 | ignore=CVS 13 | 14 | # Pickle collected data for later comparisons. 15 | persistent=yes 16 | 17 | # List of plugins (as comma separated values of python modules names) to load, 18 | # usually to register additional checkers. 19 | load-plugins= 20 | 21 | # Use multiple processes to speed up Pylint. 22 | jobs=1 23 | 24 | # Allow loading of arbitrary C extensions. Extensions are imported into the 25 | # active Python interpreter and may run arbitrary code. 26 | unsafe-load-any-extension=no 27 | 28 | # A comma-separated list of package or module names from where C extensions may 29 | # be loaded. 
Extensions are loading into the active Python interpreter and may 30 | # run arbitrary code 31 | extension-pkg-whitelist= 32 | 33 | # Allow optimization of some AST trees. This will activate a peephole AST 34 | # optimizer, which will apply various small optimizations. For instance, it can 35 | # be used to obtain the result of joining multiple strings with the addition 36 | # operator. Joining a lot of strings can lead to a maximum recursion error in 37 | # Pylint and this flag can prevent that. It has one side effect, the resulting 38 | # AST will be different than the one from reality. 39 | optimize-ast=no 40 | 41 | 42 | [MESSAGES CONTROL] 43 | 44 | # Only show warnings with the listed confidence levels. Leave empty to show 45 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 46 | confidence= 47 | 48 | # Enable the message, report, category or checker with the given id(s). You can 49 | # either give multiple identifier separated by comma (,) or put this option 50 | # multiple time. See also the "--disable" option for examples. 51 | #enable= 52 | 53 | # Disable the message, report, category or checker with the given id(s). You 54 | # can either give multiple identifiers separated by comma (,) or put this 55 | # option multiple times (only on the command line, not in the configuration 56 | # file where it should appear only once).You can also use "--disable=all" to 57 | # disable everything first and then reenable specific checks. For example, if 58 | # you want to run only the similarities checker, you can use "--disable=all 59 | # --enable=similarities". 
If you want to run only the classes checker, but have 60 | # no Warning level messages displayed, use"--disable=all --enable=classes 61 | # --disable=W" 62 | disable=too-many-arguments,too-many-locals,locally-disabled,missing-docstring,import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating 63 | 64 | 65 | [REPORTS] 66 | 67 | # Set the output format. Available formats are text, parseable, colorized, msvs 68 | # (visual studio) and html. You can also give a reporter class, eg 69 | # mypackage.mymodule.MyReporterClass. 70 | output-format=text 71 | 72 | # Put messages in a separate file for each module / package specified on the 73 | # command line instead of printing them on stdout. Reports (if any) will be 74 | # written in a file name "pylint_global.[txt|html]". 75 | files-output=no 76 | 77 | # Tells whether to display a full report or only the messages 78 | reports=yes 79 | 80 | # Python expression which should return a note less than 10 (10 is the highest 81 | # note). You have access to the variables errors warning, statement which 82 | # respectively contain the number of errors / warnings messages and the total 83 | # number of statements analyzed. 
This is used by the global evaluation report 84 | # (RP0004). 85 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 86 | 87 | # Template used to display messages. This is a python new-style format string 88 | # used to format the message information. See doc for all details 89 | #msg-template= 90 | 91 | 92 | [BASIC] 93 | 94 | # List of builtins function names that should not be used, separated by a comma 95 | bad-functions=map,filter,input 96 | 97 | # Good variable names which should always be accepted, separated by a comma 98 | good-names=i,j,k,ex,Run,_ 99 | 100 | # Bad variable names which should always be refused, separated by a comma 101 | bad-names=foo,bar,baz,toto,tutu,tata 102 | 103 | # Colon-delimited sets of names that determine each other's naming style when 104 | # the name regexes allow several styles. 105 | name-group= 106 | 107 | # Include a hint for the correct naming format with invalid-name 108 | include-naming-hint=no 109 | 110 | # Regular expression matching correct function names 111 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 112 | 113 | # Naming hint for function names 114 | function-name-hint=[a-z_][a-z0-9_]{2,30}$ 115 | 116 | # Regular expression matching correct variable names 117 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 118 | 119 | # Naming hint for variable names 120 | variable-name-hint=[a-z_][a-z0-9_]{2,30}$ 121 | 122 | # Regular expression matching correct constant names 123 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 124 | 125 | # Naming hint for constant names 126 | const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 127 | 128 | # Regular expression matching correct attribute names 129 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 130 | 131 | # Naming hint for attribute names 132 | attr-name-hint=[a-z_][a-z0-9_]{2,30}$ 133 | 134 | # Regular expression matching correct argument names 135 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 136 | 137 | # Naming hint for argument names 138 | argument-name-hint=[a-z_][a-z0-9_]{2,30}$ 139 
| 140 | # Regular expression matching correct class attribute names 141 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 142 | 143 | # Naming hint for class attribute names 144 | class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 145 | 146 | # Regular expression matching correct inline iteration names 147 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 148 | 149 | # Naming hint for inline iteration names 150 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ 151 | 152 | # Regular expression matching correct class names 153 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 154 | 155 | # Naming hint for class names 156 | class-name-hint=[A-Z_][a-zA-Z0-9]+$ 157 | 158 | # Regular expression matching correct module names 159 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 160 | 161 | # Naming hint for module names 162 | module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 163 | 164 | # Regular expression matching correct method names 165 | method-rgx=[a-z_][a-z0-9_]{2,30}$ 166 | 167 | # Naming hint for method names 168 | method-name-hint=[a-z_][a-z0-9_]{2,30}$ 169 | 170 | # Regular expression which should only match function or class names that do 171 | # not require a docstring. 172 | no-docstring-rgx=^_ 173 | 174 | # Minimum line length for functions/classes that require docstrings, shorter 175 | # ones are exempt. 176 | docstring-min-length=-1 177 | 178 | 179 | [ELIF] 180 | 181 | # Maximum number of nested blocks for function / method body 182 | max-nested-blocks=5 183 | 184 | 185 | [LOGGING] 186 | 187 | # Logging modules to check that the string format arguments are in logging 188 | # function parameter format 189 | logging-modules=logging 190 | 191 | 192 | [FORMAT] 193 | 194 | # Maximum number of characters on a single line. 195 | max-line-length=120 196 | 197 | # Regexp for a line that is allowed to be longer than the limit. 
198 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$ 199 | 200 | # Allow the body of an if to be on the same line as the test if there is no 201 | # else. 202 | single-line-if-stmt=no 203 | 204 | # List of optional constructs for which whitespace checking is disabled. `dict- 205 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 206 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 207 | # `empty-line` allows space-only lines. 208 | no-space-check=trailing-comma,dict-separator 209 | 210 | # Maximum number of lines in a module 211 | max-module-lines=1000 212 | 213 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 214 | # tab). 215 | indent-string=' ' 216 | 217 | # Number of spaces of indent required inside a hanging or continued line. 218 | indent-after-paren=4 219 | 220 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 221 | expected-line-ending-format= 222 | 223 | 224 | [SPELLING] 225 | 226 | # Spelling dictionary name. Available dictionaries: none. To make it working 227 | # install python-enchant package. 228 | spelling-dict= 229 | 230 | # List of comma separated words that should not be checked. 231 | spelling-ignore-words= 232 | 233 | # A path to a file that contains private dictionary; one word per line. 234 | spelling-private-dict-file= 235 | 236 | # Tells whether to store unknown words to indicated private dictionary in 237 | # --spelling-private-dict-file option instead of raising a message. 238 | spelling-store-unknown-words=no 239 | 240 | 241 | [MISCELLANEOUS] 242 | 243 | # List of note tags to take in consideration, separated by a comma. 244 | notes=FIXME,XXX,TODO 245 | 246 | 247 | [VARIABLES] 248 | 249 | # Tells whether we should check for unused import in __init__ files. 250 | init-import=no 251 | 252 | # A regular expression matching the name of dummy variables (i.e. expectedly 253 | # not used).
254 | dummy-variables-rgx=_$|dummy 255 | 256 | # List of additional names supposed to be defined in builtins. Remember that 257 | # you should avoid to define new builtins when possible. 258 | additional-builtins= 259 | 260 | # List of strings which can identify a callback function by name. A callback 261 | # name must start or end with one of those strings. 262 | callbacks=cb_,_cb 263 | 264 | 265 | [TYPECHECK] 266 | 267 | # Tells whether missing members accessed in mixin class should be ignored. A 268 | # mixin class is detected if its name ends with "mixin" (case insensitive). 269 | ignore-mixin-members=yes 270 | 271 | # List of module names for which member attributes should not be checked 272 | # (useful for modules/projects where namespaces are manipulated during runtime 273 | # and thus existing member attributes cannot be deduced by static analysis. It 274 | # supports qualified module names, as well as Unix pattern matching. 275 | ignored-modules= 276 | 277 | # List of classes names for which member attributes should not be checked 278 | # (useful for classes with attributes dynamically set). This supports can work 279 | # with qualified names. 280 | ignored-classes= 281 | 282 | # List of members which are set dynamically and missed by pylint inference 283 | # system, and so shouldn't trigger E1101 when accessed. Python regular 284 | # expressions are accepted. 285 | generated-members= 286 | 287 | 288 | [SIMILARITIES] 289 | 290 | # Minimum lines number of a similarity. 291 | min-similarity-lines=4 292 | 293 | # Ignore comments when computing similarities. 294 | ignore-comments=yes 295 | 296 | # Ignore docstrings when computing similarities. 297 | ignore-docstrings=yes 298 | 299 | # Ignore imports when computing similarities. 300 | ignore-imports=no 301 | 302 | 303 | [CLASSES] 304 | 305 | # List of method names used to declare (i.e. assign) instance attributes. 
306 | defining-attr-methods=__init__,__new__,setUp 307 | 308 | # List of valid names for the first argument in a class method. 309 | valid-classmethod-first-arg=cls 310 | 311 | # List of valid names for the first argument in a metaclass class method. 312 | valid-metaclass-classmethod-first-arg=mcs 313 | 314 | # List of member names, which should be excluded from the protected access 315 | # warning. 316 | exclude-protected=_asdict,_fields,_replace,_source,_make 317 | 318 | 319 | [IMPORTS] 320 | 321 | # Deprecated modules which should not be used, separated by a comma 322 | deprecated-modules=regsub,TERMIOS,Bastion,rexec 323 | 324 | # Create a graph of every (i.e. internal and external) dependencies in the 325 | # given file (report RP0402 must not be disabled) 326 | import-graph= 327 | 328 | # Create a graph of external dependencies in the given file (report RP0402 must 329 | # not be disabled) 330 | ext-import-graph= 331 | 332 | # Create a graph of internal dependencies in the given file (report RP0402 must 333 | # not be disabled) 334 | int-import-graph= 335 | 336 | 337 | [DESIGN] 338 | 339 | # Maximum number of arguments for function / method 340 | max-args=5 341 | 342 | # Argument names that match this expression will be ignored. Default to name 343 | # with leading underscore 344 | ignored-argument-names=_.* 345 | 346 | # Maximum number of locals for function / method body 347 | max-locals=15 348 | 349 | # Maximum number of return / yield for function / method body 350 | max-returns=6 351 | 352 | # Maximum number of branch for function / method body 353 | max-branches=12 354 | 355 | # Maximum number of statements in function / method body 356 | max-statements=50 357 | 358 | # Maximum number of parents for a class (see R0901). 359 | max-parents=7 360 | 361 | # Maximum number of attributes for a class (see R0902). 362 | max-attributes=7 363 | 364 | # Minimum number of public methods for a class (see R0903). 
365 | min-public-methods=2 366 | 367 | # Maximum number of public methods for a class (see R0904). 368 | max-public-methods=20 369 | 370 | # Maximum number of boolean expressions in a if statement 371 | max-bool-expr=5 372 | 373 | 374 | [EXCEPTIONS] 375 | 376 | # Exceptions that will emit a warning when being caught. Defaults to 377 | # "Exception" 378 | overgeneral-exceptions=Exception 379 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "042eb59af1add84b9b64485d9844d8d9f5eae89e83f6824441449783cfb7fae8" 5 | }, 6 | "host-environment-markers": { 7 | "implementation_name": "cpython", 8 | "implementation_version": "3.5.2", 9 | "os_name": "posix", 10 | "platform_machine": "x86_64", 11 | "platform_python_implementation": "CPython", 12 | "platform_release": "4.9.3-040903-generic", 13 | "platform_system": "Linux", 14 | "platform_version": "#201701120631 SMP Thu Jan 12 11:33:59 UTC 2017", 15 | "python_full_version": "3.5.2", 16 | "python_version": "3.5", 17 | "sys_platform": "linux" 18 | }, 19 | "pipfile-spec": 6, 20 | "requires": {}, 21 | "sources": [ 22 | { 23 | "name": "pypi", 24 | "url": "https://pypi.python.org/simple", 25 | "verify_ssl": true 26 | } 27 | ] 28 | }, 29 | "default": { 30 | "astroid": { 31 | "hashes": [ 32 | "sha256:840947ebfa8b58f318d42301cf8c0a20fd794a33b61cc4638e28e9e61ba32f42", 33 | "sha256:71ea07f44df9568a75d0f354c49143a4575d90645e9fead6dfb52c26a85ed13a" 34 | ], 35 | "version": "==2.3.3" 36 | }, 37 | "atomicwrites": { 38 | "hashes": [ 39 | "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4", 40 | "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6" 41 | ], 42 | "version": "==1.3.0" 43 | }, 44 | "attrs": { 45 | "hashes": [ 46 | "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", 47 | 
"sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" 48 | ], 49 | "version": "==19.3.0" 50 | }, 51 | "certifi": { 52 | "hashes": [ 53 | "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef", 54 | "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50" 55 | ], 56 | "version": "==2019.9.11" 57 | }, 58 | "chardet": { 59 | "hashes": [ 60 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691", 61 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae" 62 | ], 63 | "version": "==3.0.4" 64 | }, 65 | "codecov": { 66 | "hashes": [ 67 | "sha256:ae00d68e18d8a20e9c3288ba3875ae03db3a8e892115bf9b83ef20507732bed4", 68 | "sha256:8ed8b7c6791010d359baed66f84f061bba5bd41174bf324c31311e8737602788" 69 | ], 70 | "version": "==2.0.15" 71 | }, 72 | "coverage": { 73 | "hashes": [ 74 | "sha256:eee64c616adeff7db37cc37da4180a3a5b6177f5c46b187894e633f088fb5b28", 75 | "sha256:ef824cad1f980d27f26166f86856efe11eff9912c4fed97d3804820d43fa550c", 76 | "sha256:9a334d6c83dfeadae576b4d633a71620d40d1c379129d587faa42ee3e2a85cce", 77 | "sha256:7494b0b0274c5072bddbfd5b4a6c6f18fbbe1ab1d22a41e99cd2d00c8f96ecfe", 78 | "sha256:826f32b9547c8091679ff292a82aca9c7b9650f9fda3e2ca6bf2ac905b7ce888", 79 | "sha256:dd579709a87092c6dbee09d1b7cfa81831040705ffa12a1b248935274aee0437", 80 | "sha256:08907593569fe59baca0bf152c43f3863201efb6113ecb38ce7e97ce339805a6", 81 | "sha256:63a9a5fc43b58735f65ed63d2cf43508f462dc49857da70b8980ad78d41d52fc", 82 | "sha256:e2ede7c1d45e65e209d6093b762e98e8318ddeff95317d07a27a2140b80cfd24", 83 | "sha256:6b62544bb68106e3f00b21c8930e83e584fdca005d4fffd29bb39fb3ffa03cb5", 84 | "sha256:331cb5115673a20fb131dadd22f5bcaf7677ef758741312bee4937d71a14b2ef", 85 | "sha256:bf1ef9eb901113a9805287e090452c05547578eaab1b62e4ad456fcc049a9b7e", 86 | "sha256:386e2e4090f0bc5df274e720105c342263423e77ee8826002dcffe0c9533dbca", 87 | "sha256:fa964bae817babece5aa2e8c1af841bebb6d0b9add8e637548809d040443fee0", 88 | 
"sha256:df6712284b2e44a065097846488f66840445eb987eb81b3cc6e4149e7b6982e1", 89 | "sha256:efc89291bd5a08855829a3c522df16d856455297cf35ae827a37edac45f466a7", 90 | "sha256:e4ef9c164eb55123c62411f5936b5c2e521b12356037b6e1c2617cef45523d47", 91 | "sha256:ff37757e068ae606659c28c3bd0d923f9d29a85de79bf25b2b34b148473b5025", 92 | "sha256:bf0a7aed7f5521c7ca67febd57db473af4762b9622254291fbcbb8cd0ba5e33e", 93 | "sha256:19e4df788a0581238e9390c85a7a09af39c7b539b29f25c89209e6c3e371270d", 94 | "sha256:60851187677b24c6085248f0a0b9b98d49cba7ecc7ec60ba6b9d2e5574ac1ee9", 95 | "sha256:245388cda02af78276b479f299bbf3783ef0a6a6273037d7c60dc73b8d8d7755", 96 | "sha256:c0afd27bc0e307a1ffc04ca5ec010a290e49e3afbe841c5cafc5c5a80ecd81c9", 97 | "sha256:6ba744056423ef8d450cf627289166da65903885272055fb4b5e113137cfa14f", 98 | "sha256:af7ed8a8aa6957aac47b4268631fa1df984643f07ef00acd374e456364b373f5", 99 | "sha256:3a794ce50daee01c74a494919d5ebdc23d58873747fa0e288318728533a3e1ca", 100 | "sha256:0be0f1ed45fc0c185cfd4ecc19a1d6532d72f86a2bac9de7e24541febad72650", 101 | "sha256:eca2b7343524e7ba246cab8ff00cab47a2d6d54ada3b02772e908a45675722e2", 102 | "sha256:93715dffbcd0678057f947f496484e906bf9509f5c1c38fc9ba3922893cda5f5", 103 | "sha256:23cc09ed395b03424d1ae30dcc292615c1372bfba7141eb85e11e50efaa6b351", 104 | "sha256:141f08ed3c4b1847015e2cd62ec06d35e67a3ac185c26f7635f4406b90afa9c5", 105 | "sha256:e07d9f1a23e9e93ab5c62902833bf3e4b1f65502927379148b6622686223125c" 106 | ], 107 | "version": "==4.5.4" 108 | }, 109 | "idna": { 110 | "hashes": [ 111 | "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c", 112 | "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407" 113 | ], 114 | "version": "==2.8" 115 | }, 116 | "importlib-metadata": { 117 | "hashes": [ 118 | "sha256:d5f18a79777f3aa179c145737780282e27b508fc8fd688cb17c7a813e8bd39af", 119 | "sha256:aa18d7378b00b40847790e7c27e11673d7fed219354109d0e7b9e5b25dc3ad26" 120 | ], 121 | "markers": "python_version < '3.8'", 122 | 
"version": "==0.23" 123 | }, 124 | "isort": { 125 | "hashes": [ 126 | "sha256:6e811fcb295968434526407adb8796944f1988c5b65e8139058f2014cbe100fd", 127 | "sha256:54da7e92468955c4fceacd0c86bd0ec997b0e1ee80d97f67c35a78b719dccab1" 128 | ], 129 | "version": "==4.3.21" 130 | }, 131 | "lazy-object-proxy": { 132 | "hashes": [ 133 | "sha256:a2238e9d1bb71a56cd710611a1614d1194dc10a175c1e08d75e1a7bcc250d442", 134 | "sha256:0c4b206227a8097f05c4dbdd323c50edf81f15db3b8dc064d08c62d37e1a504d", 135 | "sha256:efa1909120ce98bbb3777e8b6f92237f5d5c8ea6758efea36a473e1d38f7d3e4", 136 | "sha256:4677f594e474c91da97f489fea5b7daa17b5517190899cf213697e48d3902f5a", 137 | "sha256:d945239a5639b3ff35b70a88c5f2f491913eb94871780ebfabb2568bd58afc5a", 138 | "sha256:9651375199045a358eb6741df3e02a651e0330be090b3bc79f6d0de31a80ec3e", 139 | "sha256:eba7011090323c1dadf18b3b689845fd96a61ba0a1dfbd7f24b921398affc357", 140 | "sha256:48dab84ebd4831077b150572aec802f303117c8cc5c871e182447281ebf3ac50", 141 | "sha256:ca0a928a3ddbc5725be2dd1cf895ec0a254798915fb3a36af0964a0a4149e3db", 142 | "sha256:194d092e6f246b906e8f70884e620e459fc54db3259e60cf69a4d66c3fda3449", 143 | "sha256:97bb5884f6f1cdce0099f86b907aa41c970c3c672ac8b9c8352789e103cf3156", 144 | "sha256:cb2c7c57005a6804ab66f106ceb8482da55f5314b7fcb06551db1edae4ad1531", 145 | "sha256:8d859b89baf8ef7f8bc6b00aa20316483d67f0b1cbf422f5b4dc56701c8f2ffb", 146 | "sha256:1be7e4c9f96948003609aa6c974ae59830a6baecc5376c25c92d7d697e684c08", 147 | "sha256:d74bb8693bf9cf75ac3b47a54d716bbb1a92648d5f781fc799347cfc95952383", 148 | "sha256:9b15f3f4c0f35727d3a0fba4b770b3c4ebbb1fa907dbcc046a1d2799f3edd142", 149 | "sha256:9254f4358b9b541e3441b007a0ea0764b9d056afdeafc1a5569eee1cc6c1b9ea", 150 | "sha256:a6ae12d08c0bf9909ce12385803a543bfe99b95fe01e752536a60af2b7797c62", 151 | "sha256:5541cada25cd173702dbd99f8e22434105456314462326f06dba3e180f203dfd", 152 | "sha256:59f79fef100b09564bc2df42ea2d8d21a64fdcda64979c0fa3db7bdaabaf6239", 153 | 
"sha256:f3900e8a5de27447acbf900b4750b0ddfd7ec1ea7fbaf11dfa911141bc522af0" 154 | ], 155 | "version": "==1.4.3" 156 | }, 157 | "mccabe": { 158 | "hashes": [ 159 | "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", 160 | "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" 161 | ], 162 | "version": "==0.6.1" 163 | }, 164 | "more-itertools": { 165 | "hashes": [ 166 | "sha256:92b8c4b06dac4f0611c0729b2f2ede52b2e1bac1ab48f089c7ddc12e26bb60c4", 167 | "sha256:409cd48d4db7052af495b09dec721011634af3753ae1ef92d2b32f73a745f832" 168 | ], 169 | "version": "==7.2.0" 170 | }, 171 | "numpy": { 172 | "hashes": [ 173 | "sha256:ede47b98de79565fcd7f2decb475e2dcc85ee4097743e551fe26cfc7eb3ff143", 174 | "sha256:43bb4b70585f1c2d153e45323a886839f98af8bfa810f7014b20be714c37c447", 175 | "sha256:c7354e8f0eca5c110b7e978034cd86ed98a7a5ffcf69ca97535445a595e07b8e", 176 | "sha256:64874913367f18eb3013b16123c9fed113962e75d809fca5b78ebfbb73ed93ba", 177 | "sha256:6ca4000c4a6f95a78c33c7dadbb9495c10880be9c89316aa536eac359ab820ae", 178 | "sha256:75fd817b7061f6378e4659dd792c84c0b60533e867f83e0d1e52d5d8e53df88c", 179 | "sha256:7d81d784bdbed30137aca242ab307f3e65c8d93f4c7b7d8f322110b2e90177f9", 180 | "sha256:fe39f5fd4103ec4ca3cb8600b19216cd1ff316b4990f4c0b6057ad982c0a34d5", 181 | "sha256:e467c57121fe1b78a8f68dd9255fbb3bb3f4f7547c6b9e109f31d14569f490c3", 182 | "sha256:8d0af8d3664f142414fd5b15cabfd3b6cc3ef242a3c7a7493257025be5a6955f", 183 | "sha256:9679831005fb16c6df3dd35d17aa31dc0d4d7573d84f0b44cc481490a65c7725", 184 | "sha256:acbf5c52db4adb366c064d0b7c7899e3e778d89db585feadd23b06b587d64761", 185 | "sha256:3d52298d0be333583739f1aec9026f3b09fdfe3ddf7c7028cb16d9d2af1cca7e", 186 | "sha256:475963c5b9e116c38ad7347e154e5651d05a2286d86455671f5b1eebba5feb76", 187 | "sha256:0c0763787133dfeec19904c22c7e358b231c87ba3206b211652f8cbe1241deb6", 188 | "sha256:683828e50c339fc9e68720396f2de14253992c495fdddef77a1e17de55f1decc", 189 | 
"sha256:e2e9d8c87120ba2c591f60e32736b82b67f72c37ba88a4c23c81b5b8fa49c018", 190 | "sha256:a8f67ebfae9f575d85fa859b54d3bdecaeece74e3274b0b5c5f804d7ca789fe1", 191 | "sha256:0a7a1dd123aecc9f0076934288ceed7fd9a81ba3919f11a855a7887cbe82a02f", 192 | "sha256:ada4805ed51f5bcaa3a06d3dd94939351869c095e30a2b54264f5a5004b52170", 193 | "sha256:f58913e9227400f1395c7b800503ebfdb0772f1c33ff8cb4d6451c06cabdf316" 194 | ], 195 | "version": "==1.17.4" 196 | }, 197 | "packaging": { 198 | "hashes": [ 199 | "sha256:d9551545c6d761f3def1677baf08ab2a3ca17c56879e70fecba2fc4dde4ed108", 200 | "sha256:28b924174df7a2fa32c1953825ff29c61e2f5e082343165438812f00d3a7fc47" 201 | ], 202 | "version": "==19.2" 203 | }, 204 | "pathlib2": { 205 | "hashes": [ 206 | "sha256:0ec8205a157c80d7acc301c0b18fbd5d44fe655968f5d947b6ecef5290fc35db", 207 | "sha256:6cd9a47b597b37cc57de1c05e56fb1a1c9cc9fab04fe78c29acd090418529868" 208 | ], 209 | "markers": "python_version < '3.6'", 210 | "version": "==2.3.5" 211 | }, 212 | "pluggy": { 213 | "hashes": [ 214 | "sha256:0db4b7601aae1d35b4a033282da476845aa19185c1e6964b25cf324b5e4ec3e6", 215 | "sha256:fa5fa1622fa6dd5c030e9cad086fa19ef6a0cf6d7a2d12318e10cb49d6d68f34" 216 | ], 217 | "version": "==0.13.0" 218 | }, 219 | "py": { 220 | "hashes": [ 221 | "sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", 222 | "sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53" 223 | ], 224 | "version": "==1.8.0" 225 | }, 226 | "pyarrow": { 227 | "hashes": [ 228 | "sha256:b508b860486f75bcfeab72b98b4d8caa3a1517e5b7a9b3adcd5bc4539bff8a1a", 229 | "sha256:2964a3fe09fbe704160734d00bef7b023699dc6a603dc8eb889b095effc464db", 230 | "sha256:87a2324a6e41faff3a482dbfc54a1f51bbf2d7da39ee728ec73869e2ef892a97", 231 | "sha256:1f3934b2add6839844443c1ac0eba64e14b2b8253563574d45d6831851b11d47", 232 | "sha256:f5af4cd64c774693af560576a6b8039d165596b1921031ca5d739bd2e7e0554b", 233 | "sha256:14dbc00edd14133c15d62c8d6c566a82a7497b077f253fc0c2dad62c7f85beaa", 234 | 
"sha256:4f0276e258065c82dcb7edfc28c343ccad15da02b25e57e7c60ceb80e3f7268b", 235 | "sha256:bc7200f7a97aea7301f61cd616b33069d1098e6d9178db6a34ccd43ea9223f53", 236 | "sha256:13f921560bac5ad46b17513696e38fede0c0e92ba750c7b350c0b231815bb706", 237 | "sha256:c70f7d0032be960d8dbd32661a9de062af184f411400ea2f4a13883ca11b0b1f", 238 | "sha256:030d67418b129eb14a1c1f1af06b1a48c8074005d704789725ea6f5addaf3b26", 239 | "sha256:5f6026673ceaa037cb41fbe86ce7ea6483cfdc91e51dea929fbbf81883a73d96", 240 | "sha256:41cf5ed34012c43b4ceeeeb2534e3454c77e852bc9175d2e506b45bad132db49", 241 | "sha256:4fa03d2bc725e948f361a8ce7de271e39d90130ee3a3375793ac241b452c5bfa", 242 | "sha256:364806e26769ca20a79b1ead301c7ce28fd0534eb6d411d441053288d7e45817", 243 | "sha256:17cda6ba594acf5a72058dd2e5ca2586fe8781fc8d20bd750a3b7c66c8b274b2", 244 | "sha256:5a07222b80ae36219c558cb8875e7e346f779d0862ae277c68899db879cf5cd7", 245 | "sha256:7ad074690ba38313067bf3bbda1258966d38e2037c035d08b9ffe3cce07747a5" 246 | ], 247 | "version": "==0.15.1" 248 | }, 249 | "pylint": { 250 | "hashes": [ 251 | "sha256:886e6afc935ea2590b462664b161ca9a5e40168ea99e5300935f6591ad467df4", 252 | "sha256:3db5468ad013380e987410a8d6956226963aed94ecb5f9d3a28acca6d9ac36cd" 253 | ], 254 | "version": "==2.4.4" 255 | }, 256 | "pyparsing": { 257 | "hashes": [ 258 | "sha256:20f995ecd72f2a1f4bf6b072b63b22e2eb457836601e76d6e5dfcd75436acc1f", 259 | "sha256:4ca62001be367f01bd3e92ecbb79070272a9d4964dce6a48a82ff0b8bc7e683a" 260 | ], 261 | "version": "==2.4.5" 262 | }, 263 | "pytest": { 264 | "hashes": [ 265 | "sha256:8e256fe71eb74e14a4d20a5987bb5e1488f0511ee800680aaedc62b9358714e8", 266 | "sha256:ff0090819f669aaa0284d0f4aad1a6d9d67a6efdc6dd4eb4ac56b704f890a0d6" 267 | ], 268 | "version": "==5.2.4" 269 | }, 270 | "pytest-cov": { 271 | "hashes": [ 272 | "sha256:cdbdef4f870408ebdbfeb44e63e07eb18bb4619fae852f6e760645fa36172626", 273 | "sha256:cc6742d8bac45070217169f5f72ceee1e0e55b0221f54bcf24845972d3a47f2b" 274 | ], 275 | "version": "==2.8.1" 276 | }, 277 | "pytz": 
{ 278 | "hashes": [ 279 | "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d", 280 | "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be" 281 | ], 282 | "version": "==2019.3" 283 | }, 284 | "requests": { 285 | "hashes": [ 286 | "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31", 287 | "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4" 288 | ], 289 | "version": "==2.22.0" 290 | }, 291 | "six": { 292 | "hashes": [ 293 | "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd", 294 | "sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66" 295 | ], 296 | "version": "==1.13.0" 297 | }, 298 | "typed-ast": { 299 | "hashes": [ 300 | "sha256:262c247a82d005e43b5b7f69aff746370538e176131c32dda9cb0f324d27141e", 301 | "sha256:71211d26ffd12d63a83e079ff258ac9d56a1376a25bc80b1cdcdf601b855b90b", 302 | "sha256:630968c5cdee51a11c05a30453f8cd65e0cc1d2ad0d9192819df9978984529f4", 303 | "sha256:ffde2fbfad571af120fcbfbbc61c72469e72f550d676c3342492a9dfdefb8f12", 304 | "sha256:4e0b70c6fc4d010f8107726af5fd37921b666f5b31d9331f0bd24ad9a088e631", 305 | "sha256:bc6c7d3fa1325a0c6613512a093bc2a2a15aeec350451cbdf9e1d4bffe3e3233", 306 | "sha256:cc34a6f5b426748a507dd5d1de4c1978f2eb5626d51326e43280941206c209e1", 307 | "sha256:d896919306dd0aa22d0132f62a1b78d11aaf4c9fc5b3410d3c666b818191630a", 308 | "sha256:354c16e5babd09f5cb0ee000d54cfa38401d8b8891eefa878ac772f827181a3c", 309 | "sha256:95bd11af7eafc16e829af2d3df510cecfd4387f6453355188342c3e79a2ec87a", 310 | "sha256:18511a0b3e7922276346bcb47e2ef9f38fb90fd31cb9223eed42c85d1312344e", 311 | "sha256:d7c45933b1bdfaf9f36c579671fec15d25b06c8398f113dab64c18ed1adda01d", 312 | "sha256:d755f03c1e4a51e9b24d899561fec4ccaf51f210d52abdf8c07ee2849b212a36", 313 | "sha256:2b907eb046d049bcd9892e3076c7a6456c93a25bebfe554e931620c90e6a25b0", 314 | "sha256:fdc1c9bbf79510b76408840e009ed65958feba92a88833cdceecff93ae8fff66", 315 | 
"sha256:7954560051331d003b4e2b3eb822d9dd2e376fa4f6d98fee32f452f52dd6ebb2", 316 | "sha256:48e5b1e71f25cfdef98b013263a88d7145879fbb2d5185f2a0c79fa7ebbeae47", 317 | "sha256:1170afa46a3799e18b4c977777ce137bb53c7485379d9706af8a59f2ea1aa161", 318 | "sha256:838997f4310012cf2e1ad3803bce2f3402e9ffb71ded61b5ee22617b3a7f6b6e", 319 | "sha256:66480f95b8167c9c5c5c87f32cf437d585937970f3fc24386f313a4c97b44e34" 320 | ], 321 | "markers": "implementation_name == 'cpython' and python_version < '3.8'", 322 | "version": "==1.4.0" 323 | }, 324 | "urllib3": { 325 | "hashes": [ 326 | "sha256:a8a318824cc77d1fd4b2bec2ded92646630d7fe8619497b142c84a9e6f5a7293", 327 | "sha256:f3c5fd51747d450d4dcf6f923c81f78f811aab8205fda64b0aba34a4e48b0745" 328 | ], 329 | "version": "==1.25.7" 330 | }, 331 | "wcwidth": { 332 | "hashes": [ 333 | "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c", 334 | "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e" 335 | ], 336 | "version": "==0.1.7" 337 | }, 338 | "wrapt": { 339 | "hashes": [ 340 | "sha256:565a021fd19419476b9362b05eeaa094178de64f8361e44468f9e9d7843901e1" 341 | ], 342 | "version": "==1.11.2" 343 | }, 344 | "zipp": { 345 | "hashes": [ 346 | "sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335", 347 | "sha256:3718b1cbcd963c7d4c5511a8240812904164b7f381b647143a89d3b98f9bcd8e" 348 | ], 349 | "version": "==0.6.0" 350 | } 351 | }, 352 | "develop": {} 353 | } 354 | --------------------------------------------------------------------------------