├── csvs ├── simple2.tsv ├── too-many-columns.csv ├── simple.csv ├── ints.csv ├── invalid-types.csv └── types.csv ├── csv2parquet ├── __main__.py ├── __init__.py ├── test_argparse.py ├── test_write.py └── csv2parquet.py ├── .gitignore ├── csv2parquet.sh ├── Makefile ├── .travis.yml ├── Pipfile ├── MAINTAINERS.md ├── setup.py ├── CHANGELOG.md ├── README.md ├── LICENSE ├── pylintrc └── Pipfile.lock /csvs/simple2.tsv: -------------------------------------------------------------------------------- 1 | a b 2 | 1 b 3 | -------------------------------------------------------------------------------- /csvs/too-many-columns.csv: -------------------------------------------------------------------------------- 1 | a,b 2 | 1,2,3 3 | -------------------------------------------------------------------------------- /csvs/simple.csv: -------------------------------------------------------------------------------- 1 | a,b 2 | 1,a 3 | 2,b 4 | 3,c 5 | -------------------------------------------------------------------------------- /csv2parquet/__main__.py: -------------------------------------------------------------------------------- 1 | from .csv2parquet import main 2 | main() 3 | -------------------------------------------------------------------------------- /csvs/ints.csv: -------------------------------------------------------------------------------- 1 | int8,int16,int32 2 | 1,2,3 3 | -200,-40000,-4000000000 4 | -------------------------------------------------------------------------------- /csv2parquet/__init__.py: -------------------------------------------------------------------------------- 1 | name = "csv2parquet" # pylint: disable=invalid-name 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | 4 | build/ 5 | dist/ 6 | csv2parquet.egg-info/ 7 | .pytest_cache/ 8 | .coverage 9 | csvs/*.parquet 10 | 
-------------------------------------------------------------------------------- /csv2parquet.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -euo pipefail 3 | 4 | here="${BASH_SOURCE[0]}" 5 | here=$(dirname "$here") 6 | cd "$here" 7 | pipenv run python -m csv2parquet "$@" 8 | -------------------------------------------------------------------------------- /csvs/invalid-types.csv: -------------------------------------------------------------------------------- 1 | bool,float32,float64,int8,int16,int32,int64,string,timestamp 2 | 1,0.5,0.75,12,400,132000,6000000000,string,2018-07-09 3 | blah,blah,blah,blah,blah,blah,blah,blah,blah 4 | -------------------------------------------------------------------------------- /csvs/types.csv: -------------------------------------------------------------------------------- 1 | bool,float32,float64,int8,int16,int32,int64,string,timestamp 2 | 1,0.5,0.75,12,400,132000,6000000000,string,2018-07-09 3 | 0,0.6,1.75,13,401,132001,6000000001,string,2018-07-10 4 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | setup: 2 | pip install pipenv 3 | pipenv install --dev --three 4 | 5 | test: 6 | pipenv run -- pylint csv2parquet 7 | pipenv run -- pytest --cov=csv2parquet 8 | pipenv run -- codecov 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | env: 5 | global: 6 | - PIPENV_VENV_IN_PROJECT=1 7 | - PIPENV_IGNORE_VIRTUALENVS=1 8 | # command to install dependencies 9 | install: make setup 10 | # command to run tests 11 | script: make test 12 | -------------------------------------------------------------------------------- /Pipfile: 
-------------------------------------------------------------------------------- 1 | [[source]] 2 | 3 | verify_ssl = true 4 | name = "pypi" 5 | url = "https://pypi.python.org/simple" 6 | 7 | 8 | [packages] 9 | 10 | pyarrow = "*" 11 | pylint = "*" 12 | pytest = "*" 13 | pytest-cov = "*" 14 | codecov = "*" 15 | pytz = "*" 16 | 17 | 18 | [dev-packages] 19 | 20 | -------------------------------------------------------------------------------- /MAINTAINERS.md: -------------------------------------------------------------------------------- 1 | # Maintainers & Contributors 2 | 3 | * Colin Dellow 4 | 5 | Feedback, issue reports, PRs, etc, welcome. 6 | 7 | ## Doing a release to pypi 8 | 9 | ``` 10 | rm -rf build dist csv2parquet.egg-info 11 | python3 setup.py sdist bdist_wheel 12 | 13 | # Upload to test pypi 14 | twine upload --repository-url https://test.pypi.org/legacy/ dist/* 15 | python3 -m pip install --index-url https://test.pypi.org/simple/ csv2parquet 16 | 17 | # Upload to for reals pypi 18 | twine upload dist/* 19 | ``` 20 | 21 | See also https://packaging.python.org/tutorials/packaging-projects/ 22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md", "r") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="csv2parquet", 8 | version="0.0.9", 9 | author="Colin Dellow", 10 | author_email="cldellow@cldellow.com", 11 | description="A tool to convert CSVs to Parquet files", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/cldellow/csv2parquet", 15 | packages=setuptools.find_packages(), 16 | entry_points = { 17 | "console_scripts": ['csv2parquet = csv2parquet.csv2parquet:main'] 18 | }, 19 | classifiers=( 20 | "Programming Language :: Python :: 3", 21 | "License :: OSI Approved :: Apache Software 
License", 22 | "Operating System :: OS Independent", 23 | ), 24 | ) 25 | 26 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changes 2 | 3 | ## 0.0.9 4 | 5 | Better error message when a row in the CSV has too many columns in it, 6 | courtesy [dazzag24](https://github.com/cldellow/csv2parquet/pull/14) 7 | 8 | ## 0.0.8 9 | 10 | Upgrade to be compatible with Arrow 0.15.1, which removed the `Column` type. 11 | 12 | ## 0.0.7 13 | 14 | Add `base64` type to interpret a base64-encoded string and store it as a binary field. 15 | 16 | ## 0.0.6 17 | 18 | Upgrade to pyarrow 0.10.0, which supports zstd. 19 | 20 | ## 0.0.5 21 | 22 | Support `--type`. 23 | 24 | ## 0.0.4 25 | 26 | Support `--include`, `--exclude`, `--rename`. 27 | 28 | ## 0.0.3 29 | 30 | Support `--rows`, sanitize column names 31 | 32 | ## 0.0.2 33 | 34 | Fix regression of `output_file` 35 | 36 | ## 0.0.1 37 | 38 | Initial pypi release. 39 | 40 | # How to do a release 41 | 42 | These are internal notes based on https://packaging.python.org/tutorials/packaging-projects/, 43 | because otherwise I'll forget how to do this. 44 | 45 | ## Package 46 | 47 | ``` 48 | python3 -m pip install --user --upgrade setuptools wheel 49 | python3 setup.py sdist bdist_wheel 50 | ``` 51 | 52 | `dist/` should now have a wheel and a tarball. 
53 | 54 | ## Test upload 55 | 56 | ``` 57 | python3 -m pip install --user --upgrade twine 58 | twine upload --repository-url https://test.pypi.org/legacy/ dist/* 59 | 60 | # Now validate with: 61 | python3 -m pip install --index-url https://test.pypi.org/simple/ example_pkg 62 | ``` 63 | 64 | ## Release upload 65 | 66 | ``` 67 | twine upload dist/* 68 | ``` 69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # csv2parquet 2 | 3 | [![Build Status](https://travis-ci.org/cldellow/csv2parquet.svg?branch=master)](https://travis-ci.org/cldellow/csv2parquet) 4 | [![codecov](https://codecov.io/gh/cldellow/csv2parquet/branch/master/graph/badge.svg)](https://codecov.io/gh/cldellow/csv2parquet) 5 | 6 | Convert a CSV to a parquet file. You may also find [sqlite-parquet-vtable](https://github.com/cldellow/sqlite-parquet-vtable) or 7 | [parquet-metadata](https://github.com/cldellow/parquet-metadata) useful. 8 | 9 | ## Installing 10 | 11 | If you just want to use the tool: 12 | 13 | ``` 14 | sudo pip install pyarrow csv2parquet 15 | ``` 16 | 17 | If you want to clone the repo and work on the tool, install its dependencies via pipenv: 18 | 19 | ``` 20 | pipenv install 21 | ``` 22 | 23 | ## Usage 24 | 25 | Next, create some Parquet files. The tool supports CSV and TSV files. 26 | 27 | ``` 28 | usage: csv2parquet [-h] [-n ROWS] [-r ROW_GROUP_SIZE] [-o OUTPUT] [-c CODEC] 29 | [-i INCLUDE [INCLUDE ...] | -x EXCLUDE [EXCLUDE ...]] 30 | [-R RENAME [RENAME ...]] [-t TYPE [TYPE ...]] 31 | csv_file 32 | 33 | positional arguments: 34 | csv_file input file, can be CSV or TSV 35 | 36 | optional arguments: 37 | -h, --help show this help message and exit 38 | -n ROWS, --rows ROWS The number of rows to include, useful for testing. 39 | -r ROW_GROUP_SIZE, --row-group-size ROW_GROUP_SIZE 40 | The number of rows per row group. 
41 | -o OUTPUT, --output OUTPUT 42 | The parquet file 43 | -c CODEC, --codec CODEC 44 | The compression codec to use (brotli, gzip, snappy, 45 | zstd, none) 46 | -i INCLUDE [INCLUDE ...], --include INCLUDE [INCLUDE ...] 47 | Include the given columns (by index or name) 48 | -x EXCLUDE [EXCLUDE ...], --exclude EXCLUDE [EXCLUDE ...] 49 | Exclude the given columns (by index or name) 50 | -R RENAME [RENAME ...], --rename RENAME [RENAME ...] 51 | Rename a column. Specify the column to be renamed and 52 | its new name, eg: 0=age or person_age=age 53 | -t TYPE [TYPE ...], --type TYPE [TYPE ...] 54 | Parse a column as a given type. Specify the column and 55 | its type, eg: 0=bool? or person_age=int8. Parse errors 56 | are fatal unless the type is followed by a question 57 | mark. Valid types are string (default), base64, bool, 58 | float32, float64, int8, int16, int32, int64, timestamp 59 | ``` 60 | 61 | ## Testing 62 | 63 | ``` 64 | pylint csv2parquet 65 | pytest 66 | ``` 67 | -------------------------------------------------------------------------------- /csv2parquet/test_argparse.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pyarrow as pa 3 | from . 
import csv2parquet 4 | 5 | def capture_args(_map): 6 | def func(csv_file, output_file, row_group_size, codec, rows, 7 | rename, include, exclude, raw_types): 8 | _map['csv_file'] = csv_file 9 | _map['output_file'] = output_file 10 | _map['row_group_size'] = row_group_size 11 | _map['codec'] = codec 12 | _map['rows'] = rows 13 | _map['rename'] = rename 14 | _map['include'] = include 15 | _map['exclude'] = exclude 16 | _map['raw_types'] = raw_types 17 | 18 | return func 19 | 20 | def test_argparse_csv(): 21 | _map = {} 22 | csv2parquet.main_with_args(capture_args(_map), ['foo.csv']) 23 | assert _map['csv_file'] == 'foo.csv' 24 | assert _map['output_file'] == 'foo.parquet' 25 | 26 | def test_argparse_tsv(): 27 | _map = {} 28 | csv2parquet.main_with_args(capture_args(_map), ['foo.tsv']) 29 | assert _map['csv_file'] == 'foo.tsv' 30 | assert _map['output_file'] == 'foo.parquet' 31 | assert _map['rows'] is None 32 | assert _map['raw_types'] == [] 33 | 34 | def test_argparse_types(): 35 | _map = {} 36 | csv2parquet.main_with_args(capture_args(_map), ['foo.csv', '--type', '0=string', '0=int8?']) 37 | assert _map['raw_types'] == [('0', pa.string(), False), ('0', pa.int8(), True)] 38 | 39 | def test_argparse_override(): 40 | """Can override the default values.""" 41 | _map = {} 42 | csv2parquet.main_with_args( 43 | capture_args(_map), 44 | ['foo.csv', '-o', 'output', '-c', 'somecodec', '-r', '123', '-n', '234']) 45 | assert _map['row_group_size'] == 123 46 | assert _map['codec'] == 'somecodec' 47 | assert _map['output_file'] == 'output' 48 | assert _map['rows'] == 234 49 | 50 | def test_argparse_rename(): 51 | _map = {} 52 | csv2parquet.main_with_args(capture_args(_map), ['foo.csv', '--rename', '0=foo', 'bar=baz']) 53 | assert _map['rename'] == [('0', 'foo'), ('bar', 'baz')] 54 | 55 | def test_argparse_bad_no_args(): 56 | """No args should be an error.""" 57 | with pytest.raises(SystemExit) as pytest_wrapped_e: 58 | csv2parquet.main_with_args(None, []) 59 | assert 
pytest_wrapped_e.type == SystemExit 60 | assert pytest_wrapped_e.value.code == 2 61 | 62 | def test_argparse_bad_inc_and_exc(): 63 | # Can't do both --include and --exclude 64 | with pytest.raises(SystemExit) as pytest_wrapped_e: 65 | csv2parquet.main_with_args(capture_args({}), ['csvs/simple.csv', '-i', 'foo', '-x', 'bar']) 66 | assert pytest_wrapped_e.type == SystemExit 67 | assert pytest_wrapped_e.value.code == 2 68 | 69 | def test_argparse_bad_rename(): 70 | with pytest.raises(SystemExit) as pytest_wrapped_e: 71 | csv2parquet.main_with_args(capture_args({}), ['csvs/simple.csv', '--rename', 'foo']) 72 | assert pytest_wrapped_e.type == SystemExit 73 | assert pytest_wrapped_e.value.code == 2 74 | 75 | def test_argparse_bad_type(): 76 | with pytest.raises(SystemExit) as pytest_wrapped_e: 77 | csv2parquet.main_with_args(capture_args({}), ['csvs/simple.csv', '--type', 'foo']) 78 | assert pytest_wrapped_e.type == SystemExit 79 | assert pytest_wrapped_e.value.code == 2 80 | 81 | def test_argparse_bad_type2(): 82 | with pytest.raises(SystemExit) as pytest_wrapped_e: 83 | csv2parquet.main_with_args(capture_args({}), ['csvs/simple.csv', '--type', 'foo=bar']) 84 | assert pytest_wrapped_e.type == SystemExit 85 | assert pytest_wrapped_e.value.code == 2 86 | -------------------------------------------------------------------------------- /csv2parquet/test_write.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import pyarrow.parquet as pq 3 | import pytest 4 | from . 
import csv2parquet 5 | 6 | def test_write_from_csv(): 7 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv']) 8 | pqf = pq.ParquetFile('csvs/simple.parquet') 9 | assert pqf.num_row_groups == 1 10 | schema = pqf.schema 11 | assert schema.names == ['a', 'b'] 12 | assert schema.column(0).logical_type.type == 'STRING' 13 | assert schema.column(1).logical_type.type == 'STRING' 14 | row_group = pqf.read_row_group(0) 15 | assert row_group.num_rows == 3 16 | row_group = pqf.read_row_group(0) 17 | assert row_group.num_rows == 3 18 | col_a = row_group.column(0).to_pylist() 19 | assert col_a == ['1', '2', '3'] 20 | col_b = row_group.column(1).to_pylist() 21 | assert col_b == ['a', 'b', 'c'] 22 | 23 | 24 | def test_write_from_tsv(): 25 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple2.tsv']) 26 | pqf = pq.ParquetFile('csvs/simple2.parquet') 27 | assert pqf.num_row_groups == 1 28 | schema = pqf.schema 29 | assert schema.names == ['a', 'b'] 30 | assert schema.column(0).logical_type.type == 'STRING' 31 | assert schema.column(1).logical_type.type == 'STRING' 32 | row_group = pqf.read_row_group(0) 33 | assert row_group.num_rows == 1 34 | col_a = row_group.column(0).to_pylist() 35 | assert col_a == ['1'] 36 | col_b = row_group.column(1).to_pylist() 37 | assert col_b == ['b'] 38 | 39 | def test_write_rename(): 40 | csv2parquet.main_with_args(csv2parquet.convert, 41 | ['csvs/simple.csv', '--rename', '0=alpha', 'b=bee']) 42 | pqf = pq.ParquetFile('csvs/simple.parquet') 43 | schema = pqf.schema 44 | assert schema.names == ['alpha', 'bee'] 45 | 46 | def test_write_row_group_size(): 47 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv', '--row-group-size', '1']) 48 | pqf = pq.ParquetFile('csvs/simple.parquet') 49 | assert pqf.num_row_groups == 3 50 | 51 | def test_write_limit(): 52 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv', '--rows', '1']) 53 | pqf = pq.ParquetFile('csvs/simple.parquet') 54 | row_group = 
pqf.read_row_group(0) 55 | assert row_group.num_rows == 1 56 | 57 | def test_write_include_by_name(): 58 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv', '--include', 'a']) 59 | pqf = pq.ParquetFile('csvs/simple.parquet') 60 | schema = pqf.schema 61 | assert schema.names == ['a'] 62 | row_group = pqf.read_row_group(0) 63 | assert row_group.num_rows == 3 64 | col_a = row_group.column(0).to_pylist() 65 | assert col_a == ['1', '2', '3'] 66 | 67 | def test_write_include_by_index(): 68 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv', '--include', '0']) 69 | pqf = pq.ParquetFile('csvs/simple.parquet') 70 | schema = pqf.schema 71 | assert schema.names == ['a'] 72 | row_group = pqf.read_row_group(0) 73 | assert row_group.num_rows == 3 74 | col_a = row_group.column(0).to_pylist() 75 | assert col_a == ['1', '2', '3'] 76 | 77 | def test_write_exclude_by_name(): 78 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv', '--exclude', 'a']) 79 | pqf = pq.ParquetFile('csvs/simple.parquet') 80 | schema = pqf.schema 81 | assert schema.names == ['b'] 82 | row_group = pqf.read_row_group(0) 83 | assert row_group.num_rows == 3 84 | col_b = row_group.column(0).to_pylist() 85 | assert col_b == ['a', 'b', 'c'] 86 | 87 | def test_write_exclude_by_index(): 88 | csv2parquet.main_with_args(csv2parquet.convert, ['csvs/simple.csv', '--exclude', '0']) 89 | pqf = pq.ParquetFile('csvs/simple.parquet') 90 | schema = pqf.schema 91 | assert schema.names == ['b'] 92 | row_group = pqf.read_row_group(0) 93 | assert row_group.num_rows == 3 94 | col_b = row_group.column(0).to_pylist() 95 | assert col_b == ['a', 'b', 'c'] 96 | 97 | def test_sanitize_column_name(): 98 | assert csv2parquet.sanitize_column_name('foo') == 'foo' 99 | assert csv2parquet.sanitize_column_name(' foo ') == 'foo' 100 | assert csv2parquet.sanitize_column_name('foo bar') == 'foo_bar' 101 | assert csv2parquet.sanitize_column_name('foo bar') == 'foo_bar' 102 | assert 
csv2parquet.sanitize_column_name('PostalCode') == 'postalcode' 103 | 104 | def test_required_types(): 105 | csv2parquet.main_with_args(csv2parquet.convert, 106 | ['csvs/types.csv', '--type', 107 | 'bool=bool', 'float32=float32', 'float64=float64', 'int8=int8', 108 | 'int16=int16', 'int32=int32', 'int64=int64', 'string=string', 109 | 'timestamp=timestamp']) 110 | pqf = pq.ParquetFile('csvs/types.parquet') 111 | schema = pqf.schema 112 | assert schema.names == ['bool', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 113 | 'string', 'timestamp'] 114 | row_group = pqf.read_row_group(0) 115 | assert row_group.num_rows == 2 116 | bools = row_group.column(0).to_pylist() 117 | assert bools == [True, False] 118 | float32 = row_group.column(1).to_pylist() 119 | assert float32 == pytest.approx([0.5, 0.6]) 120 | float64 = row_group.column(2).to_pylist() 121 | assert float64 == [0.75, 1.75] 122 | int8 = row_group.column(3).to_pylist() 123 | assert int8 == [12, 13] 124 | int16 = row_group.column(4).to_pylist() 125 | assert int16 == [400, 401] 126 | int32 = row_group.column(5).to_pylist() 127 | assert int32 == [132000, 132001] 128 | int64 = row_group.column(6).to_pylist() 129 | assert int64 == [6000000000, 6000000001] 130 | string = row_group.column(7).to_pylist() 131 | assert string == ['string', 'string'] 132 | timestamp = row_group.column(8).to_pylist() 133 | assert timestamp == [datetime(2018, 7, 9, 0, 0), datetime(2018, 7, 10, 0, 0)] 134 | 135 | def test_required_invalid_types(): 136 | with pytest.raises(ValueError): 137 | csv2parquet.main_with_args(csv2parquet.convert, 138 | ['csvs/invalid-types.csv', '--type', 139 | 'bool=bool', 'float32=float32', 'float64=float64', 'int8=int8', 140 | 'int16=int16', 'int32=int32', 'int64=int64', 'string=string', 141 | 'timestamp=timestamp']) 142 | 143 | def test_too_many_columns(): 144 | with pytest.raises(IndexError): 145 | csv2parquet.main_with_args(csv2parquet.convert, 146 | ['csvs/too-many-columns.csv']) 147 | 148 | def 
test_opt_invalid_types(): 149 | csv2parquet.main_with_args(csv2parquet.convert, 150 | ['csvs/invalid-types.csv', '--type', 151 | 'bool=bool?', 'float32=float32?', 'float64=float64?', 'int8=int8?', 152 | 'int16=int16?', 'int32=int32?', 'int64=int64?', 'string=string?', 153 | 'timestamp=timestamp?']) 154 | pqf = pq.ParquetFile('csvs/invalid-types.parquet') 155 | schema = pqf.schema 156 | assert schema.names == ['bool', 'float32', 'float64', 'int8', 'int16', 'int32', 'int64', 157 | 'string', 'timestamp'] 158 | row_group = pqf.read_row_group(0) 159 | assert row_group.num_rows == 2 160 | bools = row_group.column(0).to_pylist() 161 | assert bools == [True, None] 162 | float32 = row_group.column(1).to_pylist() 163 | assert len(float32) == 2 164 | assert float32[0] == pytest.approx(0.5) 165 | assert float32[1] is None 166 | float64 = row_group.column(2).to_pylist() 167 | assert float64 == [0.75, None] 168 | int8 = row_group.column(3).to_pylist() 169 | assert int8 == [12, None] 170 | int16 = row_group.column(4).to_pylist() 171 | assert int16 == [400, None] 172 | int32 = row_group.column(5).to_pylist() 173 | assert int32 == [132000, None] 174 | int64 = row_group.column(6).to_pylist() 175 | assert int64 == [6000000000, None] 176 | string = row_group.column(7).to_pylist() 177 | assert string == ['string', 'blah'] 178 | timestamp = row_group.column(8).to_pylist() 179 | assert timestamp == [datetime(2018, 7, 9, 0, 0), None] 180 | 181 | def test_required_invalid_ints(): 182 | with pytest.raises(ValueError): 183 | csv2parquet.main_with_args(csv2parquet.convert, 184 | ['csvs/ints.csv', '--type', 185 | 'int8=int8', 'int16=int16', 'int32=int32']) 186 | 187 | def test_opt_invalid_ints(): 188 | csv2parquet.main_with_args(csv2parquet.convert, 189 | ['csvs/ints.csv', '--type', 190 | 'int8=int8?', 'int16=int16?', 'int32=int32?']) 191 | pqf = pq.ParquetFile('csvs/ints.parquet') 192 | schema = pqf.schema 193 | assert schema.names == ['int8', 'int16', 'int32'] 194 | row_group = 
"""Convert a CSV or TSV file to a Parquet file.

The public entry points are :func:`convert` (the worker),
:func:`main_with_args` (argument parsing) and :func:`main` (console script).
"""
import argparse
import csv
import re
import sys
from datetime import datetime
from base64 import standard_b64decode
import pyarrow as pa
import pyarrow.parquet as pq

# Singleton pyarrow type instances, created once so the type-dispatch
# comparisons below always compare against the same objects.
PA_BOOL = pa.bool_()
PA_FLOAT32 = pa.float32()
PA_FLOAT64 = pa.float64()
PA_INT8 = pa.int8()
PA_INT16 = pa.int16()
PA_INT32 = pa.int32()
PA_INT64 = pa.int64()
PA_STRING = pa.string()
PA_TIMESTAMP = pa.timestamp('ns')
PA_BINARY = pa.binary()

# Inclusive value ranges for the fixed-width integer types.  int64 is not
# listed: Python ints are arbitrary precision and pyarrow validates them.
_INT_RANGES = {
    PA_INT8: (-128, 127),
    PA_INT16: (-32768, 32767),
    PA_INT32: (-2147483648, 2147483647),
}

# Accepted spellings for boolean CSV cells.
_FALSY = frozenset(('0', 'N', 'F', 'false'))
_TRUTHY = frozenset(('1', 'Y', 'T', 'true'))

# Number of data rows buffered before being frozen into pyarrow arrays.
_CHUNK_ROWS = 10000


def get_delimiter(csv_file):
    """Return the field delimiter implied by the filename: tab for ``.tsv``,
    comma otherwise."""
    return '\t' if csv_file.endswith('.tsv') else ','


def sanitize_column_name(name):
    """Normalize a header cell to a parquet-friendly column name.

    Lowercases *name*, replaces runs of characters outside ``[a-z0-9]`` with a
    single underscore, and strips underscores from both ends.
    """
    cleaned = re.sub(r'[^a-z0-9]', '_', name.lower())
    cleaned = re.sub(r'_+', '_', cleaned)
    return cleaned.strip('_')


def get_column_names(csv_file, rename):
    """Read the header row of *csv_file* and return its sanitized names.

    ``rename`` is a list of ``(colspec, new_name)`` pairs where ``colspec`` is
    either the sanitized original name or a stringified column index.
    Returns ``None`` for a file with no rows at all.
    """
    with open(csv_file) as csvfile:
        reader = csv.reader(csvfile, delimiter=get_delimiter(csv_file))
        for header in reader:
            column_names = []
            for idx, col in enumerate(header):
                clean = sanitize_column_name(col)
                for old, new in rename:
                    if old in (clean, str(idx)):
                        clean = new
                column_names.append(clean)
            # Only the first row is the header.
            return column_names
    return None


def get_pyarrow_types():
    """Map the type names accepted by ``--type`` to pyarrow types."""
    return {
        'bool': PA_BOOL,
        'float32': PA_FLOAT32,
        'float64': PA_FLOAT64,
        'int8': PA_INT8,
        'int16': PA_INT16,
        'int32': PA_INT32,
        'int64': PA_INT64,
        'string': PA_STRING,
        'timestamp': PA_TIMESTAMP,
        'base64': PA_BINARY
    }


def _parse_value(value, expected_type):
    """Convert the raw CSV cell *value* (a str) to a Python object suitable
    for a pyarrow array of *expected_type*.

    Raises ValueError on malformed input (bad bool spelling, out-of-range
    integer, non-numeric float, non-``YYYY-MM-DD`` timestamp).  A base64 cell
    may also raise from the decoder.
    """
    if expected_type == PA_STRING:
        return value
    if expected_type == PA_BOOL:
        if value in _FALSY:
            return False
        if value in _TRUTHY:
            return True
        raise ValueError()
    if expected_type in (PA_FLOAT32, PA_FLOAT64):
        return float(value)
    if expected_type in _INT_RANGES:
        parsed = int(value)
        low, high = _INT_RANGES[expected_type]
        if not low <= parsed <= high:
            raise ValueError()
        return parsed
    if expected_type == PA_INT64:
        return int(value)
    if expected_type == PA_TIMESTAMP:
        # Currently only support YYYY-MM-DD dates.
        comps = value.split('-')
        if len(comps) != 3:
            raise ValueError()
        return datetime(int(comps[0]), int(comps[1]), int(comps[2]))
    if expected_type == PA_BINARY:
        return standard_b64decode(value)
    # Unknown type spec: pass the raw string through unchanged.
    return value


# pylint: disable=too-many-branches,too-many-statements
def convert(csv_file, output_file, row_group_size, codec, max_rows,
            rename, include, exclude, raw_types):
    """Convert *csv_file* to a Parquet file at *output_file*.

    ``raw_types`` is a list of ``(colspec, pyarrow_type, optional)`` tuples;
    when ``optional`` is true, unparseable cells become nulls instead of
    raising.  ``include``/``exclude`` select columns by index or name.
    Raises ValueError on a bad cell in a required-typed column and IndexError
    when a row has more cells than the header.
    """
    column_names = get_column_names(csv_file, rename)
    columns = [[] for _ in column_names]
    arrs = [[] for _ in column_names]
    dropped_values = [0 for _ in column_names]
    # TODO: these examples are collected but never reported to the user.
    dropped_value_examples = [[] for _ in column_names]

    # Resolve the (type, optional) pair for every column; a --type entry may
    # address a column by index or by its (sanitized, possibly renamed) name.
    types = []
    for idx, name in enumerate(column_names):
        opt = False
        column_type = pa.string()  # default to string if unspecified
        for target, new_type, new_opt in raw_types:
            if target in (str(idx), name):
                opt = new_opt
                column_type = new_type
        types.append((column_type, opt))

    if include:
        keep = [value in include or str(idx) in include
                for idx, value in enumerate(column_names)]
    else:
        keep = [not (value in exclude or str(idx) in exclude)
                for idx, value in enumerate(column_names)]

    def add_arrays(cols):
        # Freeze the buffered Python lists into typed pyarrow arrays.
        for colnum, col in enumerate(cols):
            arrs[colnum].append(pa.array(col, type=types[colnum][0]))

    with open(csv_file) as csvfile:
        reader = csv.reader(csvfile, delimiter=get_delimiter(csv_file))
        for rownum, row in enumerate(reader):
            if rownum == 0:
                continue  # header row was already consumed by get_column_names
            for idx, value in enumerate(row):
                try:
                    if not keep[idx]:
                        continue
                    expected_type = types[idx][0]
                    value = _parse_value(value, expected_type)
                except ValueError as err:
                    if types[idx][1]:
                        # Optional ("type?") column: null out the bad cell.
                        dropped_values[idx] += 1
                        if dropped_values[idx] < 10:
                            dropped_value_examples[idx].append(str(value))
                        value = None
                    else:
                        raise ValueError('unexpected value for column {}, type {}: {}'
                                         .format(column_names[idx], expected_type,
                                                 str(value))) from err
                except IndexError as err:
                    # keep[idx] blew up: the row is wider than the header.
                    raise IndexError('Too many columns {} for row {}'
                                     .format(idx, rownum)) from err
                columns[idx].append(value)
            if rownum % _CHUNK_ROWS == 0:
                add_arrays(columns)
                columns = [[] for _ in column_names]
            if rownum == max_rows:
                break

    # Flush any partially filled final chunk.
    if columns and any(columns):
        add_arrays(columns)

    # Concatenate each column's chunked arrays into one array per kept column.
    data = [
        pa.array([item.as_py() for sublist in arr for item in sublist],
                 type=types[idx][0])
        if keep[idx] else None
        for idx, arr in enumerate(arrs)]
    data = [x for x in data if x is not None]
    kept_names = [name for name, kept in zip(column_names, keep) if kept]
    batch = pa.RecordBatch.from_arrays(data, kept_names)
    table = pa.Table.from_batches([batch])

    pq.write_table(table,
                   output_file,
                   version='1.0',
                   compression=codec,
                   use_dictionary=True,
                   row_group_size=row_group_size)


def main_with_args(func, argv):
    """Parse *argv* and invoke ``func(csv_file, output_file, row_group_size,
    codec, rows, rename, include, exclude, raw_types)``.

    Exits with status 2 (matching argparse's own error code) on malformed
    ``--rename`` or ``--type`` arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('csv_file', help="input file, can be CSV or TSV")
    parser.add_argument('-n', '--rows', type=int,
                        help='The number of rows to include, useful for testing.', nargs=1)
    parser.add_argument('-r', '--row-group-size', default=[10000], type=int,
                        help='The number of rows per row group.', nargs=1)
    parser.add_argument('-o', '--output', help='The parquet file', nargs=1)
    parser.add_argument('-c', '--codec', default=['snappy'],
                        help='The compression codec to use (brotli, gzip, snappy, zstd, none)',
                        nargs=1)
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-i', '--include', default=[],
                       help='Include the given columns (by index or name)', nargs='+')
    group.add_argument('-x', '--exclude', default=[],
                       help='Exclude the given columns (by index or name)', nargs='+')

    parser.add_argument('-R', '--rename', default=[], nargs='+',
                        help='Rename a column. Specify the column to be renamed and its new name,' +
                        ' eg: 0=age or person_age=age')
    parser.add_argument('-t', '--type', default=[], nargs='+',
                        help='Parse a column as a given type. Specify the column and its type,' +
                        ' eg: 0=bool? or person_age=int8. Parse errors are fatal unless the type' +
                        ' is followed by a question mark. Valid types are string (default), base64, bool,' +
                        ' float32, float64, int8, int16, int32, int64, timestamp')

    args = parser.parse_args(argv)
    if args.output is None:
        # Derive foo.parquet from foo.csv / foo.tsv.
        output = re.sub(r'\.tsv$|\.csv$', '', args.csv_file) + '.parquet'
    else:
        output = args.output[0]

    # Rewrite each --rename entry "colspec=new-name" into a (colspec, name) pair.
    for i, haystack in enumerate(args.rename):
        needle = haystack.find('=')
        if needle == -1:
            print(haystack + ' is not a valid option for --rename, it must have the form')
            print('colspec=new-name, where colspec is a numeric index or the original name.')
            sys.exit(2)

        args.rename[i] = (haystack[:needle], haystack[needle + 1:])

    # Rewrite each --type entry "colspec=type[?]" into (colspec, type, optional).
    known_types = get_pyarrow_types()  # hoisted: was rebuilt per entry
    for i, haystack in enumerate(args.type):
        needle = haystack.find('=')
        if needle == -1:
            print(haystack + ' is not a valid option for --type, it must have the form')
            print('colspec=type, where colspec is a numeric index or the original name.')
            sys.exit(2)

        # A trailing '?' marks the column as optional (parse errors -> null).
        opt = haystack[-1] == '?'
        if opt:
            haystack = haystack[:-1]

        column_type_raw = haystack[needle + 1:]
        column_type = known_types.get(column_type_raw)
        if column_type is None:
            print(haystack + ' is not a valid option for --type. ' +
                  column_type_raw + ' is unknown.')
            sys.exit(2)

        args.type[i] = (haystack[:needle], column_type, opt)

    # Unwrap the nargs=1 lists into scalars before calling through.
    args.rows = args.rows[0] if args.rows else None
    args.row_group_size = args.row_group_size[0]
    args.codec = args.codec[0]
    func(args.csv_file,
         output,
         args.row_group_size,
         args.codec,
         args.rows,
         args.rename,
         args.include,
         args.exclude,
         args.type)


def main():
    """Console-script entry point."""
    main_with_args(convert, sys.argv[1:])
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # Specify a configuration file. 4 | #rcfile= 5 | 6 | # Python code to execute, usually for sys.path manipulation such as 7 | # pygtk.require(). 8 | #init-hook= 9 | 10 | # Add files or directories to the blacklist. They should be base names, not 11 | # paths. 12 | ignore=CVS 13 | 14 | # Pickle collected data for later comparisons. 15 | persistent=yes 16 | 17 | # List of plugins (as comma separated values of python modules names) to load, 18 | # usually to register additional checkers. 19 | load-plugins= 20 | 21 | # Use multiple processes to speed up Pylint. 22 | jobs=1 23 | 24 | # Allow loading of arbitrary C extensions. Extensions are imported into the 25 | # active Python interpreter and may run arbitrary code. 26 | unsafe-load-any-extension=no 27 | 28 | # A comma-separated list of package or module names from where C extensions may 29 | # be loaded. 
Extensions are loading into the active Python interpreter and may 30 | # run arbitrary code 31 | extension-pkg-whitelist= 32 | 33 | # Allow optimization of some AST trees. This will activate a peephole AST 34 | # optimizer, which will apply various small optimizations. For instance, it can 35 | # be used to obtain the result of joining multiple strings with the addition 36 | # operator. Joining a lot of strings can lead to a maximum recursion error in 37 | # Pylint and this flag can prevent that. It has one side effect, the resulting 38 | # AST will be different than the one from reality. 39 | optimize-ast=no 40 | 41 | 42 | [MESSAGES CONTROL] 43 | 44 | # Only show warnings with the listed confidence levels. Leave empty to show 45 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 46 | confidence= 47 | 48 | # Enable the message, report, category or checker with the given id(s). You can 49 | # either give multiple identifier separated by comma (,) or put this option 50 | # multiple time. See also the "--disable" option for examples. 51 | #enable= 52 | 53 | # Disable the message, report, category or checker with the given id(s). You 54 | # can either give multiple identifiers separated by comma (,) or put this 55 | # option multiple times (only on the command line, not in the configuration 56 | # file where it should appear only once).You can also use "--disable=all" to 57 | # disable everything first and then reenable specific checks. For example, if 58 | # you want to run only the similarities checker, you can use "--disable=all 59 | # --enable=similarities". 
If you want to run only the classes checker, but have 60 | # no Warning level messages displayed, use"--disable=all --enable=classes 61 | # --disable=W" 62 | disable=too-many-arguments,too-many-locals,locally-disabled,missing-docstring,import-star-module-level,old-octal-literal,oct-method,print-statement,unpacking-in-except,parameter-unpacking,backtick,old-raise-syntax,old-ne-operator,long-suffix,dict-view-method,dict-iter-method,metaclass-assignment,next-method-called,raising-string,indexing-exception,raw_input-builtin,long-builtin,file-builtin,execfile-builtin,coerce-builtin,cmp-builtin,buffer-builtin,basestring-builtin,apply-builtin,filter-builtin-not-iterating,using-cmp-argument,useless-suppression,range-builtin-not-iterating,suppressed-message,no-absolute-import,old-division,cmp-method,reload-builtin,zip-builtin-not-iterating,intern-builtin,unichr-builtin,reduce-builtin,standarderror-builtin,unicode-builtin,xrange-builtin,coerce-method,delslice-method,getslice-method,setslice-method,input-builtin,round-builtin,hex-method,nonzero-method,map-builtin-not-iterating 63 | 64 | 65 | [REPORTS] 66 | 67 | # Set the output format. Available formats are text, parseable, colorized, msvs 68 | # (visual studio) and html. You can also give a reporter class, eg 69 | # mypackage.mymodule.MyReporterClass. 70 | output-format=text 71 | 72 | # Put messages in a separate file for each module / package specified on the 73 | # command line instead of printing them on stdout. Reports (if any) will be 74 | # written in a file name "pylint_global.[txt|html]". 75 | files-output=no 76 | 77 | # Tells whether to display a full report or only the messages 78 | reports=yes 79 | 80 | # Python expression which should return a note less than 10 (10 is the highest 81 | # note). You have access to the variables errors warning, statement which 82 | # respectively contain the number of errors / warnings messages and the total 83 | # number of statements analyzed. 
This is used by the global evaluation report 84 | # (RP0004). 85 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 86 | 87 | # Template used to display messages. This is a python new-style format string 88 | # used to format the message information. See doc for all details 89 | #msg-template= 90 | 91 | 92 | [BASIC] 93 | 94 | # List of builtins function names that should not be used, separated by a comma 95 | bad-functions=map,filter,input 96 | 97 | # Good variable names which should always be accepted, separated by a comma 98 | good-names=i,j,k,ex,Run,_ 99 | 100 | # Bad variable names which should always be refused, separated by a comma 101 | bad-names=foo,bar,baz,toto,tutu,tata 102 | 103 | # Colon-delimited sets of names that determine each other's naming style when 104 | # the name regexes allow several styles. 105 | name-group= 106 | 107 | # Include a hint for the correct naming format with invalid-name 108 | include-naming-hint=no 109 | 110 | # Regular expression matching correct function names 111 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 112 | 113 | # Naming hint for function names 114 | function-name-hint=[a-z_][a-z0-9_]{2,30}$ 115 | 116 | # Regular expression matching correct variable names 117 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 118 | 119 | # Naming hint for variable names 120 | variable-name-hint=[a-z_][a-z0-9_]{2,30}$ 121 | 122 | # Regular expression matching correct constant names 123 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 124 | 125 | # Naming hint for constant names 126 | const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 127 | 128 | # Regular expression matching correct attribute names 129 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 130 | 131 | # Naming hint for attribute names 132 | attr-name-hint=[a-z_][a-z0-9_]{2,30}$ 133 | 134 | # Regular expression matching correct argument names 135 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 136 | 137 | # Naming hint for argument names 138 | argument-name-hint=[a-z_][a-z0-9_]{2,30}$ 139 
| 140 | # Regular expression matching correct class attribute names 141 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 142 | 143 | # Naming hint for class attribute names 144 | class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 145 | 146 | # Regular expression matching correct inline iteration names 147 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 148 | 149 | # Naming hint for inline iteration names 150 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ 151 | 152 | # Regular expression matching correct class names 153 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 154 | 155 | # Naming hint for class names 156 | class-name-hint=[A-Z_][a-zA-Z0-9]+$ 157 | 158 | # Regular expression matching correct module names 159 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 160 | 161 | # Naming hint for module names 162 | module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 163 | 164 | # Regular expression matching correct method names 165 | method-rgx=[a-z_][a-z0-9_]{2,30}$ 166 | 167 | # Naming hint for method names 168 | method-name-hint=[a-z_][a-z0-9_]{2,30}$ 169 | 170 | # Regular expression which should only match function or class names that do 171 | # not require a docstring. 172 | no-docstring-rgx=^_ 173 | 174 | # Minimum line length for functions/classes that require docstrings, shorter 175 | # ones are exempt. 176 | docstring-min-length=-1 177 | 178 | 179 | [ELIF] 180 | 181 | # Maximum number of nested blocks for function / method body 182 | max-nested-blocks=5 183 | 184 | 185 | [LOGGING] 186 | 187 | # Logging modules to check that the string format arguments are in logging 188 | # function parameter format 189 | logging-modules=logging 190 | 191 | 192 | [FORMAT] 193 | 194 | # Maximum number of characters on a single line. 195 | max-line-length=120 196 | 197 | # Regexp for a line that is allowed to be longer than the limit. 
198 | ignore-long-lines=^\s*(# )?<?https?://\S+>?$ 199 | 200 | # Allow the body of an if to be on the same line as the test if there is no 201 | # else. 202 | single-line-if-stmt=no 203 | 204 | # List of optional constructs for which whitespace checking is disabled. `dict- 205 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 206 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 207 | # `empty-line` allows space-only lines. 208 | no-space-check=trailing-comma,dict-separator 209 | 210 | # Maximum number of lines in a module 211 | max-module-lines=1000 212 | 213 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 214 | # tab). 215 | indent-string=' ' 216 | 217 | # Number of spaces of indent required inside a hanging or continued line. 218 | indent-after-paren=4 219 | 220 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 221 | expected-line-ending-format= 222 | 223 | 224 | [SPELLING] 225 | 226 | # Spelling dictionary name. Available dictionaries: none. To make it working 227 | # install python-enchant package. 228 | spelling-dict= 229 | 230 | # List of comma separated words that should not be checked. 231 | spelling-ignore-words= 232 | 233 | # A path to a file that contains private dictionary; one word per line. 234 | spelling-private-dict-file= 235 | 236 | # Tells whether to store unknown words to indicated private dictionary in 237 | # --spelling-private-dict-file option instead of raising a message. 238 | spelling-store-unknown-words=no 239 | 240 | 241 | [MISCELLANEOUS] 242 | 243 | # List of note tags to take in consideration, separated by a comma. 244 | notes=FIXME,XXX,TODO 245 | 246 | 247 | [VARIABLES] 248 | 249 | # Tells whether we should check for unused import in __init__ files. 250 | init-import=no 251 | 252 | # A regular expression matching the name of dummy variables (i.e. expectedly 253 | # not used).
254 | dummy-variables-rgx=_$|dummy 255 | 256 | # List of additional names supposed to be defined in builtins. Remember that 257 | # you should avoid to define new builtins when possible. 258 | additional-builtins= 259 | 260 | # List of strings which can identify a callback function by name. A callback 261 | # name must start or end with one of those strings. 262 | callbacks=cb_,_cb 263 | 264 | 265 | [TYPECHECK] 266 | 267 | # Tells whether missing members accessed in mixin class should be ignored. A 268 | # mixin class is detected if its name ends with "mixin" (case insensitive). 269 | ignore-mixin-members=yes 270 | 271 | # List of module names for which member attributes should not be checked 272 | # (useful for modules/projects where namespaces are manipulated during runtime 273 | # and thus existing member attributes cannot be deduced by static analysis. It 274 | # supports qualified module names, as well as Unix pattern matching. 275 | ignored-modules= 276 | 277 | # List of classes names for which member attributes should not be checked 278 | # (useful for classes with attributes dynamically set). This supports can work 279 | # with qualified names. 280 | ignored-classes= 281 | 282 | # List of members which are set dynamically and missed by pylint inference 283 | # system, and so shouldn't trigger E1101 when accessed. Python regular 284 | # expressions are accepted. 285 | generated-members= 286 | 287 | 288 | [SIMILARITIES] 289 | 290 | # Minimum lines number of a similarity. 291 | min-similarity-lines=4 292 | 293 | # Ignore comments when computing similarities. 294 | ignore-comments=yes 295 | 296 | # Ignore docstrings when computing similarities. 297 | ignore-docstrings=yes 298 | 299 | # Ignore imports when computing similarities. 300 | ignore-imports=no 301 | 302 | 303 | [CLASSES] 304 | 305 | # List of method names used to declare (i.e. assign) instance attributes. 
306 | defining-attr-methods=__init__,__new__,setUp 307 | 308 | # List of valid names for the first argument in a class method. 309 | valid-classmethod-first-arg=cls 310 | 311 | # List of valid names for the first argument in a metaclass class method. 312 | valid-metaclass-classmethod-first-arg=mcs 313 | 314 | # List of member names, which should be excluded from the protected access 315 | # warning. 316 | exclude-protected=_asdict,_fields,_replace,_source,_make 317 | 318 | 319 | [IMPORTS] 320 | 321 | # Deprecated modules which should not be used, separated by a comma 322 | deprecated-modules=regsub,TERMIOS,Bastion,rexec 323 | 324 | # Create a graph of every (i.e. internal and external) dependencies in the 325 | # given file (report RP0402 must not be disabled) 326 | import-graph= 327 | 328 | # Create a graph of external dependencies in the given file (report RP0402 must 329 | # not be disabled) 330 | ext-import-graph= 331 | 332 | # Create a graph of internal dependencies in the given file (report RP0402 must 333 | # not be disabled) 334 | int-import-graph= 335 | 336 | 337 | [DESIGN] 338 | 339 | # Maximum number of arguments for function / method 340 | max-args=5 341 | 342 | # Argument names that match this expression will be ignored. Default to name 343 | # with leading underscore 344 | ignored-argument-names=_.* 345 | 346 | # Maximum number of locals for function / method body 347 | max-locals=15 348 | 349 | # Maximum number of return / yield for function / method body 350 | max-returns=6 351 | 352 | # Maximum number of branch for function / method body 353 | max-branches=12 354 | 355 | # Maximum number of statements in function / method body 356 | max-statements=50 357 | 358 | # Maximum number of parents for a class (see R0901). 359 | max-parents=7 360 | 361 | # Maximum number of attributes for a class (see R0902). 362 | max-attributes=7 363 | 364 | # Minimum number of public methods for a class (see R0903). 
365 | min-public-methods=2 366 | 367 | # Maximum number of public methods for a class (see R0904). 368 | max-public-methods=20 369 | 370 | # Maximum number of boolean expressions in a if statement 371 | max-bool-expr=5 372 | 373 | 374 | [EXCEPTIONS] 375 | 376 | # Exceptions that will emit a warning when being caught. Defaults to 377 | # "Exception" 378 | overgeneral-exceptions=Exception 379 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "042eb59af1add84b9b64485d9844d8d9f5eae89e83f6824441449783cfb7fae8" 5 | }, 6 | "host-environment-markers": { 7 | "implementation_name": "cpython", 8 | "implementation_version": "3.5.2", 9 | "os_name": "posix", 10 | "platform_machine": "x86_64", 11 | "platform_python_implementation": "CPython", 12 | "platform_release": "4.9.3-040903-generic", 13 | "platform_system": "Linux", 14 | "platform_version": "#201701120631 SMP Thu Jan 12 11:33:59 UTC 2017", 15 | "python_full_version": "3.5.2", 16 | "python_version": "3.5", 17 | "sys_platform": "linux" 18 | }, 19 | "pipfile-spec": 6, 20 | "requires": {}, 21 | "sources": [ 22 | { 23 | "name": "pypi", 24 | "url": "https://pypi.python.org/simple", 25 | "verify_ssl": true 26 | } 27 | ] 28 | }, 29 | "default": { 30 | "astroid": { 31 | "hashes": [ 32 | "sha256:840947ebfa8b58f318d42301cf8c0a20fd794a33b61cc4638e28e9e61ba32f42", 33 | "sha256:71ea07f44df9568a75d0f354c49143a4575d90645e9fead6dfb52c26a85ed13a" 34 | ], 35 | "version": "==2.3.3" 36 | }, 37 | "atomicwrites": { 38 | "hashes": [ 39 | "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4", 40 | "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6" 41 | ], 42 | "version": "==1.3.0" 43 | }, 44 | "attrs": { 45 | "hashes": [ 46 | "sha256:08a96c641c3a74e44eb59afb61a24f2cb9f4d7188748e76ba4bb5edfa3cb7d1c", 47 | 
"sha256:f7b7ce16570fe9965acd6d30101a28f62fb4a7f9e926b3bbc9b61f8b04247e72" 48 | ], 49 | "version": "==19.3.0" 50 | }, 51 | "certifi": { 52 | "hashes": [ 53 | "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef", 54 | "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50" 55 | ], 56 | "version": "==2019.9.11" 57 | }, 58 | "chardet": { 59 | "hashes": [ 60 | "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691", 61 | "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae" 62 | ], 63 | "version": "==3.0.4" 64 | }, 65 | "codecov": { 66 | "hashes": [ 67 | "sha256:ae00d68e18d8a20e9c3288ba3875ae03db3a8e892115bf9b83ef20507732bed4", 68 | "sha256:8ed8b7c6791010d359baed66f84f061bba5bd41174bf324c31311e8737602788" 69 | ], 70 | "version": "==2.0.15" 71 | }, 72 | "coverage": { 73 | "hashes": [ 74 | "sha256:eee64c616adeff7db37cc37da4180a3a5b6177f5c46b187894e633f088fb5b28", 75 | "sha256:ef824cad1f980d27f26166f86856efe11eff9912c4fed97d3804820d43fa550c", 76 | "sha256:9a334d6c83dfeadae576b4d633a71620d40d1c379129d587faa42ee3e2a85cce", 77 | "sha256:7494b0b0274c5072bddbfd5b4a6c6f18fbbe1ab1d22a41e99cd2d00c8f96ecfe", 78 | "sha256:826f32b9547c8091679ff292a82aca9c7b9650f9fda3e2ca6bf2ac905b7ce888", 79 | "sha256:dd579709a87092c6dbee09d1b7cfa81831040705ffa12a1b248935274aee0437", 80 | "sha256:08907593569fe59baca0bf152c43f3863201efb6113ecb38ce7e97ce339805a6", 81 | "sha256:63a9a5fc43b58735f65ed63d2cf43508f462dc49857da70b8980ad78d41d52fc", 82 | "sha256:e2ede7c1d45e65e209d6093b762e98e8318ddeff95317d07a27a2140b80cfd24", 83 | "sha256:6b62544bb68106e3f00b21c8930e83e584fdca005d4fffd29bb39fb3ffa03cb5", 84 | "sha256:331cb5115673a20fb131dadd22f5bcaf7677ef758741312bee4937d71a14b2ef", 85 | "sha256:bf1ef9eb901113a9805287e090452c05547578eaab1b62e4ad456fcc049a9b7e", 86 | "sha256:386e2e4090f0bc5df274e720105c342263423e77ee8826002dcffe0c9533dbca", 87 | "sha256:fa964bae817babece5aa2e8c1af841bebb6d0b9add8e637548809d040443fee0", 88 | 
"sha256:df6712284b2e44a065097846488f66840445eb987eb81b3cc6e4149e7b6982e1", 89 | "sha256:efc89291bd5a08855829a3c522df16d856455297cf35ae827a37edac45f466a7", 90 | "sha256:e4ef9c164eb55123c62411f5936b5c2e521b12356037b6e1c2617cef45523d47", 91 | "sha256:ff37757e068ae606659c28c3bd0d923f9d29a85de79bf25b2b34b148473b5025", 92 | "sha256:bf0a7aed7f5521c7ca67febd57db473af4762b9622254291fbcbb8cd0ba5e33e", 93 | "sha256:19e4df788a0581238e9390c85a7a09af39c7b539b29f25c89209e6c3e371270d", 94 | "sha256:60851187677b24c6085248f0a0b9b98d49cba7ecc7ec60ba6b9d2e5574ac1ee9", 95 | "sha256:245388cda02af78276b479f299bbf3783ef0a6a6273037d7c60dc73b8d8d7755", 96 | "sha256:c0afd27bc0e307a1ffc04ca5ec010a290e49e3afbe841c5cafc5c5a80ecd81c9", 97 | "sha256:6ba744056423ef8d450cf627289166da65903885272055fb4b5e113137cfa14f", 98 | "sha256:af7ed8a8aa6957aac47b4268631fa1df984643f07ef00acd374e456364b373f5", 99 | "sha256:3a794ce50daee01c74a494919d5ebdc23d58873747fa0e288318728533a3e1ca", 100 | "sha256:0be0f1ed45fc0c185cfd4ecc19a1d6532d72f86a2bac9de7e24541febad72650", 101 | "sha256:eca2b7343524e7ba246cab8ff00cab47a2d6d54ada3b02772e908a45675722e2", 102 | "sha256:93715dffbcd0678057f947f496484e906bf9509f5c1c38fc9ba3922893cda5f5", 103 | "sha256:23cc09ed395b03424d1ae30dcc292615c1372bfba7141eb85e11e50efaa6b351", 104 | "sha256:141f08ed3c4b1847015e2cd62ec06d35e67a3ac185c26f7635f4406b90afa9c5", 105 | "sha256:e07d9f1a23e9e93ab5c62902833bf3e4b1f65502927379148b6622686223125c" 106 | ], 107 | "version": "==4.5.4" 108 | }, 109 | "idna": { 110 | "hashes": [ 111 | "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c", 112 | "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407" 113 | ], 114 | "version": "==2.8" 115 | }, 116 | "importlib-metadata": { 117 | "hashes": [ 118 | "sha256:d5f18a79777f3aa179c145737780282e27b508fc8fd688cb17c7a813e8bd39af", 119 | "sha256:aa18d7378b00b40847790e7c27e11673d7fed219354109d0e7b9e5b25dc3ad26" 120 | ], 121 | "markers": "python_version < '3.8'", 122 | 
"version": "==0.23" 123 | }, 124 | "isort": { 125 | "hashes": [ 126 | "sha256:6e811fcb295968434526407adb8796944f1988c5b65e8139058f2014cbe100fd", 127 | "sha256:54da7e92468955c4fceacd0c86bd0ec997b0e1ee80d97f67c35a78b719dccab1" 128 | ], 129 | "version": "==4.3.21" 130 | }, 131 | "lazy-object-proxy": { 132 | "hashes": [ 133 | "sha256:a2238e9d1bb71a56cd710611a1614d1194dc10a175c1e08d75e1a7bcc250d442", 134 | "sha256:0c4b206227a8097f05c4dbdd323c50edf81f15db3b8dc064d08c62d37e1a504d", 135 | "sha256:efa1909120ce98bbb3777e8b6f92237f5d5c8ea6758efea36a473e1d38f7d3e4", 136 | "sha256:4677f594e474c91da97f489fea5b7daa17b5517190899cf213697e48d3902f5a", 137 | "sha256:d945239a5639b3ff35b70a88c5f2f491913eb94871780ebfabb2568bd58afc5a", 138 | "sha256:9651375199045a358eb6741df3e02a651e0330be090b3bc79f6d0de31a80ec3e", 139 | "sha256:eba7011090323c1dadf18b3b689845fd96a61ba0a1dfbd7f24b921398affc357", 140 | "sha256:48dab84ebd4831077b150572aec802f303117c8cc5c871e182447281ebf3ac50", 141 | "sha256:ca0a928a3ddbc5725be2dd1cf895ec0a254798915fb3a36af0964a0a4149e3db", 142 | "sha256:194d092e6f246b906e8f70884e620e459fc54db3259e60cf69a4d66c3fda3449", 143 | "sha256:97bb5884f6f1cdce0099f86b907aa41c970c3c672ac8b9c8352789e103cf3156", 144 | "sha256:cb2c7c57005a6804ab66f106ceb8482da55f5314b7fcb06551db1edae4ad1531", 145 | "sha256:8d859b89baf8ef7f8bc6b00aa20316483d67f0b1cbf422f5b4dc56701c8f2ffb", 146 | "sha256:1be7e4c9f96948003609aa6c974ae59830a6baecc5376c25c92d7d697e684c08", 147 | "sha256:d74bb8693bf9cf75ac3b47a54d716bbb1a92648d5f781fc799347cfc95952383", 148 | "sha256:9b15f3f4c0f35727d3a0fba4b770b3c4ebbb1fa907dbcc046a1d2799f3edd142", 149 | "sha256:9254f4358b9b541e3441b007a0ea0764b9d056afdeafc1a5569eee1cc6c1b9ea", 150 | "sha256:a6ae12d08c0bf9909ce12385803a543bfe99b95fe01e752536a60af2b7797c62", 151 | "sha256:5541cada25cd173702dbd99f8e22434105456314462326f06dba3e180f203dfd", 152 | "sha256:59f79fef100b09564bc2df42ea2d8d21a64fdcda64979c0fa3db7bdaabaf6239", 153 | 
"sha256:f3900e8a5de27447acbf900b4750b0ddfd7ec1ea7fbaf11dfa911141bc522af0" 154 | ], 155 | "version": "==1.4.3" 156 | }, 157 | "mccabe": { 158 | "hashes": [ 159 | "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42", 160 | "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f" 161 | ], 162 | "version": "==0.6.1" 163 | }, 164 | "more-itertools": { 165 | "hashes": [ 166 | "sha256:92b8c4b06dac4f0611c0729b2f2ede52b2e1bac1ab48f089c7ddc12e26bb60c4", 167 | "sha256:409cd48d4db7052af495b09dec721011634af3753ae1ef92d2b32f73a745f832" 168 | ], 169 | "version": "==7.2.0" 170 | }, 171 | "numpy": { 172 | "hashes": [ 173 | "sha256:ede47b98de79565fcd7f2decb475e2dcc85ee4097743e551fe26cfc7eb3ff143", 174 | "sha256:43bb4b70585f1c2d153e45323a886839f98af8bfa810f7014b20be714c37c447", 175 | "sha256:c7354e8f0eca5c110b7e978034cd86ed98a7a5ffcf69ca97535445a595e07b8e", 176 | "sha256:64874913367f18eb3013b16123c9fed113962e75d809fca5b78ebfbb73ed93ba", 177 | "sha256:6ca4000c4a6f95a78c33c7dadbb9495c10880be9c89316aa536eac359ab820ae", 178 | "sha256:75fd817b7061f6378e4659dd792c84c0b60533e867f83e0d1e52d5d8e53df88c", 179 | "sha256:7d81d784bdbed30137aca242ab307f3e65c8d93f4c7b7d8f322110b2e90177f9", 180 | "sha256:fe39f5fd4103ec4ca3cb8600b19216cd1ff316b4990f4c0b6057ad982c0a34d5", 181 | "sha256:e467c57121fe1b78a8f68dd9255fbb3bb3f4f7547c6b9e109f31d14569f490c3", 182 | "sha256:8d0af8d3664f142414fd5b15cabfd3b6cc3ef242a3c7a7493257025be5a6955f", 183 | "sha256:9679831005fb16c6df3dd35d17aa31dc0d4d7573d84f0b44cc481490a65c7725", 184 | "sha256:acbf5c52db4adb366c064d0b7c7899e3e778d89db585feadd23b06b587d64761", 185 | "sha256:3d52298d0be333583739f1aec9026f3b09fdfe3ddf7c7028cb16d9d2af1cca7e", 186 | "sha256:475963c5b9e116c38ad7347e154e5651d05a2286d86455671f5b1eebba5feb76", 187 | "sha256:0c0763787133dfeec19904c22c7e358b231c87ba3206b211652f8cbe1241deb6", 188 | "sha256:683828e50c339fc9e68720396f2de14253992c495fdddef77a1e17de55f1decc", 189 | 
"sha256:e2e9d8c87120ba2c591f60e32736b82b67f72c37ba88a4c23c81b5b8fa49c018", 190 | "sha256:a8f67ebfae9f575d85fa859b54d3bdecaeece74e3274b0b5c5f804d7ca789fe1", 191 | "sha256:0a7a1dd123aecc9f0076934288ceed7fd9a81ba3919f11a855a7887cbe82a02f", 192 | "sha256:ada4805ed51f5bcaa3a06d3dd94939351869c095e30a2b54264f5a5004b52170", 193 | "sha256:f58913e9227400f1395c7b800503ebfdb0772f1c33ff8cb4d6451c06cabdf316" 194 | ], 195 | "version": "==1.17.4" 196 | }, 197 | "packaging": { 198 | "hashes": [ 199 | "sha256:d9551545c6d761f3def1677baf08ab2a3ca17c56879e70fecba2fc4dde4ed108", 200 | "sha256:28b924174df7a2fa32c1953825ff29c61e2f5e082343165438812f00d3a7fc47" 201 | ], 202 | "version": "==19.2" 203 | }, 204 | "pathlib2": { 205 | "hashes": [ 206 | "sha256:0ec8205a157c80d7acc301c0b18fbd5d44fe655968f5d947b6ecef5290fc35db", 207 | "sha256:6cd9a47b597b37cc57de1c05e56fb1a1c9cc9fab04fe78c29acd090418529868" 208 | ], 209 | "markers": "python_version < '3.6'", 210 | "version": "==2.3.5" 211 | }, 212 | "pluggy": { 213 | "hashes": [ 214 | "sha256:0db4b7601aae1d35b4a033282da476845aa19185c1e6964b25cf324b5e4ec3e6", 215 | "sha256:fa5fa1622fa6dd5c030e9cad086fa19ef6a0cf6d7a2d12318e10cb49d6d68f34" 216 | ], 217 | "version": "==0.13.0" 218 | }, 219 | "py": { 220 | "hashes": [ 221 | "sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", 222 | "sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53" 223 | ], 224 | "version": "==1.8.0" 225 | }, 226 | "pyarrow": { 227 | "hashes": [ 228 | "sha256:b508b860486f75bcfeab72b98b4d8caa3a1517e5b7a9b3adcd5bc4539bff8a1a", 229 | "sha256:2964a3fe09fbe704160734d00bef7b023699dc6a603dc8eb889b095effc464db", 230 | "sha256:87a2324a6e41faff3a482dbfc54a1f51bbf2d7da39ee728ec73869e2ef892a97", 231 | "sha256:1f3934b2add6839844443c1ac0eba64e14b2b8253563574d45d6831851b11d47", 232 | "sha256:f5af4cd64c774693af560576a6b8039d165596b1921031ca5d739bd2e7e0554b", 233 | "sha256:14dbc00edd14133c15d62c8d6c566a82a7497b077f253fc0c2dad62c7f85beaa", 234 | 
"sha256:4f0276e258065c82dcb7edfc28c343ccad15da02b25e57e7c60ceb80e3f7268b", 235 | "sha256:bc7200f7a97aea7301f61cd616b33069d1098e6d9178db6a34ccd43ea9223f53", 236 | "sha256:13f921560bac5ad46b17513696e38fede0c0e92ba750c7b350c0b231815bb706", 237 | "sha256:c70f7d0032be960d8dbd32661a9de062af184f411400ea2f4a13883ca11b0b1f", 238 | "sha256:030d67418b129eb14a1c1f1af06b1a48c8074005d704789725ea6f5addaf3b26", 239 | "sha256:5f6026673ceaa037cb41fbe86ce7ea6483cfdc91e51dea929fbbf81883a73d96", 240 | "sha256:41cf5ed34012c43b4ceeeeb2534e3454c77e852bc9175d2e506b45bad132db49", 241 | "sha256:4fa03d2bc725e948f361a8ce7de271e39d90130ee3a3375793ac241b452c5bfa", 242 | "sha256:364806e26769ca20a79b1ead301c7ce28fd0534eb6d411d441053288d7e45817", 243 | "sha256:17cda6ba594acf5a72058dd2e5ca2586fe8781fc8d20bd750a3b7c66c8b274b2", 244 | "sha256:5a07222b80ae36219c558cb8875e7e346f779d0862ae277c68899db879cf5cd7", 245 | "sha256:7ad074690ba38313067bf3bbda1258966d38e2037c035d08b9ffe3cce07747a5" 246 | ], 247 | "version": "==0.15.1" 248 | }, 249 | "pylint": { 250 | "hashes": [ 251 | "sha256:886e6afc935ea2590b462664b161ca9a5e40168ea99e5300935f6591ad467df4", 252 | "sha256:3db5468ad013380e987410a8d6956226963aed94ecb5f9d3a28acca6d9ac36cd" 253 | ], 254 | "version": "==2.4.4" 255 | }, 256 | "pyparsing": { 257 | "hashes": [ 258 | "sha256:20f995ecd72f2a1f4bf6b072b63b22e2eb457836601e76d6e5dfcd75436acc1f", 259 | "sha256:4ca62001be367f01bd3e92ecbb79070272a9d4964dce6a48a82ff0b8bc7e683a" 260 | ], 261 | "version": "==2.4.5" 262 | }, 263 | "pytest": { 264 | "hashes": [ 265 | "sha256:8e256fe71eb74e14a4d20a5987bb5e1488f0511ee800680aaedc62b9358714e8", 266 | "sha256:ff0090819f669aaa0284d0f4aad1a6d9d67a6efdc6dd4eb4ac56b704f890a0d6" 267 | ], 268 | "version": "==5.2.4" 269 | }, 270 | "pytest-cov": { 271 | "hashes": [ 272 | "sha256:cdbdef4f870408ebdbfeb44e63e07eb18bb4619fae852f6e760645fa36172626", 273 | "sha256:cc6742d8bac45070217169f5f72ceee1e0e55b0221f54bcf24845972d3a47f2b" 274 | ], 275 | "version": "==2.8.1" 276 | }, 277 | "pytz": 
{ 278 | "hashes": [ 279 | "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d", 280 | "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be" 281 | ], 282 | "version": "==2019.3" 283 | }, 284 | "requests": { 285 | "hashes": [ 286 | "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31", 287 | "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4" 288 | ], 289 | "version": "==2.22.0" 290 | }, 291 | "six": { 292 | "hashes": [ 293 | "sha256:1f1b7d42e254082a9db6279deae68afb421ceba6158efa6131de7b3003ee93fd", 294 | "sha256:30f610279e8b2578cab6db20741130331735c781b56053c59c4076da27f06b66" 295 | ], 296 | "version": "==1.13.0" 297 | }, 298 | "typed-ast": { 299 | "hashes": [ 300 | "sha256:262c247a82d005e43b5b7f69aff746370538e176131c32dda9cb0f324d27141e", 301 | "sha256:71211d26ffd12d63a83e079ff258ac9d56a1376a25bc80b1cdcdf601b855b90b", 302 | "sha256:630968c5cdee51a11c05a30453f8cd65e0cc1d2ad0d9192819df9978984529f4", 303 | "sha256:ffde2fbfad571af120fcbfbbc61c72469e72f550d676c3342492a9dfdefb8f12", 304 | "sha256:4e0b70c6fc4d010f8107726af5fd37921b666f5b31d9331f0bd24ad9a088e631", 305 | "sha256:bc6c7d3fa1325a0c6613512a093bc2a2a15aeec350451cbdf9e1d4bffe3e3233", 306 | "sha256:cc34a6f5b426748a507dd5d1de4c1978f2eb5626d51326e43280941206c209e1", 307 | "sha256:d896919306dd0aa22d0132f62a1b78d11aaf4c9fc5b3410d3c666b818191630a", 308 | "sha256:354c16e5babd09f5cb0ee000d54cfa38401d8b8891eefa878ac772f827181a3c", 309 | "sha256:95bd11af7eafc16e829af2d3df510cecfd4387f6453355188342c3e79a2ec87a", 310 | "sha256:18511a0b3e7922276346bcb47e2ef9f38fb90fd31cb9223eed42c85d1312344e", 311 | "sha256:d7c45933b1bdfaf9f36c579671fec15d25b06c8398f113dab64c18ed1adda01d", 312 | "sha256:d755f03c1e4a51e9b24d899561fec4ccaf51f210d52abdf8c07ee2849b212a36", 313 | "sha256:2b907eb046d049bcd9892e3076c7a6456c93a25bebfe554e931620c90e6a25b0", 314 | "sha256:fdc1c9bbf79510b76408840e009ed65958feba92a88833cdceecff93ae8fff66", 315 | 
"sha256:7954560051331d003b4e2b3eb822d9dd2e376fa4f6d98fee32f452f52dd6ebb2", 316 | "sha256:48e5b1e71f25cfdef98b013263a88d7145879fbb2d5185f2a0c79fa7ebbeae47", 317 | "sha256:1170afa46a3799e18b4c977777ce137bb53c7485379d9706af8a59f2ea1aa161", 318 | "sha256:838997f4310012cf2e1ad3803bce2f3402e9ffb71ded61b5ee22617b3a7f6b6e", 319 | "sha256:66480f95b8167c9c5c5c87f32cf437d585937970f3fc24386f313a4c97b44e34" 320 | ], 321 | "markers": "implementation_name == 'cpython' and python_version < '3.8'", 322 | "version": "==1.4.0" 323 | }, 324 | "urllib3": { 325 | "hashes": [ 326 | "sha256:a8a318824cc77d1fd4b2bec2ded92646630d7fe8619497b142c84a9e6f5a7293", 327 | "sha256:f3c5fd51747d450d4dcf6f923c81f78f811aab8205fda64b0aba34a4e48b0745" 328 | ], 329 | "version": "==1.25.7" 330 | }, 331 | "wcwidth": { 332 | "hashes": [ 333 | "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c", 334 | "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e" 335 | ], 336 | "version": "==0.1.7" 337 | }, 338 | "wrapt": { 339 | "hashes": [ 340 | "sha256:565a021fd19419476b9362b05eeaa094178de64f8361e44468f9e9d7843901e1" 341 | ], 342 | "version": "==1.11.2" 343 | }, 344 | "zipp": { 345 | "hashes": [ 346 | "sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335", 347 | "sha256:3718b1cbcd963c7d4c5511a8240812904164b7f381b647143a89d3b98f9bcd8e" 348 | ], 349 | "version": "==0.6.0" 350 | } 351 | }, 352 | "develop": {} 353 | } 354 | --------------------------------------------------------------------------------