├── requirements ├── prod.txt ├── dev.in ├── ci.in ├── lint.in ├── lint.txt ├── ci.txt └── dev.txt ├── mutmut-test.sh ├── .gitattributes ├── tests ├── files │ ├── example.png │ ├── example.pickle │ ├── example.jsonl │ ├── example.csv │ └── example.json ├── test_ml.py ├── test_path.py ├── test_decorators.py ├── test_shell.py ├── test_image.py ├── test_string_trie.py ├── test_mutmut_killers.py ├── conftest.py ├── test_char_trie.py ├── test_datetime.py ├── test_aws.py ├── test_string.py ├── test_trie.py ├── test_main.py ├── test_math.py ├── test_nodebased_trie.py ├── test_pd.py ├── test_units.py └── test_io.py ├── docs ├── requirements.in ├── source │ ├── mpu.rst │ ├── io.rst │ ├── ml.rst │ ├── pd.rst │ ├── aws.rst │ ├── image.rst │ ├── math.rst │ ├── path.rst │ ├── shell.rst │ ├── type.rst │ ├── string.rst │ ├── datetime.rst │ ├── geometry.rst │ ├── decorators.rst │ ├── datastructures.rst │ ├── index.rst │ ├── units.rst │ └── conf.py ├── requirements.txt └── Makefile ├── mpu ├── data │ ├── laguages.csv.gz │ ├── sources.txt │ └── iban.csv ├── _version.py ├── datastructures │ └── trie │ │ ├── base.py │ │ ├── full_prefix_dict.py │ │ ├── __init__.py │ │ ├── char_trie.py │ │ └── string_trie.py ├── image.py ├── type.py ├── decorators.py ├── path.py ├── ml.py ├── datetime.py ├── shell.py ├── aws.py ├── math.py ├── __init__.py ├── pd.py ├── geometry.py ├── string.py └── io.py ├── requirements.txt ├── .pylintrc ├── create_package.sh ├── .travis.yml ├── tox.ini ├── .isort.cfg ├── .circleci └── config.yml ├── .readthedocs.yml ├── .coveragerc ├── setup.py ├── azure-pipelines.yml ├── Makefile ├── LICENSE ├── .github └── workflows │ └── python-package.yml ├── .pre-commit-config.yaml ├── .gitignore ├── setup.cfg └── README.md /requirements/prod.txt: -------------------------------------------------------------------------------- 1 | -r ../requirements.txt 2 | -------------------------------------------------------------------------------- /mutmut-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | mypy mpu/ 3 | pytest -x 4 | -------------------------------------------------------------------------------- /requirements/dev.in: -------------------------------------------------------------------------------- 1 | -r ci.txt 2 | pip-tools 3 | pre-commit 4 | wheel 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py encoding=utf-8 2 | *.json encoding=utf-8 3 | *.csv encoding=utf-8 4 | -------------------------------------------------------------------------------- /tests/files/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/mpu/HEAD/tests/files/example.png -------------------------------------------------------------------------------- /docs/requirements.in: -------------------------------------------------------------------------------- 1 | pandas 2 | sphinx_rtd_theme>=0.3.1 3 | boto3 4 | Sphinx 5 | typing_extensions 6 | -------------------------------------------------------------------------------- /mpu/data/laguages.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/mpu/HEAD/mpu/data/laguages.csv.gz -------------------------------------------------------------------------------- /tests/files/example.pickle: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/mpu/HEAD/tests/files/example.pickle -------------------------------------------------------------------------------- /docs/source/mpu.rst: -------------------------------------------------------------------------------- 1 | mpu 2 | --- 3 | 4 | .. automodule:: mpu 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /mpu/data/sources.txt: -------------------------------------------------------------------------------- 1 | ## languages.csv.gz 2 | 3 | Credits to https://github.com/annexare/Countries/blob/master/data/languages.json 4 | -------------------------------------------------------------------------------- /docs/source/io.rst: -------------------------------------------------------------------------------- 1 | mpu.io 2 | ------ 3 | 4 | .. automodule:: mpu.io 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/ml.rst: -------------------------------------------------------------------------------- 1 | mpu.ml 2 | ------ 3 | 4 | .. automodule:: mpu.ml 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/pd.rst: -------------------------------------------------------------------------------- 1 | mpu.pd 2 | ------ 3 | 4 | .. automodule:: mpu.pd 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /mpu/_version.py: -------------------------------------------------------------------------------- 1 | """Store the version for setup.py and the module itself.""" 2 | __version__ = "0.23.0" # keep in sync with ../setup.py 3 | -------------------------------------------------------------------------------- /docs/source/aws.rst: -------------------------------------------------------------------------------- 1 | mpu.aws 2 | -------- 3 | 4 | .. automodule:: mpu.aws 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /tests/files/example.jsonl: -------------------------------------------------------------------------------- 1 | {"some":"thing"} 2 | {"foo":17,"bar":false,"quux":true} 3 | {"may":{"include":"nested","objects":["and","arrays"]}} 4 | -------------------------------------------------------------------------------- /docs/source/image.rst: -------------------------------------------------------------------------------- 1 | mpu.image 2 | --------- 3 | 4 | .. automodule:: mpu.image 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/math.rst: -------------------------------------------------------------------------------- 1 | mpu.math 2 | -------- 3 | 4 | .. automodule:: mpu.math 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/path.rst: -------------------------------------------------------------------------------- 1 | mpu.path 2 | -------- 3 | 4 | .. 
automodule:: mpu.path 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/shell.rst: -------------------------------------------------------------------------------- 1 | mpu.shell 2 | --------- 3 | 4 | .. automodule:: mpu.shell 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/type.rst: -------------------------------------------------------------------------------- 1 | mpu.type 2 | -------- 3 | 4 | .. automodule:: mpu.type 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.7 3 | # To update, run: 4 | # 5 | # pip-compile setup.py 6 | # 7 | -------------------------------------------------------------------------------- /docs/source/string.rst: -------------------------------------------------------------------------------- 1 | mpu.string 2 | ---------- 3 | 4 | .. automodule:: mpu.string 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/datetime.rst: -------------------------------------------------------------------------------- 1 | mpu.datetime 2 | ------------ 3 | 4 | .. automodule:: mpu.datetime 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/geometry.rst: -------------------------------------------------------------------------------- 1 | mpu.geometry 2 | ------------ 3 | 4 | .. automodule:: mpu.geometry 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/decorators.rst: -------------------------------------------------------------------------------- 1 | mpu.decorators 2 | -------------- 3 | 4 | .. automodule:: mpu.decorators 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | load-plugins=pylint.extensions.mccabe 3 | 4 | [MESSAGES CONTROL] 5 | disable=R0205,R1705,C0411,C0413,C0103,C1801,C0325,bad-continuation,logging-fstring-interpolation 6 | -------------------------------------------------------------------------------- /docs/source/datastructures.rst: -------------------------------------------------------------------------------- 1 | mpu.datastructures 2 | ------------------ 3 | 4 | .. 
automodule:: mpu.datastructures 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /requirements/ci.in: -------------------------------------------------------------------------------- 1 | -r prod.txt 2 | boto3>=1.7.84 3 | hypothesis 4 | moto>=1.3.3 5 | pandas 6 | pip-tools 7 | pytest 8 | pytest-cov 9 | pytest-timeout 10 | pytest-benchmark 11 | simplejson 12 | twine 13 | wheel 14 | -------------------------------------------------------------------------------- /tests/files/example.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 1,"A towel,",1.0 3 | 42," it says, ",2.0 4 | 1337,is about the most ,-1 5 | 0,massively useful thing ,123 6 | -2,"an interstellar hitchhiker can have. 7 | ",3 8 | 3.141,Special char test: €üößł,2.7 9 | -------------------------------------------------------------------------------- /create_package.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm -rf lambda.zip; 4 | rm -rf venv-lambda && python3 -m venv venv-lambda && source venv-lambda/bin/activate && pip install . --upgrade && deactivate 5 | cd venv-lambda/lib/python3.8/site-packages/; 6 | zip -ur -D ../../../../lambda.zip mpu; 7 | -------------------------------------------------------------------------------- /tests/files/example.json: -------------------------------------------------------------------------------- 1 | { 2 | "a list": [ 3 | 1, 4 | 42, 5 | 3.141, 6 | 1337, 7 | "help", 8 | "€" 9 | ], 10 | "a string": "bla", 11 | "another dict": { 12 | "foo": "bar", 13 | "key": "value", 14 | "the answer": 42 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.7 4 | - 3.8 5 | - 3.9 6 | before_install: 7 | - sudo rm -f /etc/boto.cfg # https://github.com/travis-ci/travis-ci/issues/7940#issuecomment-310759657 8 | install: 9 | - pip install coveralls tox-travis 10 | script: 11 | - tox 12 | after_success: 13 | - coveralls 14 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = linter,py37,py38,py39 3 | 4 | [testenv] 5 | deps = 6 | -r requirements/ci.txt 7 | commands = 8 | pip install -e .[all] 9 | pytest . 10 | 11 | [testenv:linter] 12 | deps = 13 | -r requirements/lint.txt 14 | commands = 15 | flake8 16 | black --check . 17 | pydocstyle 18 | mypy . 
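The fixture files shown a little earlier (tests/files/example.json, example.csv, example.jsonl, example.pickle) appear to exist to exercise `mpu.io`, which picks a (de)serializer based on the file extension. A minimal round-trip sketch; the exact `read`/`write` call shape is an assumption, since mpu/io.py's body is not part of this dump:

```python
import mpu.io

# mpu.io dispatches on the extension: .json here; .csv/.jsonl/.pickle work alike.
data = mpu.io.read("tests/files/example.json")  # parsed into a dict
mpu.io.write("example-roundtrip.json", data)    # serialized back to disk
```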
19 | -------------------------------------------------------------------------------- /requirements/lint.in: -------------------------------------------------------------------------------- 1 | black 2 | flake8 3 | flake8_implicit_str_concat 4 | flake8-assert-msg 5 | flake8-bugbear 6 | flake8-builtins 7 | flake8-comprehensions 8 | flake8-eradicate 9 | flake8-executable 10 | flake8-isort 11 | flake8-pytest-style 12 | flake8-raise 13 | flake8-simplify 14 | flake8-string-format 15 | mccabe 16 | mypy 17 | pydocstyle 18 | -------------------------------------------------------------------------------- /mpu/datastructures/trie/base.py: -------------------------------------------------------------------------------- 1 | # Core Library 2 | from abc import abstractmethod 3 | from collections.abc import Collection 4 | from typing import List 5 | 6 | 7 | class AbstractTrie(Collection): 8 | @abstractmethod 9 | def autocomplete(self, prefix: str) -> List[str]: 10 | """Return a list of all words with the given prefix.""" 11 | -------------------------------------------------------------------------------- /tests/test_ml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | # Third party 5 | import pytest 6 | 7 | # First party 8 | import mpu.ml 9 | 10 | 11 | def test_negative_class_number(): 12 | with pytest.raises(ValueError): 13 | mpu.ml.indices2one_hot([0, 1, 1], 0) 14 | 15 | 16 | def test_indices2one_hot(): 17 | assert mpu.ml.indices2one_hot([0, 0], 1) == [[1], [1]] 18 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | line_length=79 3 | indent=' ' 4 | multi_line_output=3 5 | length_sort=0 6 | import_heading_stdlib=Core Library 7 | import_heading_firstparty=First party 8 | import_heading_thirdparty=Third party 9 | import_heading_localfolder=Local 10 | known_third_party = boto3,hypothesis,moto,pandas,pkg_resources,pytest,pytz,setuptools,simplejson,typing_extensions 11 | include_trailing_comma=True 12 | -------------------------------------------------------------------------------- /tests/test_path.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Third party 4 | import pkg_resources 5 | 6 | # First party 7 | from mpu.path import get_all_files, get_from_package 8 | 9 | 10 | def test_get_meta(): 11 | path = "files" 12 | root = pkg_resources.resource_filename(__name__, path) 13 | meta = get_all_files(root) 14 | assert len(meta) == 5 15 | 16 | 17 | def test_get_from_package(): 18 | get_from_package("mpu", "data/iban.csv") 19 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | orbs: 4 | python: circleci/python@0.2.1 5 | 6 | jobs: 7 | build-and-test: 8 | executor: python/default 9 | steps: 10 | - checkout 11 | - run: 12 | command: pip install -r requirements/ci.txt 13 | name: Install Test requirements-dev 14 | - run: 15 | command: pip install -e .[all] 16 | name: Install Package 17 | - run: 18 | command: pytest 19 | name: Test 20 | 21 | workflows: 22 | main: 23 | jobs: 24 | - build-and-test 25 | -------------------------------------------------------------------------------- /.readthedocs.yml: 
-------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | configuration: docs/source/conf.py 11 | 12 | # Build these additional formats 13 | formats: 14 | - htmlzip 15 | - pdf 16 | 17 | # Optionally set the version of Python and requirements required to build your docs 18 | python: 19 | version: 3.8 20 | install: 21 | - requirements: docs/requirements.txt 22 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = mpu 3 | branch = True 4 | 5 | [report] 6 | # Regexes for lines to exclude from consideration 7 | exclude_lines = 8 | # Have to re-enable the standard pragma 9 | pragma: no cover 10 | @overload 11 | 12 | # Don't complain about missing debug-only code: 13 | def __repr__ 14 | def __str__ 15 | if self\.debug 16 | 17 | # Don't complain if tests don't hit defensive assertion code: 18 | raise AssertionError 19 | raise NotImplementedError 20 | 21 | # Don't complain if non-runnable code isn't run: 22 | if 0: 23 | if __name__ == .__main__.: 24 | -------------------------------------------------------------------------------- /tests/test_decorators.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Core Library 4 | import warnings 5 | 6 | # First party 7 | from mpu.decorators import deprecated, timing 8 | 9 | 10 | def test_timing(): 11 | @timing 12 | def fib(n): 13 | if n < 1: 14 | return n 15 | else: 16 | return fib(n - 1) + fib(n - 2) 17 | 18 | fib(2) 19 | 20 | 21 | def test_deprecated(): 22 | with warnings.catch_warnings(record=True): 23 | 24 | @deprecated 25 | def fib(n): 26 | if n < 1: 27 | return n 28 | else: 29 | return fib(n - 1) + fib(n - 2) 30 | 31 | fib(2) 32 | -------------------------------------------------------------------------------- /tests/test_shell.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Core Library 4 | import sys 5 | from io import StringIO 6 | 7 | # First party 8 | from mpu.shell import Codes, text_input 9 | 10 | 11 | def test_codes(): 12 | s = Codes.BOLD + Codes.GREEN + "WORKS!"
+ Codes.RESET_ALL 13 | assert isinstance(s, str) 14 | 15 | 16 | def test_codes_start_with_esc(): 17 | ESC = "\033" # https://askubuntu.com/q/831971/10425 18 | codes_obj = Codes() 19 | codes = [a for a in dir(codes_obj) if not a.startswith("__")] 20 | for code in codes: 21 | assert Codes.__dict__[code].startswith(ESC) 22 | 23 | 24 | def test_text_input(): 25 | sys.stdin = StringIO("foo\nbar") 26 | text_input("foo") 27 | -------------------------------------------------------------------------------- /tests/test_image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Core Library 4 | import sys 5 | from unittest import mock 6 | 7 | # Third party 8 | import pkg_resources 9 | 10 | # First party 11 | from mpu.image import get_meta 12 | 13 | # def test_get_meta(): 14 | # path = "files/example.png" 15 | # source = pkg_resources.resource_filename(__name__, path) 16 | # meta = get_meta(source) 17 | # meta["file"] = None 18 | # assert meta == {"width": 252, "height": 167, "channels": 4, "file": None} 19 | 20 | 21 | # def test_import_error(): 22 | # path = "files/example.png" 23 | # source = pkg_resources.resource_filename(__name__, path) 24 | # with mock.patch.dict(sys.modules, {"PIL": None}): 25 | # meta = get_meta(source) 26 | # meta["file"] = None 27 | # assert meta == {"file": None} 28 | -------------------------------------------------------------------------------- /mpu/image.py: -------------------------------------------------------------------------------- 1 | """Image manipulation.""" 2 | 3 | # Core Library 4 | from typing import Dict 5 | 6 | # First party 7 | import mpu 8 | 9 | 10 | def get_meta(filepath: str) -> Dict: 11 | """ 12 | Get meta-information of an image. 13 | 14 | Parameters 15 | ---------- 16 | filepath : str 17 | 18 | Returns 19 | ------- 20 | meta : Dict 21 | """ 22 | meta = {} 23 | try: 24 | # Third party 25 | from PIL import Image 26 | 27 | with Image.open(filepath) as img: 28 | width, height = img.size 29 | meta["width"] = width 30 | meta["height"] = height 31 | meta["channels"] = len(img.mode) # RGB, RGBA - does this always work? 
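            # Editor's note (answering the question above): len(img.mode) matches
            # the band count for common modes ("L" is 1, "RGB" is 3, "RGBA" is 4),
            # but not for every mode: "YCbCr" has 3 bands yet 5 characters, so
            # len(img.getbands()) would be exact.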
32 | except ImportError: 33 | pass 34 | 35 | # Get times - creation, last edit, last open 36 | meta["file"] = mpu.io.get_file_meta(filepath) 37 | return meta 38 | -------------------------------------------------------------------------------- /mpu/type.py: -------------------------------------------------------------------------------- 1 | """Helpers for type annotations.""" 2 | 3 | # Core Library 4 | import typing 5 | from typing import Any 6 | 7 | # Third party 8 | from typing_extensions import Protocol 9 | 10 | C = typing.TypeVar("C", bound="Comparable") 11 | 12 | 13 | class Comparable(Protocol): 14 | """Type for a function which is comparable to other instances.""" 15 | 16 | def __eq__(self, other: Any) -> bool: 17 | """Check if the comparable is equal to other.""" 18 | 19 | def __lt__(self: C, other: C) -> bool: 20 | """Check if the comparable is less than other.""" 21 | 22 | def __gt__(self: C, other: C) -> bool: 23 | """Check if the comparable is greater than other.""" 24 | 25 | def __le__(self: C, other: C) -> bool: 26 | """Check if the comparable is less than or equal to other.""" 27 | 28 | def __ge__(self: C, other: C) -> bool: 29 | """Check if the comparable is greater than or equal to other.""" 30 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """mpu: Martins Python Utilities.""" 2 | 3 | # Third party 4 | from setuptools import setup 5 | 6 | requires_datetime = ["pytz"] 7 | requires_image = ["Pillow"] 8 | requires_io = ["pytz", "tzlocal"] 9 | requires_aws = ["boto3"] 10 | requires_tests = [ 11 | "pytest", 12 | "pytest-cov", 13 | "pytest-mccabe", 14 | "pytest-flake8", 15 | "simplejson", 16 | ] 17 | requires_all = ( 18 | ["pandas", "python-magic", "typing_extensions"] 19 | + requires_datetime 20 | + requires_image 21 | + requires_io 22 | + requires_aws 23 | + requires_tests 24 | ) 25 | 26 | setup( 27 | package_data={"mpu": ["units/currencies.csv", "data/*", "package/templates/*"]}, 28 | extras_require={ 29 | "all": requires_all, 30 | "aws": requires_aws, 31 | "datetime": requires_datetime, 32 | "image": requires_image, 33 | "io": requires_io, 34 | "tests": requires_tests, 35 | }, 36 | tests_require=requires_tests, 37 | ) 38 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | # Python package 2 | # Create and test a Python package on multiple Python versions. 
3 | # Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: 4 | # https://docs.microsoft.com/azure/devops/pipelines/languages/python 5 | 6 | trigger: 7 | - master 8 | 9 | pool: 10 | vmImage: 'VS2017-Win2016' 11 | strategy: 12 | matrix: 13 | Python37: 14 | python.version: '3.7' 15 | Python38: 16 | python.version: '3.8' 17 | 18 | steps: 19 | - task: UsePythonVersion@0 20 | inputs: 21 | versionSpec: '$(python.version)' 22 | displayName: 'Use Python $(python.version)' 23 | 24 | - script: | 25 | python -m pip install --upgrade pip 26 | pip install -r requirements/ci.txt 27 | displayName: 'Install dependencies' 28 | 29 | - script: | 30 | pip install .[all] 31 | displayName: 'Install package' 32 | 33 | - script: | 34 | pip install pytest pytest-azurepipelines 35 | pytest -vv 36 | displayName: 'pytest' 37 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | maint: 2 | pip install -r requirements/dev.txt 3 | pre-commit autoupdate && pre-commit run --all-files 4 | pip-compile -U docs/requirements.in 5 | pip-compile -U setup.py 6 | pip-compile -U requirements/ci.in 7 | pip-compile -U requirements/dev.in 8 | 9 | upload: 10 | make clean 11 | python setup.py sdist bdist_wheel && twine upload -s dist/* 12 | 13 | clean: 14 | python setup.py clean --all 15 | pyclean . 16 | rm -rf *.pyc build dist tests/reports docs/build .pytest_cache .tox .coverage html/ 17 | rm -rf mpu.egg-info lambda.zip venv-lambda 18 | rm -rf __pycache__ mpu/datastructures/trie/__pycache__ mpu/__pycache__ mpu/units/__pycache__ tests/__pycache__ 19 | 20 | package: 21 | make clean 22 | ./create_package.sh 23 | 24 | mutation-test: 25 | mutmut run 26 | 27 | mutmut-results: 28 | mutmut junitxml --suspicious-policy=ignore --untested-policy=ignore > mutmut-results.xml 29 | junit2html mutmut-results.xml mutmut-results.html 30 | 31 | bandit: 32 | # Python3 only: B322 is safe 33 | bandit -r mpu -s B322 34 | -------------------------------------------------------------------------------- /tests/test_string_trie.py: -------------------------------------------------------------------------------- 1 | # First party 2 | from mpu.datastructures.trie.string_trie import Trie 3 | 4 | 5 | def test_trie_print(): 6 | data = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 7 | trie = Trie(data) 8 | trie_data = trie.print(print_stdout=True) 9 | trie_data = trie.print(print_stdout=False) 10 | expected = """Trie 11 | cat 12 | tle 13 | d 14 | og 15 | tom 16 | atoe 17 | cat""" 18 | assert trie_data == expected 19 | trie.print(print_stdout=True) 20 | 21 | 22 | def test_trie_creation_prefix_search(): 23 | data = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 24 | trie = Trie(data) 25 | expected = {"tom", "tomcat", "tomatoe"} 26 | prefix, subtrie = trie.get_subtrie("tom") 27 | assert {prefix + element for element in subtrie} == expected 28 | 29 | 30 | def test_get_subtrie_direct_hit2(): 31 | trie = Trie(["foobar"]) 32 | assert [word for subtrie in trie.get_subtrie("foobar") for word in subtrie] == [ 33 | "foobar" 34 | ] 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Martin Thoma 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and
associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /mpu/datastructures/trie/full_prefix_dict.py: -------------------------------------------------------------------------------- 1 | # Core Library 2 | from collections import defaultdict 3 | 4 | # First party 5 | from mpu.datastructures.trie.base import AbstractTrie 6 | 7 | 8 | class FullPrefixDict(AbstractTrie): 9 | def __init__(self, container=None): 10 | if container is None: 11 | container = [] 12 | self._prefix2words = defaultdict(list) # Prefix to list of words 13 | self._len = 0 14 | for element in container: 15 | self.push(element) 16 | 17 | def __len__(self): 18 | return self._len 19 | 20 | def __iter__(self): 21 | yield from self._prefix2words[""] 22 | 23 | def push(self, element): 24 | self._len += 1 25 | for i in range(0, len(element) + 1): 26 | prefix = element[:i] 27 | self._prefix2words[prefix].append(element) 28 | self._prefix2words[prefix] = sorted(self._prefix2words[prefix]) 29 | 30 | def autocomplete(self, prefix): 31 | return self._prefix2words[prefix] 32 | 33 | def __contains__(self, element): 34 | return element in self._prefix2words[element] 35 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: [3.7, 3.8, 3.9] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install -r requirements/ci.txt 30 | pip install -r requirements/lint.txt 31 | pip install .[all] 32 | - name: Test with pytest 33 | run: | 34 | pytest 35 | - name: Test with mypy 36 | run: | 37 | mypy . 
--exclude=build 38 | -------------------------------------------------------------------------------- /tests/test_mutmut_killers.py: -------------------------------------------------------------------------------- 1 | # Third party 2 | import pytest 3 | 4 | # First party 5 | from mpu.datastructures import Interval 6 | from mpu.units import Money 7 | 8 | 9 | def test_eq_exception_msg(): 10 | a = Money("0.1", "EUR") 11 | with pytest.raises(ValueError) as excinfo: 12 | a == 0.5 13 | assert "XX" not in str(excinfo) 14 | 15 | 16 | def test_interval_exception_msg(): 17 | with pytest.raises(RuntimeError) as excinfo: 18 | Interval(None, 3) 19 | assert "XX" not in str(excinfo) 20 | 21 | 22 | def test_interval_left_bigger_right_exception_msg(): 23 | with pytest.raises(RuntimeError) as excinfo: 24 | Interval(5, 3) 25 | assert "XX" not in str(excinfo) 26 | 27 | 28 | def test_interval_invalid_issubset(): 29 | class Impossible: 30 | def __init__(self): 31 | self.left = -1 32 | self.right = float("nan") 33 | 34 | def is_empty(self): 35 | return False 36 | 37 | interval = Interval(0, 1) 38 | other = Impossible() 39 | with pytest.raises(RuntimeError) as excinfo: 40 | interval.issubset(other) 41 | assert "XX" not in str(excinfo) 42 | -------------------------------------------------------------------------------- /mpu/datastructures/trie/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | A trie is a prefix-tree. It allows efficient search by prefixes. 3 | 4 | There are three different prefix-trees implemented in mpu: 5 | 6 | * FullPrefixTrie: Every prefix of every word is stored 7 | * CharTrie: Every single character is a node 8 | * StringTrie: Every node stores a substring of the word which is as long as 9 | possible. 10 | 11 | | | FullPrefixTrie | CharTrie | StringTrie | 12 | | ----------------------------- | -------------- | -------- | ---------- | 13 | | Insert word with w characters | O(w) | O(w) | ? | 14 | | Lookup word with w characters | O(1) | O(w) | ? | 15 | 16 | Typically, the FullPrefixTrie is the fastest solution and uses by far the most 17 | memory.
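A minimal usage sketch of the three variants named above; it assumes only what this dump shows, namely that `FullPrefixDict`, `CharTrie`, and `StringTrie` are importable from `mpu.datastructures.trie` and implement `AbstractTrie.autocomplete`:

```python
from mpu.datastructures.trie import CharTrie, FullPrefixDict, StringTrie

words = ["dog", "tom", "tomcat", "tomato"]

# All three implement the AbstractTrie interface, so they are interchangeable here.
for trie_class in (FullPrefixDict, CharTrie, StringTrie):
    trie = trie_class(words)
    print(sorted(trie.autocomplete("tom")))  # ['tom', 'tomato', 'tomcat']
```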
18 | 19 | See also 20 | -------- 21 | * [Should a prefix tree (trie) node store only a single character or a 22 | string?](https://cs.stackexchange.com/q/121937/2914) 23 | """ 24 | 25 | # First party 26 | from mpu.datastructures.trie.char_trie import Trie as CharTrie # noqa 27 | from mpu.datastructures.trie.full_prefix_dict import FullPrefixDict # noqa 28 | from mpu.datastructures.trie.string_trie import Trie as StringTrie # noqa 29 | 30 | Trie = FullPrefixDict 31 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Core Library 2 | import os 3 | from tempfile import mkstemp 4 | 5 | # Third party 6 | import pytest 7 | 8 | 9 | def create_tempfile(suffix=None, prefix=None): 10 | """Create a named temporary file, close its OS handle, and return its path.""" 11 | handle, pathname = mkstemp(suffix=suffix, prefix=prefix) 12 | os.close(handle) 13 | return pathname 14 | 15 | 16 | @pytest.fixture 17 | def json_tempfile(): 18 | pathname = create_tempfile(suffix=".json") 19 | yield pathname 20 | os.remove(pathname) 21 | 22 | 23 | @pytest.fixture 24 | def jsonl_tempfile(): 25 | pathname = create_tempfile(suffix=".jsonl") 26 | yield pathname 27 | os.remove(pathname) 28 | 29 | 30 | @pytest.fixture 31 | def jpg_tempfile(): 32 | pathname = create_tempfile(suffix=".jpg") 33 | yield pathname 34 | os.remove(pathname) 35 | 36 | 37 | @pytest.fixture 38 | def pickle_tempfile(): 39 | pathname = create_tempfile(suffix=".pickle") 40 | yield pathname 41 | os.remove(pathname) 42 | 43 | 44 | @pytest.fixture 45 | def csv_tempfile(): 46 | pathname = create_tempfile(suffix=".csv") 47 | yield pathname 48 | os.remove(pathname) 49 | 50 | 51 | @pytest.fixture 52 | def hdf5_tempfile(): 53 | pathname = create_tempfile(suffix=".hdf5") 54 | yield pathname 55 | os.remove(pathname) 56 | -------------------------------------------------------------------------------- /mpu/decorators.py: -------------------------------------------------------------------------------- 1 | """Decorators which are not in `functools`.""" 2 | 3 | # Core Library 4 | import functools 5 | import warnings 6 | from time import time 7 | from typing import Callable, Dict, List 8 | 9 | 10 | def timing(func: Callable) -> Callable: 11 | """Measure the execution time of a function call and print the result.""" 12 | 13 | @functools.wraps(func) 14 | def wrap(*args: List, **kw: Dict) -> Callable: 15 | t0 = time() 16 | result = func(*args, **kw) 17 | t1 = time() 18 | print( 19 | f"func:{func.__name__!r} args:[{args!r}, {kw!r}] took: " 20 | f"{t1 - t0:2.4f} sec" 21 | ) 22 | return result 23 | 24 | return wrap 25 | 26 | 27 | def deprecated(func: Callable) -> Callable: 28 | """ 29 | Mark functions as deprecated. 30 | 31 | It will result in a warning being emitted when the function is used.
32 | """ 33 | 34 | @functools.wraps(func) 35 | def new_func(*args: List, **kwargs: Dict) -> Callable: 36 | warnings.warn_explicit( 37 | f"Call to deprecated function {func.__name__}.", 38 | category=DeprecationWarning, 39 | filename=func.__code__.co_filename, 40 | lineno=func.__code__.co_firstlineno + 1, 41 | ) 42 | return func(*args, **kwargs) 43 | 44 | return new_func 45 | -------------------------------------------------------------------------------- /mpu/path.py: -------------------------------------------------------------------------------- 1 | """Functions for path manipulation and retrieval of files.""" 2 | 3 | # Core Library 4 | import os 5 | from typing import List 6 | 7 | # Third party 8 | import pkg_resources 9 | 10 | 11 | def get_all_files(root: str, followlinks: bool = False) -> List: 12 | """ 13 | Get all files within the given root directory. 14 | 15 | Note that this list is not ordered. 16 | 17 | Parameters 18 | ---------- 19 | root : str 20 | Path to a directory 21 | followlinks : bool, optional (default: False) 22 | 23 | Returns 24 | ------- 25 | filepaths : List 26 | List of absolute paths to files 27 | """ 28 | filepaths = [] 29 | for path, _, files in os.walk(root, followlinks=followlinks): 30 | for name in files: 31 | filepaths.append(os.path.abspath(os.path.join(path, name))) 32 | return filepaths 33 | 34 | 35 | def get_from_package(package_name: str, path: str) -> str: 36 | """ 37 | Get the absolute path to a file in a package. 38 | 39 | Parameters 40 | ---------- 41 | package_name : str 42 | e.g. 'mpu' 43 | path : str 44 | Path within a package 45 | 46 | Returns 47 | ------- 48 | filepath : str 49 | """ 50 | filepath = pkg_resources.resource_filename(package_name, path) 51 | return os.path.abspath(filepath) 52 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. mpu documentation master file, created by 2 | sphinx-quickstart on Wed May 2 22:11:51 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to mpu's documentation! 7 | =============================== 8 | 9 | 10 | This package contains various small functions and classes. All of the 11 | functionality is not offered by any mayor package. 12 | 13 | Core design principles are: 14 | 15 | * **Lightweight**: mpu does not bring unexpected dependencies. You have 16 | fine-grained control via extras. 17 | * **Documentation**: Every parameter is properly documented. For each opened 18 | issue or question I will think about adding the information to the docs 19 | * **Testing**: >90% test coverage. For each issue found I will think about 20 | creating a test which could have shown the issue. 21 | 22 | Please note that this is not in version 1.0 yet. So there will likely be 23 | breaking changes. 24 | 25 | 26 | Contents: 27 | 28 | .. 
toctree:: 29 | :maxdepth: 2 30 | 31 | mpu 32 | aws 33 | datastructures 34 | datetime 35 | decorators 36 | geometry 37 | image 38 | io 39 | math 40 | ml 41 | path 42 | pd 43 | shell 44 | string 45 | type 46 | units 47 | 48 | 49 | 50 | Indices and tables 51 | ------------------ 52 | 53 | * :ref:`modindex` 54 | * :ref:`search` 55 | -------------------------------------------------------------------------------- /tests/test_char_trie.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Test the mpu.datastructures.char_trie module.""" 4 | 5 | # Third party 6 | import pytest 7 | 8 | # First party 9 | from mpu.datastructures.trie.char_trie import EMPTY_NODE, Trie, TrieNode 10 | 11 | 12 | def test_trie_print(): 13 | data = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 14 | trie = Trie(data) 15 | trie_data = trie.print(print_stdout=False) 16 | expected = """Trie 17 | 18 | c 19 | a 20 | t 21 | t 22 | l 23 | e 24 | d 25 | o 26 | g 27 | t 28 | o 29 | m 30 | a 31 | t 32 | o 33 | e 34 | c 35 | a 36 | t""" 37 | assert trie_data == expected 38 | trie.print(print_stdout=True) 39 | 40 | 41 | def test_create_trie_node_with_children(): 42 | TrieNode("b", children={"a": TrieNode("a")}) 43 | 44 | 45 | def test_trie_node_push(): 46 | node = TrieNode(value="a") 47 | with pytest.raises(ValueError) as exinfo: 48 | node.push("") 49 | assert str(exinfo.value) == "The pushed value should not be empty" 50 | 51 | 52 | def test_get_subtrie_from_empty(): 53 | node = Trie() 54 | prefix, node = node.get_subtrie("") 55 | assert prefix == "" 56 | assert node._value == EMPTY_NODE._value 57 | assert node.is_word == EMPTY_NODE.is_word 58 | assert node.count == EMPTY_NODE.count 59 | assert node.children == EMPTY_NODE.children 60 | -------------------------------------------------------------------------------- /docs/source/units.rst: -------------------------------------------------------------------------------- 1 | mpu.units 2 | ========= 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. 
automodule:: mpu.units 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | Allowed operations with Money 13 | ----------------------------- 14 | Here you can see which operations are allowed by two Money objects of 15 | currencies (A and B): 16 | 17 | +---------+----------------------+----------+---------+---------------+ 18 | | Money A | Operator | Money A | Money B | int, Fraction | 19 | +=========+======================+==========+=========+===============+ 20 | | | `+` , `-` | Money A | N/A | N/A | 21 | +---------+----------------------+----------+---------+---------------+ 22 | | | `*` | N/A | N/A | Money A | 23 | +---------+----------------------+----------+---------+---------------+ 24 | | | `/` | N/A | N/A | N/A | 25 | +---------+----------------------+----------+---------+---------------+ 26 | | | `//` | Fraction | N/A | Money A | 27 | +---------+----------------------+----------+---------+---------------+ 28 | | | `>`, `>=`, `<`, `<=` | Bool | N/A | N/A | 29 | +---------+----------------------+----------+---------+---------------+ 30 | | | == | Bool | False | False | 31 | +---------+----------------------+----------+---------+---------------+ 32 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # pre-commit run --all-files 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v4.1.0 5 | hooks: 6 | - id: check-ast 7 | - id: check-byte-order-marker 8 | - id: check-case-conflict 9 | - id: check-docstring-first 10 | - id: check-executables-have-shebangs 11 | - id: check-json 12 | - id: check-yaml 13 | - id: debug-statements 14 | - id: detect-private-key 15 | - id: end-of-file-fixer 16 | - id: trailing-whitespace 17 | - id: mixed-line-ending 18 | - repo: https://github.com/MartinThoma/check-pip-compile 19 | rev: 0.1.0 20 | hooks: 21 | - id: check-pip-compile 22 | args: ['requirements/ci.in', 'requirements/lint.in'] 23 | - repo: https://github.com/pre-commit/mirrors-mypy 24 | rev: v0.931 25 | hooks: 26 | - id: mypy 27 | args: [--ignore-missing-imports] 28 | additional_dependencies: [lxml, types-simplejson, types-pytz, types-tzlocal, types-setuptools] 29 | - repo: https://github.com/asottile/seed-isort-config 30 | rev: v2.2.0 31 | hooks: 32 | - id: seed-isort-config 33 | - repo: https://github.com/pre-commit/mirrors-isort 34 | rev: v5.10.1 35 | hooks: 36 | - id: isort 37 | - repo: https://github.com/psf/black 38 | rev: 22.1.0 39 | hooks: 40 | - id: black 41 | - repo: https://github.com/asottile/pyupgrade 42 | rev: v2.31.0 43 | hooks: 44 | - id: pyupgrade 45 | args: [--py37-plus] 46 | - repo: https://github.com/asottile/blacken-docs 47 | rev: v1.12.1 48 | hooks: 49 | - id: blacken-docs 50 | additional_dependencies: [black==20.8b1] 51 | -------------------------------------------------------------------------------- /mpu/ml.py: -------------------------------------------------------------------------------- 1 | """Machine Learning functions.""" 2 | 3 | # Core Library 4 | from typing import Iterable, List 5 | 6 | # First party 7 | from mpu.math import argmax 8 | 9 | 10 | def indices2one_hot(indices: Iterable, nb_classes: int) -> List: 11 | """ 12 | Convert an iterable of indices to one-hot encoded list. 
13 | 14 | You might also be interested in sklearn.preprocessing.OneHotEncoder 15 | 16 | Parameters 17 | ---------- 18 | indices : Iterable 19 | iterable of indices 20 | nb_classes : int 21 | Number of classes 22 | 23 | Returns 24 | ------- 25 | one_hot : List 26 | 27 | Examples 28 | -------- 29 | >>> indices2one_hot([0, 1, 1], 3) 30 | [[1, 0, 0], [0, 1, 0], [0, 1, 0]] 31 | >>> indices2one_hot([0, 1, 1], 2) 32 | [[1, 0], [0, 1], [0, 1]] 33 | """ 34 | if nb_classes < 1: 35 | raise ValueError(f"nb_classes={nb_classes}, but positive number expected") 36 | 37 | one_hot = [] 38 | for index in indices: 39 | one_hot.append([0] * nb_classes) 40 | one_hot[-1][index] = 1 41 | return one_hot 42 | 43 | 44 | def one_hot2indices(one_hots: List) -> List: 45 | """ 46 | Convert an iterable of one-hot encoded targets to a list of indices. 47 | 48 | Parameters 49 | ---------- 50 | one_hots : List 51 | 52 | Returns 53 | ------- 54 | indices : List 55 | 56 | Examples 57 | -------- 58 | >>> one_hot2indices([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 59 | [0, 1, 2] 60 | 61 | >>> one_hot2indices([[1, 0], [1, 0], [0, 1]]) 62 | [0, 0, 1] 63 | """ 64 | indices = [] 65 | for one_hot in one_hots: 66 | indices.append(argmax(one_hot)) 67 | return indices 68 | -------------------------------------------------------------------------------- /tests/test_datetime.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | # Core Library 5 | from datetime import datetime 6 | 7 | # Third party 8 | import pytest 9 | import pytz 10 | 11 | # First party 12 | import mpu.datetime 13 | 14 | 15 | def test_add_hour(): 16 | tz = pytz.timezone("Europe/Berlin") 17 | out = mpu.datetime.add_time( 18 | datetime(1918, 4, 15, 0, 0, tzinfo=pytz.utc).astimezone(tz), hours=1 19 | ).isoformat() 20 | assert out == "1918-04-15T03:00:00+02:00" 21 | 22 | 23 | def test_add_day(): 24 | tz = pytz.timezone("Europe/Berlin") 25 | out = mpu.datetime.add_time( 26 | datetime(1918, 4, 15, 0, 0, tzinfo=pytz.utc).astimezone(tz), 27 | days=1, 28 | ).isoformat() 29 | assert out == "1918-04-16T02:00:00+02:00" 30 | 31 | 32 | def test_add_time_neutral(): 33 | """Call add_time without any specified time to add.""" 34 | tz = pytz.timezone("Europe/Berlin") 35 | out = mpu.datetime.add_time( 36 | datetime(1918, 4, 15, 0, 0, tzinfo=pytz.utc).astimezone(tz) 37 | ).isoformat() 38 | assert out == "1918-04-15T01:00:00+01:00" 39 | 40 | 41 | def test_add_time_all(): 42 | """Call add_time without any specified time to add.""" 43 | tz = pytz.timezone("Europe/Berlin") 44 | out = mpu.datetime.add_time( 45 | datetime(1918, 4, 15, 0, 0, tzinfo=pytz.utc).astimezone(tz), 46 | seconds=1, 47 | minutes=2, 48 | hours=3, 49 | ).isoformat() 50 | assert out == "1918-04-15T05:02:01+02:00" 51 | 52 | 53 | def test_generate_fail(): 54 | with pytest.raises(ValueError): 55 | mpu.datetime.generate(datetime(2018, 1, 1), datetime(2018, 1, 1)) 56 | 57 | 58 | def test_generate(): 59 | start = datetime(2018, 1, 1) 60 | end = datetime(2018, 2, 1) 61 | generated = mpu.datetime.generate(start, end) 62 | assert start <= generated <= end 63 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.7 3 | # To update, run: 4 | # 5 | # pip-compile docs/requirements.in 6 | # 7 | alabaster==0.7.12 8 | # via sphinx 9 | babel==2.9.1 10 | # via sphinx 11 | boto3==1.20.47 
12 | # via -r requirements.in 13 | botocore==1.23.47 14 | # via 15 | # boto3 16 | # s3transfer 17 | certifi==2021.10.8 18 | # via requests 19 | charset-normalizer==2.0.11 20 | # via requests 21 | docutils==0.17.1 22 | # via 23 | # sphinx 24 | # sphinx-rtd-theme 25 | idna==3.3 26 | # via requests 27 | imagesize==1.3.0 28 | # via sphinx 29 | jinja2==3.0.3 30 | # via sphinx 31 | jmespath==0.10.0 32 | # via 33 | # boto3 34 | # botocore 35 | markupsafe==2.0.1 36 | # via jinja2 37 | numpy==1.22.0 38 | # via pandas 39 | packaging==21.3 40 | # via sphinx 41 | pandas==1.3.5 42 | # via -r requirements.in 43 | pygments==2.11.2 44 | # via sphinx 45 | pyparsing==3.0.7 46 | # via packaging 47 | python-dateutil==2.8.2 48 | # via 49 | # botocore 50 | # pandas 51 | pytz==2021.3 52 | # via 53 | # babel 54 | # pandas 55 | requests==2.27.1 56 | # via sphinx 57 | s3transfer==0.5.1 58 | # via boto3 59 | six==1.16.0 60 | # via python-dateutil 61 | snowballstemmer==2.2.0 62 | # via sphinx 63 | sphinx==4.4.0 64 | # via 65 | # -r requirements.in 66 | # sphinx-rtd-theme 67 | sphinx-rtd-theme==1.0.0 68 | # via -r requirements.in 69 | sphinxcontrib-applehelp==1.0.2 70 | # via sphinx 71 | sphinxcontrib-devhelp==1.0.2 72 | # via sphinx 73 | sphinxcontrib-htmlhelp==2.0.0 74 | # via sphinx 75 | sphinxcontrib-jsmath==1.0.1 76 | # via sphinx 77 | sphinxcontrib-qthelp==1.0.3 78 | # via sphinx 79 | sphinxcontrib-serializinghtml==1.1.5 80 | # via sphinx 81 | typing-extensions==4.0.1 82 | # via -r requirements.in 83 | urllib3==1.26.8 84 | # via 85 | # botocore 86 | # requests 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | mypy-html/ 103 | 104 | # pytest 105 | .pytest_cache/ 106 | 107 | tests/reports/ 108 | 109 | lambda.zip 110 | venv-lambda/ 111 | report/ 112 | 113 | # https://pypi.org/project/mutmut/ 114 | .mutmut-cache 115 | html/ 116 | report.html 117 | assets/ 118 | 119 | .vscode/ 120 | *.code-workspace 121 | -------------------------------------------------------------------------------- /mpu/datetime.py: -------------------------------------------------------------------------------- 1 | """Datetime related utility functions.""" 2 | 3 | # Core Library 4 | import datetime as dt 5 | import random 6 | 7 | # Third party 8 | import pytz 9 | 10 | local_random = random.Random() 11 | 12 | 13 | def add_time(datetime_obj, days=0, hours=0, minutes=0, seconds=0): 14 | """ 15 | Add time to a timezone-aware datetime object. 16 | 17 | This keeps the timezone correct, even if it changes due to daylight 18 | saving time (DST). 19 | 20 | Parameters 21 | ---------- 22 | datetime_obj : datetime.datetime 23 | days : int 24 | hours : int 25 | minutes : int 26 | seconds : int 27 | 28 | Returns 29 | ------- 30 | datetime : datetime.datetime 31 | """ 32 | seconds += minutes * 60 33 | seconds += hours * 60**2 34 | seconds += days * 24 * 60**2 35 | t14 = datetime_obj + dt.timedelta(seconds=seconds) # Invalid timezone! 36 | t14 = t14.astimezone(pytz.utc).astimezone(t14.tzinfo) # Fix the timezone 37 | return t14 38 | 39 | 40 | def generate(minimum, maximum, local_random=local_random): 41 | """ 42 | Generate a random date. 43 | 44 | The generated dates are uniformly distributed. 
45 | 46 | Parameters 47 | ---------- 48 | minimum : datetime object 49 | maximum : datetime object 50 | local_random : random.Random 51 | 52 | Returns 53 | ------- 54 | generated_date : datetime object 55 | 56 | Examples 57 | -------- 58 | >>> import random; r = random.Random(); r.seed(0) 59 | >>> from datetime import datetime 60 | 61 | >>> generate(datetime(2018, 1, 1), datetime(2018, 1, 2), local_random=r) 62 | datetime.datetime(2018, 1, 1, 20, 15, 58, 47972) 63 | 64 | >>> generate(datetime(2018, 1, 1), datetime(2018, 1, 2), local_random=r) 65 | datetime.datetime(2018, 1, 1, 18, 11, 27, 260414) 66 | """ 67 | if not (minimum < maximum): 68 | raise ValueError(f"{minimum} is not smaller than {maximum}") 69 | 70 | time_d = maximum - minimum 71 | time_d_rand = time_d * local_random.random() 72 | generated = minimum + time_d_rand 73 | return generated 74 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | # https://setuptools.readthedocs.io/en/latest/setuptools.html#configuring-setup-using-setup-cfg-files 3 | name = mpu 4 | 5 | author = Martin Thoma 6 | author_email = info@martin-thoma.de 7 | maintainer = Martin Thoma 8 | maintainer_email = info@martin-thoma.de 9 | 10 | # keep in sync with mpu/_version.py 11 | version = 0.23.1 12 | 13 | description = Martins Python Utilities 14 | long_description = file: README.md 15 | long_description_content_type = text/markdown 16 | keywords = utility, 17 | 18 | platforms = Linux 19 | 20 | url = https://github.com/MartinThoma/mpu 21 | download_url = https://github.com/MartinThoma/mpu 22 | 23 | license = MIT 24 | 25 | # https://pypi.org/pypi?%3Aaction=list_classifiers 26 | classifiers = 27 | Development Status :: 3 - Alpha 28 | Environment :: Console 29 | Intended Audience :: Developers 30 | Intended Audience :: Information Technology 31 | License :: OSI Approved :: MIT License 32 | Natural Language :: English 33 | Operating System :: OS Independent 34 | Programming Language :: Python :: 3 35 | Programming Language :: Python :: 3 :: Only 36 | Programming Language :: Python :: 3.7 37 | Programming Language :: Python :: 3.8 38 | Programming Language :: Python :: 3.9 39 | Topic :: Software Development :: Libraries :: Python Modules 40 | Topic :: Software Development 41 | Topic :: Utilities 42 | 43 | [options] 44 | packages = find: 45 | python_requires = >=3.7 46 | 47 | [tool:pytest] 48 | addopts = --doctest-modules --cov=./mpu --cov-report html:tests/reports/coverage-html --cov-report term-missing --ignore=docs/ --durations=3 --timeout=30 49 | doctest_encoding = utf-8 50 | 51 | [pydocstyle] 52 | match_dir = mpu 53 | ignore = D105, D413, D107, D416, D212, D203, D417 54 | 55 | [flake8] 56 | max-complexity=10 57 | max_line_length = 88 58 | exclude = tests/*,.tox/*,.nox/*,docs/* 59 | ignore = H301,H306,H404,H405,W503,D105,D413,D103,D107,E252,N803,E203,C416,A001,A003,P102,SIM106 60 | 61 | [mutmut] 62 | backup = False 63 | runner = ./mutmut-test.sh 64 | tests_dir = tests/ 65 | 66 | [mypy] 67 | ignore_missing_imports = True 68 | -------------------------------------------------------------------------------- /requirements/lint.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile requirements/lint.in 6 | # 7 | appdirs==1.4.4 8 | # via black 9 | astor==0.8.1 10 | # via flake8-simplify 11 | 
attrs==20.3.0 12 | # via 13 | # flake8-bugbear 14 | # flake8-eradicate 15 | # flake8-implicit-str-concat 16 | black==20.8b1 17 | # via -r requirements/lint.in 18 | click==7.1.2 19 | # via black 20 | eradicate==2.0.0 21 | # via flake8-eradicate 22 | flake8-assert-msg==1.1.1 23 | # via -r requirements/lint.in 24 | flake8-bugbear==21.3.2 25 | # via -r requirements/lint.in 26 | flake8-builtins==1.5.3 27 | # via -r requirements/lint.in 28 | flake8-comprehensions==3.4.0 29 | # via -r requirements/lint.in 30 | flake8-eradicate==1.0.0 31 | # via -r requirements/lint.in 32 | flake8-executable==2.1.1 33 | # via -r requirements/lint.in 34 | flake8-implicit-str-concat==0.2.0 35 | # via -r requirements/lint.in 36 | flake8-isort==4.0.0 37 | # via -r requirements/lint.in 38 | flake8-plugin-utils==1.3.1 39 | # via flake8-pytest-style 40 | flake8-pytest-style==1.4.0 41 | # via -r requirements/lint.in 42 | flake8-raise==0.0.5 43 | # via -r requirements/lint.in 44 | flake8-simplify==0.14.0 45 | # via -r requirements/lint.in 46 | flake8-string-format==0.3.0 47 | # via -r requirements/lint.in 48 | flake8==3.9.0 49 | # via 50 | # -r requirements/lint.in 51 | # flake8-assert-msg 52 | # flake8-bugbear 53 | # flake8-builtins 54 | # flake8-comprehensions 55 | # flake8-eradicate 56 | # flake8-executable 57 | # flake8-isort 58 | # flake8-raise 59 | # flake8-simplify 60 | # flake8-string-format 61 | isort==5.8.0 62 | # via flake8-isort 63 | mccabe==0.6.1 64 | # via 65 | # -r requirements/lint.in 66 | # flake8 67 | more-itertools==8.7.0 68 | # via flake8-implicit-str-concat 69 | mypy-extensions==0.4.3 70 | # via 71 | # black 72 | # mypy 73 | mypy==0.812 74 | # via -r requirements/lint.in 75 | pathspec==0.8.1 76 | # via black 77 | pycodestyle==2.7.0 78 | # via flake8 79 | pydocstyle==6.0.0 80 | # via -r requirements/lint.in 81 | pyflakes==2.3.0 82 | # via flake8 83 | regex==2021.3.17 84 | # via black 85 | snowballstemmer==2.1.0 86 | # via pydocstyle 87 | testfixtures==6.17.1 88 | # via flake8-isort 89 | toml==0.10.2 90 | # via black 91 | typed-ast==1.4.2 92 | # via 93 | # black 94 | # mypy 95 | typing-extensions==3.7.4.3 96 | # via 97 | # black 98 | # mypy 99 | -------------------------------------------------------------------------------- /tests/test_aws.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Test mpu.aws module.""" 4 | 5 | # Core Library 6 | import filecmp 7 | import os 8 | from tempfile import mkstemp 9 | 10 | # Third party 11 | import boto3 12 | import pkg_resources 13 | import pytest 14 | from moto import mock_s3 15 | 16 | # First party 17 | # internal modules 18 | import mpu.aws 19 | from mpu.aws import ExistsStrategy 20 | 21 | 22 | @pytest.mark.xfail 23 | @mock_s3 24 | def test_list_no_files(): 25 | """Test if listing files of an S3 bucket works.""" 26 | # We need to create the bucket since this is all in Moto's 'virtual' 27 | # AWS account 28 | conn = boto3.resource("s3", region_name="us-east-1") 29 | conn.create_bucket(Bucket="mybucket") 30 | assert mpu.aws.list_files("mybucket") == [] 31 | 32 | # Test upload 33 | path = "files/example.csv" 34 | local_path = pkg_resources.resource_filename(__name__, path) 35 | mpu.aws.s3_upload(local_path, "s3://mybucket/example_test.csv") 36 | assert mpu.aws.list_files("mybucket") == ["s3://mybucket/example_test.csv"] 37 | 38 | # Test download 39 | _, destination = mkstemp(suffix="example.csv") 40 | os.remove(destination) # make sure this file does NOT exist 41 | 
mpu.aws.s3_download("s3://mybucket/example_test.csv", destination)
42 |     assert filecmp.cmp(destination, local_path)
43 |     os.remove(destination)  # cleanup of mkstemp
44 | 
45 |     # Test download without destination
46 |     destination = mpu.aws.s3_download("s3://mybucket/example_test.csv")
47 |     os.remove(destination)
48 | 
49 |     # Test download: File exists
50 |     _, destination = mkstemp(suffix="example.csv")
51 |     with pytest.raises(RuntimeError):
52 |         mpu.aws.s3_download(
53 |             "s3://mybucket/example_test.csv",
54 |             destination,
55 |             exists_strategy=mpu.aws.ExistsStrategy.RAISE,
56 |         )
57 |     with pytest.raises(ValueError):
58 |         mpu.aws.s3_download(
59 |             "s3://mybucket/example_test.csv",
60 |             destination,
61 |             exists_strategy="raise",  # a plain string is not an ExistsStrategy
62 |         )
63 |     mpu.aws.s3_download(
64 |         "s3://mybucket/example_test.csv",
65 |         destination,
66 |         exists_strategy=mpu.aws.ExistsStrategy.ABORT,
67 |     )
68 |     mpu.aws.s3_download(
69 |         "s3://mybucket/example_test.csv",
70 |         destination,
71 |         exists_strategy=mpu.aws.ExistsStrategy.REPLACE,
72 |     )
73 | 
74 |     mpu.aws.s3_read("s3://mybucket/example_test.csv")
75 |     os.remove(destination)  # cleanup of mkstemp
76 | 
77 | 
78 | def test_s3_path_split():
79 |     with pytest.raises(ValueError) as exinfo:
80 |         mpu.aws._s3_path_split("foo/bar")
81 |     assert (
82 |         str(exinfo.value)
83 |         == "s3_path is expected to start with 's3://', but was foo/bar"
84 |     )
85 | 
--------------------------------------------------------------------------------
/tests/test_string.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Core Library
4 | import re
5 | 
6 | # Third party
7 | import hypothesis.strategies as st
8 | import pytest
9 | from hypothesis import given
10 | 
11 | # First party
12 | import mpu.string
13 | 
14 | 
15 | def test_str2bool_no_mapping():
16 |     with pytest.raises(ValueError):
17 |         mpu.string.str2bool("foobar")
18 | 
19 | 
20 | @pytest.mark.parametrize("illegal_default", ["foobar", True])
21 | def test_str2bool_illegal_default(illegal_default):
22 |     with pytest.raises(ValueError):
23 |         mpu.string.str2bool("yes", default=illegal_default)
24 | 
25 | 
26 | @pytest.mark.parametrize("illegal_default", ["foobar", True])
27 | def test_str2bool_or_none_illegal_default(illegal_default):
28 |     with pytest.raises(ValueError):
29 |         mpu.string.str2bool_or_none("yes", default=illegal_default)
30 | 
31 | 
32 | def test_is_iban_not():
33 |     assert mpu.string.is_iban("DE12") is False
34 |     assert mpu.string.is_iban("") is False
35 |     assert mpu.string.is_iban("ZZaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") is False
36 |     assert mpu.string.is_iban("DEaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") is False
37 | 
38 | 
39 | def test_is_iban():
40 |     iban = "FR14 2004 1010 0505 0001 3M02 606"
41 |     assert mpu.string.is_iban(iban)
42 | 
43 | 
44 | @pytest.mark.parametrize("illegal_default", ["foobar", True])
45 | def test_is_none_illegal_default(illegal_default):
46 |     with pytest.raises(ValueError):
47 |         mpu.string.is_none("none", default=illegal_default)
48 | 
49 | 
50 | def test_is_none_not():
51 |     with pytest.raises(ValueError):
52 |         mpu.string.is_none("foobar")
53 | 
54 | 
55 | @given(st.emails())
56 | def test_is_email(email):
57 |     assert mpu.string.is_email(email), f"is_email({email}) returned False"
58 | 
59 | 
60 | @given(st.ip_addresses(v=4))
61 | def test_is_ipv4(ip):
62 |     assert mpu.string.is_ipv4(str(ip)), f"is_ipv4({ip}) returned False"
63 | 
64 | 
65 | @pytest.mark.parametrize(
66 |     "valid_mail",
67 |     [
68 |         "noreply@example.com",
69 |         "noreply@example.de",
70 | 
"noreply+foo@gmail.com", 71 | "jon.smith@notice.tuya.co", 72 | "NoRePlY@ExAmPlE.cOm", 73 | "noreply@example.de", 74 | "noreply+foo@gmail.com", 75 | "jon.smith@notice.tuya.co", 76 | ], 77 | ) 78 | def test_email_pattern_positive(valid_mail): 79 | """Check if valid emails are recognized as being valid.""" 80 | email_pattern = re.compile(mpu.string.email_regex) 81 | assert email_pattern.match(valid_mail) 82 | 83 | 84 | @pytest.mark.parametrize( 85 | "invalid_mail", 86 | [ 87 | "noreply.@example.com", 88 | "@example.de", 89 | ], 90 | ) 91 | def test_email_pattern_negative(invalid_mail): 92 | """Check if invalid emails are recognized as being invalid.""" 93 | email_pattern = re.compile(mpu.string.email_regex) 94 | assert email_pattern.match(invalid_mail) is None 95 | -------------------------------------------------------------------------------- /mpu/shell.py: -------------------------------------------------------------------------------- 1 | """Enhancing printed terminal output.""" 2 | 3 | # Core Library 4 | from typing import List 5 | 6 | 7 | class Codes: 8 | """Escape sequences for enhanced shell output.""" 9 | 10 | RESET_ALL = "\033[0m" 11 | 12 | BOLD = "\033[1m" 13 | DIM = "\033[2m" 14 | UNDERLINED = "\033[4m" 15 | BLINK = "\033[5m" 16 | REVERSE = "\033[7m" 17 | HIDDEN = "\033[8m" 18 | 19 | RESET_BOLD = "\033[21m" 20 | RESET_DIM = "\033[22m" 21 | RESET_UNDERLINED = "\033[24m" 22 | RESET_BLINK = "\033[25m" 23 | RESET_REVERSE = "\033[27m" 24 | RESET_HIDDEN = "\033[28m" 25 | 26 | DEFAULT = "\033[39m" 27 | BLACK = "\033[30m" 28 | RED = "\033[31m" 29 | GREEN = "\033[32m" 30 | YELLOW = "\033[33m" 31 | BLUE = "\033[34m" 32 | MAGENTA = "\033[35m" 33 | CYAN = "\033[36m" 34 | LIGHT_GRAY = "\033[37m" 35 | DARK_GRAY = "\033[90m" 36 | LIGHT_RED = "\033[91m" 37 | LIGHT_GREEN = "\033[92m" 38 | LIGHT_YELLOW = "\033[93m" 39 | LIGHT_BLUE = "\033[94m" 40 | LIGHT_MAGENTA = "\033[95m" 41 | LIGHT_CYAN = "\033[96m" 42 | WHITE = "\033[97m" 43 | 44 | BACKGROUND_DEFAULT = "\033[49m" 45 | BACKGROUND_BLACK = "\033[40m" 46 | BACKGROUND_RED = "\033[41m" 47 | BACKGROUND_GREEN = "\033[42m" 48 | BACKGROUND_YELLOW = "\033[43m" 49 | BACKGROUND_BLUE = "\033[44m" 50 | BACKGROUND_MAGENTA = "\033[45m" 51 | BACKGROUND_CYAN = "\033[46m" 52 | BACKGROUND_LIGHT_GRAY = "\033[47m" 53 | BACKGROUND_DARK_GRAY = "\033[100m" 54 | BACKGROUND_LIGHT_RED = "\033[101m" 55 | BACKGROUND_LIGHT_GREEN = "\033[102m" 56 | BACKGROUND_LIGHT_YELLOW = "\033[103m" 57 | BACKGROUND_LIGHT_BLUE = "\033[104m" 58 | BACKGROUND_LIGHT_MAGENTA = "\033[105m" 59 | BACKGROUND_LIGHT_CYAN = "\033[106m" 60 | BACKGROUND_WHITE = "\033[107m" 61 | 62 | 63 | def print_table(table: List) -> None: 64 | """ 65 | Print as a table. 66 | 67 | I recommend looking at [`tabulate`](https://pypi.org/project/tabulate/). 68 | 69 | Parameters 70 | ---------- 71 | table : List 72 | 73 | Examples 74 | -------- 75 | >>> print_table([[1, 2, 3], [41, 0, 1]]) 76 | 1 2 3 77 | 41 0 1 78 | """ 79 | table = [[str(cell) for cell in row] for row in table] 80 | column_widths = [len(cell) for cell in table[0]] 81 | for row in table: 82 | for x, cell in enumerate(row): 83 | column_widths[x] = max(column_widths[x], len(cell)) 84 | 85 | formatters = [] 86 | for width in column_widths: 87 | formatters.append("{:>" + str(width) + "}") 88 | formatter = " ".join(formatters) 89 | for row in table: 90 | print(formatter.format(*row)) 91 | 92 | 93 | def text_input(text: str) -> str: 94 | """ 95 | Ask the user for textual input. 96 | 97 | Parameters 98 | ---------- 99 | text : str 100 | What the user sees. 
101 | 102 | Returns 103 | ------- 104 | entered_text : str 105 | What the user wrote. 106 | """ 107 | return input(text) 108 | -------------------------------------------------------------------------------- /tests/test_trie.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Test the mpu.datastructures.trie module.""" 4 | 5 | # Third party 6 | import pytest 7 | 8 | # First party 9 | from mpu.datastructures.trie import Trie as DefaultTrie 10 | from mpu.datastructures.trie.base import AbstractTrie 11 | from mpu.datastructures.trie.char_trie import Trie as CharTrie 12 | from mpu.datastructures.trie.full_prefix_dict import FullPrefixDict 13 | from mpu.datastructures.trie.string_trie import Trie as StringTrie 14 | 15 | all_tries = [CharTrie, StringTrie, FullPrefixDict, DefaultTrie] 16 | 17 | 18 | def test_abstract_trie(): 19 | with pytest.raises(TypeError) as exinfo: 20 | trie = AbstractTrie() 21 | msg = ( 22 | "Can't instantiate abstract class AbstractTrie with abstract " 23 | "methods __contains__, __iter__, __len__, autocomplete" 24 | ) 25 | assert str(exinfo.value) == msg 26 | 27 | 28 | @pytest.mark.parametrize("Trie", all_tries) 29 | def test_trie_creation(Trie): 30 | data = ["dog", "cat", "cattle", "tom", "dinosaur", "tomcat", "tomatoe"] 31 | trie = Trie(data) 32 | assert {element for element in trie} == set(data) 33 | 34 | 35 | @pytest.mark.parametrize("Trie", all_tries) 36 | def test_trie_add_same(Trie): 37 | trie = Trie(["dog", "cat", "dog"]) 38 | assert sorted(word for word in trie) == ["cat", "dog", "dog"] 39 | 40 | 41 | @pytest.mark.parametrize("Trie", all_tries) 42 | def test_empty_trie_iter_empty(Trie): 43 | trie = Trie() 44 | assert [word for word in trie] == [] 45 | 46 | 47 | @pytest.mark.parametrize("Trie", all_tries) 48 | def test_contains(Trie): 49 | words = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 50 | trie = Trie(words) 51 | for word in words: 52 | assert word in trie 53 | 54 | words = [ 55 | "creeker", 56 | "creekfish", 57 | "creekfishes", 58 | "Creeks", 59 | "creekside", 60 | "creekstuff", 61 | "creeky", 62 | ] 63 | trie = Trie(words) 64 | for word in words: 65 | assert word in trie 66 | 67 | 68 | @pytest.mark.parametrize("Trie", all_tries) 69 | def test_len_initialization(Trie): 70 | words = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 71 | trie = Trie(words) 72 | assert len(trie) == len(words) 73 | 74 | 75 | @pytest.mark.parametrize("Trie", all_tries) 76 | def test_len_push(Trie): 77 | words = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 78 | trie = Trie() 79 | for word in words: 80 | trie.push(word) 81 | assert len(trie) == len(words) 82 | 83 | 84 | @pytest.mark.parametrize("Trie", all_tries) 85 | def test_autocomplete_empty(Trie): 86 | trie = Trie() 87 | assert list(trie.autocomplete("")) == [] 88 | 89 | 90 | @pytest.mark.parametrize("Trie", all_tries) 91 | def test_contains_empty_true(Trie): 92 | trie = Trie([""]) 93 | assert "" in trie 94 | 95 | 96 | @pytest.mark.parametrize("Trie", all_tries) 97 | def test_contains_empty_false(Trie): 98 | trie = Trie(["foo"]) 99 | assert "" not in trie 100 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Core Library 4 | import sys 5 | import time 6 | import traceback 7 | 8 | # Third party 9 | import pytest 10 | 11 | # First party 12 | from mpu import 
( 13 | Location, 14 | clip, 15 | consistent_shuffle, 16 | exception_logging, 17 | haversine_distance, 18 | is_in_interval, 19 | parallel_for, 20 | ) 21 | 22 | 23 | def test_clip(): 24 | assert clip(42) == 42 25 | assert clip(42, 0, 100) == 42 26 | assert clip(42, 0, 42.0) == 42 27 | assert clip(42, None, 100) == 42 28 | assert clip(42, 0, None) == 42 29 | assert clip(-42, 0, None) == 0 30 | assert clip(420, None, 100) == 100 31 | 32 | 33 | def test_parallel_for(): 34 | def looping_function(payload): 35 | i, j = payload 36 | time.sleep(1) 37 | return i + j 38 | 39 | parameters = [(i, i + 1) for i in range(50)] 40 | out = parallel_for(looping_function, parameters) 41 | assert out == [2 * i + 1 for i in range(50)] 42 | 43 | 44 | def test_haversine(): 45 | with pytest.raises(ValueError): 46 | haversine_distance((-200, 0), (0, 0)) 47 | with pytest.raises(ValueError): 48 | haversine_distance((0, -200), (0, 0)) 49 | with pytest.raises(ValueError): 50 | haversine_distance((0, 0), (-200, 0)) 51 | with pytest.raises(ValueError): 52 | haversine_distance((0, 0), (0, -200)) 53 | 54 | 55 | def test_is_in_interval_raises(): 56 | with pytest.raises(ValueError): 57 | is_in_interval(10, 20, 100) 58 | 59 | 60 | def test_is_in_interval_ok(): 61 | is_in_interval(10, 10, 100) 62 | 63 | 64 | def test_exception_logging(): 65 | def raise_exception(): 66 | try: 67 | raise Exception 68 | except Exception: 69 | ex_type, ex, tb = sys.exc_info() 70 | traceback.print_tb(tb) 71 | return tb 72 | 73 | exception_logging(exctype="ValueError", value=None, tb=raise_exception()) 74 | 75 | 76 | def test_location_class(): 77 | munich = Location(48.137222222222, 11.575555555556) 78 | berlin = Location(52.518611111111, 13.408333333333) 79 | assert abs(munich.distance(berlin) - 506.7) < 10 80 | assert "google.com" in munich.get_google_maps_link() 81 | assert munich.get_google_maps_link().startswith("http") 82 | assert str(munich) == "Location(48.137222222222, 11.575555555556)" 83 | 84 | 85 | def test_location_value_range(): 86 | with pytest.raises(ValueError): 87 | Location(90.000000001, 42) 88 | with pytest.raises(ValueError): 89 | Location(-90.000000001, 42) 90 | Location(90.0, 42) 91 | Location(-90.0, 42) 92 | with pytest.raises(ValueError): 93 | Location(42, 180.000000001) 94 | with pytest.raises(ValueError): 95 | Location(42, -180.000000001) 96 | Location(42, 180.0) 97 | Location(42, -180.0) 98 | 99 | 100 | def test_consistent_shuffle_single(): 101 | input_list = [[1, 2], [3, 4]] 102 | result = consistent_shuffle(*input_list) 103 | assert result == ([1, 2], [3, 4]) or result == ([2, 1], [4, 3]) 104 | -------------------------------------------------------------------------------- /tests/test_math.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | # Core Library 5 | import itertools 6 | 7 | # Third party 8 | import hypothesis.strategies as st 9 | import pytest 10 | from hypothesis import given 11 | 12 | # First party 13 | import mpu.math 14 | 15 | 16 | def test_factorize_zero(): 17 | with pytest.raises(ValueError) as exinfo: 18 | mpu.math.factorize(0) 19 | assert str(exinfo.value) == "All primes are prime factors of 0." 
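# Context for the assertions in this module: mpu.math.factorize() prepends -1
# for negative inputs (factorize(-12) == [-1, 2, 2, 3]), so multiplying the
# returned factors always reconstructs the input -- the invariant that the
# property-based test_factorize further down relies on.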
20 | 
21 | 
22 | @given(st.floats())
23 | def test_factorize_float(a_float):
24 |     with pytest.raises(ValueError) as exinfo:
25 |         mpu.math.factorize(a_float)
26 |     assert str(exinfo.value) == "integer expected, but type(number)=<class 'float'>"
27 | 
28 | 
29 | def test_factorize_at_border():
30 |     assert mpu.math.factorize(991**2) == [991, 991]
31 | 
32 | 
33 | @given(an_integer=st.integers(min_value=-(10**6), max_value=10**6))
34 | def test_factorize(an_integer):
35 |     if an_integer == 0:
36 |         # This is tested in `test_factorize_zero` and should throw an exception
37 |         return
38 |     factors = mpu.math.factorize(an_integer)
39 |     product = 1
40 |     for factor in factors:
41 |         product *= factor
42 |     assert product == an_integer
43 | 
44 | 
45 | def test_factorize_5():
46 |     assert mpu.math.factorize(5) == [5]
47 | 
48 | 
49 | def test_factorize_1024(benchmark):
50 |     assert benchmark(mpu.math.factorize, 1024) == [2] * 10
51 | 
52 | 
53 | def test_factorize_3072(benchmark):
54 |     assert benchmark(mpu.math.factorize, 3072) == [2] * 10 + [3]
55 | 
56 | 
57 | def test_argmax(benchmark):
58 |     assert benchmark(mpu.math.argmax, [1, 2, 3]) == 2
59 | 
60 | 
61 | @given(st.lists(st.integers(), min_size=1))
62 | def test_argmax_property(integer_list):
63 |     argmax = mpu.math.argmax(integer_list)
64 |     max_value = integer_list[argmax]
65 |     for el in integer_list:
66 |         assert el <= max_value
67 | 
68 | 
69 | def test_gcd_fail():
70 |     with pytest.raises(ValueError) as exinfo:
71 |         mpu.math.gcd(0, 7)
72 |     assert str(exinfo.value) == "gcd(a=0, b=7) is undefined"
73 | 
74 | 
75 | @given(st.integers(), st.integers())
76 | def test_gcd_is_divisor(a, b):
77 |     if a == 0 or b == 0:
78 |         with pytest.raises(ValueError) as exinfo:
79 |             mpu.math.gcd(a, b)
80 |         assert str(exinfo.value) == f"gcd(a={a}, b={b}) is undefined"
81 |     else:
82 |         gcd = mpu.math.gcd(a, b)
83 |         assert a % gcd == 0
84 |         assert b % gcd == 0
85 | 
86 | 
87 | @given(st.integers(), st.integers(), st.integers())
88 | def test_gcd_is_divisor_min_size(a, b, c):
89 |     if a == 0 or b == 0 or c == 0:
90 |         with pytest.raises(ValueError) as exinfo:
91 |             mpu.math.gcd(a * c, b * c)
92 |         assert str(exinfo.value) == f"gcd(a={a*c}, b={b*c}) is undefined"
93 |     else:
94 |         gcd = mpu.math.gcd(a * c, b * c)
95 |         assert (a * c) % gcd == 0
96 |         assert (b * c) % gcd == 0
97 |         assert gcd % c == 0
98 | 
99 | 
100 | def test_generate_primes():
101 |     first_primes = list(itertools.islice(mpu.math.generate_primes(), 10))
102 |     assert first_primes == [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]
103 | 
--------------------------------------------------------------------------------
/mpu/data/iban.csv:
--------------------------------------------------------------------------------
1 | country_en;length;bban_format;iban_fields
2 | Albania;28;8n,16c;ALkkbbbssssxcccccccccccccccc
3 | Andorra;24;8n,12c;ADkkbbbbsssscccccccccccc
4 | Austria;20;16n;ATkkbbbbbccccccccccc
5 | Azerbaijan;28;4c,20n;AZkkbbbbcccccccccccccccccccc
6 | Bahrain;22;4a,14c;BHkkbbbbcccccccccccccc
7 | Belarus;28;4c,20n;BYkkbbbbaaaacccccccccccccccc
8 | Belgium;16;12n;BEkkbbbcccccccxx
9 | Bosnia and Herzegovina;20;16n;BAkkbbbsssccccccccxx
10 | Brazil;29;23n,1a,1c;BRkkbbbbbbbbssssscccccccccctn
11 | Bulgaria;22;4a,6n,8c;BGkkbbbbssssttcccccccc
12 | Costa Rica;22;18n;CRkk0bbbcccccccccccccc
13 | Croatia;21;17n;HRkkbbbbbbbcccccccccc
14 | Cyprus;28;8n,16c;CYkkbbbssssscccccccccccccccc
15 | Czech Republic;24;20n;CZkkbbbbsssssscccccccccc
16 | Denmark;18;14n;DKkkbbbbcccccccccc
17 | Dominican Republic;28;4a,20n;DOkkbbbbcccccccccccccccccccc
18 | East Timor;23;19n;TLkkbbbccccccccccccccxx
19 | 
Estonia;20;16n;EEkkbbsscccccccccccx 20 | Faroe Islands;18;14n;FOkkbbbbcccccccccx 21 | Finland;18;14n;FIkkbbbbbbcccccccx 22 | France;27;10n,11c,2n;FRkkbbbbbssssscccccccccccxx 23 | Georgia;22;2c,16n;GEkkbbcccccccccccccccc 24 | Germany;22;18n;DEkkbbbbbbbbcccccccccc 25 | Gibraltar;23;4a,15c;GIkkbbbbccccccccccccccc 26 | Greece;27;7n,16c;GRkkbbbsssscccccccccccccccc 27 | Greenland;18;14n;GLkkbbbbcccccccccc 28 | Guatemala;28;4c,20c;GTkkbbbbmmttcccccccccccccccc 29 | Hungary;28;24n;HUkkbbbssssxcccccccccccccccx 30 | Iceland;26;22n;ISkkbbbbsscccccciiiiiiiiii 31 | Ireland;22;4c,14n;IEkkaaaabbbbbbcccccccc 32 | Israel;23;19n;ILkkbbbnnnccccccccccccc 33 | Italy;27;1a,10n,12c;ITkkxbbbbbssssscccccccccccc 34 | Jordan;30;4a,22n;JOkkbbbbsssscccccccccccccccccc 35 | Kazakhstan;20;3n,13c;KZkkbbbccccccccccccc 36 | Kosovo;20;4n,10n,2n;XKkkbbbbcccccccccccc 37 | Kuwait;30;4a,22c;KWkkbbbbcccccccccccccccccccccc 38 | Latvia;21;4a,13c;LVkkbbbbccccccccccccc 39 | Lebanon;28;4n,20c;LBkkbbbbcccccccccccccccccccc 40 | Liechtenstein;21;5n,12c;LIkkbbbbbcccccccccccc 41 | Lithuania;20;16n;LTkkbbbbbccccccccccc 42 | Luxembourg;20;3n,13c;LUkkbbbccccccccccccc 43 | Macedonia;19;3n,10c,2n;MKkkbbbccccccccccxx 44 | Malta;31;4a,5n,18c;MTkkbbbbssssscccccccccccccccccc 45 | Mauritania;27;23n;MRkkbbbbbssssscccccccccccxx 46 | Mauritius;30;4a,19n,3a;MUkkbbbbbbsscccccccccccc000mmm 47 | Monaco;27;10n,11c,2n;MCkkbbbbbssssscccccccccccxx 48 | Moldova;24;2c,18c;MDkkbbcccccccccccccccccc 49 | Montenegro;22;18n;MEkkbbbcccccccccccccxx 50 | Netherlands;18;4a,10n;NLkkbbbbcccccccccc 51 | Norway;15;11n;NOkkbbbbccccccx 52 | Pakistan;24;4c,16n;PKkkbbbbcccccccccccccccc 53 | Palestinian territories;29;4c,21n;PSkkbbbbxxxxxxxxxcccccccccccc 54 | Poland;28;24n;PLkkbbbssssxcccccccccccccccc 55 | Portugal;25;21n;PTkkbbbbsssscccccccccccxx 56 | Qatar;29;4a,21c;QAkkbbbbccccccccccccccccccccc 57 | Romania;24;4a,16c;ROkkbbbbcccccccccccccccc 58 | San Marino;27;1a,10n,12c;SMkkxbbbbbssssscccccccccccc 59 | Saudi Arabia;24;2n,18c;SAkkbbcccccccccccccccccc 60 | Serbia;22;18n;RSkkbbbcccccccccccccxx 61 | Slovakia;24;20n;SKkkbbbbsssssscccccccccc 62 | Slovenia;19;15n;SIkkbbsssccccccccxx 63 | Spain;24;20n;ESkkbbbbssssxxcccccccccc 64 | Sweden;24;20n;SEkkbbbccccccccccccccccc 65 | Switzerland;21;5n,12c;CHkkbbbbbcccccccccccc 66 | Tunisia;24;20n;TNkkbbsssccccccccccccccc 67 | Turkey;26;5n,17c;TRkkbbbbbxcccccccccccccccc 68 | United Arab Emirates;23;3n,16n;AEkkbbbcccccccccccccccc 69 | United Kingdom;22;4a,14n;GBkkbbbbsssssscccccccc 70 | Virgin Islands, British;24;4c,16n;VGkkbbbbcccccccccccccccc 71 | -------------------------------------------------------------------------------- /tests/test_nodebased_trie.py: -------------------------------------------------------------------------------- 1 | # Third party 2 | import pytest 3 | 4 | # First party 5 | from mpu.datastructures.trie.char_trie import EMPTY_NODE as CHAR_EMPTY_NODE 6 | from mpu.datastructures.trie.char_trie import Trie as CharTrie 7 | from mpu.datastructures.trie.char_trie import TrieNode as CharTrieNode 8 | from mpu.datastructures.trie.string_trie import EMPTY_NODE as STRING_EMPTY_NODE 9 | from mpu.datastructures.trie.string_trie import Trie as StringTrie 10 | from mpu.datastructures.trie.string_trie import TrieNode as StringTrieNode 11 | 12 | nodebased_tries = [CharTrie, StringTrie] 13 | nodebased_tries_empty_nodes = [ 14 | (CharTrie, CHAR_EMPTY_NODE), 15 | (StringTrie, STRING_EMPTY_NODE), 16 | ] 17 | 18 | 19 | @pytest.mark.parametrize("Trie,EMPTY_NODE", nodebased_tries_empty_nodes) 20 | def test_get_subtrie_prefix_hit_miss(Trie, 
EMPTY_NODE): 21 | trie = Trie(["foo"]) 22 | prefix, subtrie = trie.get_subtrie("foobar") 23 | assert subtrie is EMPTY_NODE 24 | 25 | 26 | @pytest.mark.parametrize("Trie", nodebased_tries) 27 | def test_get_subtrie_prefix_hit_hit(Trie): 28 | trie = Trie(["foo", "foobar"]) 29 | words = [] 30 | prefix, subtrie = trie.get_subtrie("foobar") 31 | for word in subtrie: 32 | words.append(prefix + word) 33 | assert words == ["foobar"] 34 | 35 | 36 | @pytest.mark.parametrize("Trie,EMPTY_NODE", nodebased_tries_empty_nodes) 37 | def test_get_subtrie_direct_miss(Trie, EMPTY_NODE): 38 | trie = Trie(["foo"]) 39 | prefix, subtrie = trie.get_subtrie("bar") 40 | assert subtrie is EMPTY_NODE 41 | 42 | 43 | @pytest.mark.parametrize("Trie", nodebased_tries) 44 | def test_trie_autocomplete(Trie): 45 | data = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 46 | trie = Trie(data) 47 | assert list(trie.autocomplete("d")) == ["d", "dog"] 48 | expected = ["tom", "tomatoe", "tomcat"] 49 | assert sorted(trie.autocomplete("tom")) == expected 50 | 51 | data = ["tom", "d"] 52 | trie = Trie(data) 53 | assert list(trie.autocomplete("t")) == ["tom"] 54 | 55 | data = ["dog", "tomco", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 56 | trie = Trie(data) 57 | assert sorted(trie.autocomplete("tomc")) == ["tomcat", "tomco"] 58 | trie.print() 59 | print(trie.get_subtrie("tom")) 60 | assert list(trie.autocomplete("x")) == [] 61 | 62 | 63 | @pytest.mark.parametrize("Trie", nodebased_tries) 64 | def test_get_subtrie_direct_hit(Trie): 65 | trie = Trie(["foobar"]) 66 | prefix, subtrie = trie.get_subtrie("foobar") 67 | assert [prefix + word for word in subtrie] == ["foobar"] 68 | 69 | 70 | @pytest.mark.parametrize("Trie", nodebased_tries) 71 | def test_get_subtrie_empty(Trie): 72 | trie = Trie() 73 | prefix, subtrie = trie.get_subtrie("foobar") 74 | assert prefix == "" 75 | assert not subtrie.is_word 76 | assert subtrie.count == 0 77 | 78 | 79 | @pytest.mark.parametrize("Trie", nodebased_tries) 80 | def test_trie_creation_prefix_search(Trie): 81 | data = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 82 | trie = Trie(data) 83 | expected = {"tom", "tomcat", "tomatoe"} 84 | prefix, subtrie = trie.get_subtrie("tom") 85 | assert {prefix + element for element in subtrie} == expected 86 | 87 | 88 | @pytest.mark.parametrize("TrieNode", [CharTrieNode, StringTrieNode]) 89 | def test_frozen_node_push(TrieNode): 90 | node = TrieNode("a", freeze=True) 91 | with pytest.raises(RuntimeError): 92 | node.push("b") 93 | 94 | 95 | @pytest.mark.parametrize("Trie", nodebased_tries) 96 | def test_push_empty(Trie): 97 | trie = Trie() 98 | trie.push("") 99 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI version](https://badge.fury.io/py/mpu.svg)](https://badge.fury.io/py/mpu) 2 | [![Python Support](https://img.shields.io/pypi/pyversions/mpu.svg)](https://pypi.org/project/mpu/) 3 | [![Documentation Status](https://readthedocs.org/projects/mpu/badge/?version=latest)](http://mpu.readthedocs.io/en/latest/?badge=latest) 4 | [![Build Status](https://travis-ci.org/MartinThoma/mpu.svg?branch=master)](https://travis-ci.org/MartinThoma/mpu) 5 | [![MartinThoma](https://circleci.com/gh/MartinThoma/mpu.svg?style=shield)](https://app.circleci.com/pipelines/github/MartinThoma/mpu) 6 | [![Build 
Status](https://dev.azure.com/martinthoma/mpu/_apis/build/status/MartinThoma.mpu?branchName=master)](https://dev.azure.com/martinthoma/mpu/_build/latest?definitionId=1&branchName=master)
7 | [![Coverage Status](https://coveralls.io/repos/github/MartinThoma/mpu/badge.svg?branch=master)](https://coveralls.io/github/MartinThoma/mpu?branch=master)
8 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
9 | ![GitHub last commit](https://img.shields.io/github/last-commit/MartinThoma/mpu)
10 | ![GitHub commits since latest release (by SemVer)](https://img.shields.io/github/commits-since/MartinThoma/mpu/0.23.1)
11 | [![CodeFactor](https://www.codefactor.io/repository/github/martinthoma/mpu/badge/master)](https://www.codefactor.io/repository/github/martinthoma/mpu/overview/master)
12 | [![mutmut](https://img.shields.io/badge/mutmut-1417%2F1813-lightgrey)](https://pypi.org/project/mutmut/)
13 | 
14 | # mpu
15 | Martins Python Utilities (mpu) is a collection of utility functions and classes
16 | with no dependencies beyond the standard library.
17 | 
18 | The total size of the package will never exceed 10 MB; currently it is 120 kB
19 | in zipped form. This makes it a good candidate for inclusion in AWS Lambda
20 | projects.
21 | 
22 | 
23 | ## Installation
24 | 
25 | ```bash
26 | $ pip install git+https://github.com/MartinThoma/mpu.git
27 | ```
28 | 
29 | It can, of course, also be installed via PyPI.
30 | 
31 | 
32 | ## Usage
33 | 
34 | ### Datastructures
35 | 
36 | ```python-repl
37 | >>> from mpu.datastructures import EList
38 | 
39 | >>> l = EList([2, 1, 0])
40 | >>> l[2]
41 | 0
42 | 
43 | >>> l[[2, 0]]
44 | [0, 2]
45 | 
46 | >>> l[l]
47 | [0, 1, 2]
48 | ```
49 | 
50 | ### Shell
51 | 
52 | To enhance your terminal's output, you might want to do something like:
53 | 
54 | ```python
55 | from mpu.shell import Codes
56 | 
57 | print("{c.GREEN}{c.UNDERLINED}Works{c.RESET_ALL}".format(c=Codes))
58 | ```
59 | 
60 | 
61 | ### Quick Examples
62 | 
63 | Creating small example datastructures is a task I encounter once in a while
64 | for StackExchange answers.
65 | 
66 | ```python
67 | from mpu.pd import example_df
68 | 
69 | df = example_df()
70 | print(df)
71 | ```
72 | 
73 | gives
74 | 
75 | ```
76 |      country   population population_time    EUR
77 | 0    Germany   82521653.0      2016-12-01   True
78 | 1     France   66991000.0      2017-01-01   True
79 | 2  Indonesia  255461700.0      2017-01-01  False
80 | 3    Ireland    4761865.0             NaT   True
81 | 4      Spain   46549045.0      2017-06-01   True
82 | 5    Vatican          NaN             NaT   True
83 | ```
84 | 
85 | 
86 | ### Money
87 | 
88 | ```python
89 | import mpu
90 | from fractions import Fraction
91 | 
92 | gross_income = mpu.units.Money("2345.10", "EUR")
93 | net_income = gross_income * Fraction("0.80")
94 | apartment = mpu.units.Money("501.23", "EUR")
95 | savings = net_income - apartment
96 | print(savings)
97 | ```
98 | 
99 | prints `1375.31 Euro`
100 | 
101 | 
102 | ### IO
103 | 
104 | * Download files with [`mpu.io.download(source, sink)`](https://mpu.readthedocs.io/en/latest/io.html#mpu.io.download).
105 | * Read CSV, JSON and pickle with [`mpu.io.read(filepath)`](https://mpu.readthedocs.io/en/latest/io.html#mpu.io.read).
106 | * Write CSV, JSON and pickle with [`mpu.io.write(filepath, data)`](https://mpu.readthedocs.io/en/latest/io.html#mpu.io.write).
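Put together, a minimal sketch of the three calls (the URL and file names are
placeholders):

```python
from mpu.io import download, read, write

download("https://example.com/example.csv", "example.csv")  # fetch to a local file
data = read("example.csv")  # the reader is picked from the file suffix
write("example-copy.csv", data)  # writing dispatches on the suffix as well
```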
107 | 
--------------------------------------------------------------------------------
/requirements/ci.txt:
--------------------------------------------------------------------------------
1 | #
2 | # This file is autogenerated by pip-compile with python 3.7
3 | # To update, run:
4 | #
5 | #    pip-compile requirements/ci.in
6 | #
7 | attrs==21.4.0
8 |     # via
9 |     #   hypothesis
10 |     #   pytest
11 | bleach==4.1.0
12 |     # via readme-renderer
13 | boto3==1.20.47
14 |     # via
15 |     #   -r requirements/ci.in
16 |     #   moto
17 | botocore==1.23.47
18 |     # via
19 |     #   boto3
20 |     #   moto
21 |     #   s3transfer
22 | certifi==2021.10.8
23 |     # via requests
24 | cffi==1.15.0
25 |     # via cryptography
26 | charset-normalizer==2.0.11
27 |     # via requests
28 | click==8.0.3
29 |     # via pip-tools
30 | colorama==0.4.4
31 |     # via twine
32 | coverage[toml]==6.3.1
33 |     # via pytest-cov
34 | cryptography==36.0.1
35 |     # via
36 |     #   moto
37 |     #   secretstorage
38 | docutils==0.18.1
39 |     # via readme-renderer
40 | hypothesis==6.36.1
41 |     # via -r requirements/ci.in
42 | idna==3.3
43 |     # via requests
44 | importlib-metadata==4.10.1
45 |     # via
46 |     #   click
47 |     #   keyring
48 |     #   moto
49 |     #   pep517
50 |     #   pluggy
51 |     #   pytest
52 |     #   twine
53 | iniconfig==1.1.1
54 |     # via pytest
55 | jeepney==0.7.1
56 |     # via
57 |     #   keyring
58 |     #   secretstorage
59 | jinja2==3.0.3
60 |     # via moto
61 | jmespath==0.10.0
62 |     # via
63 |     #   boto3
64 |     #   botocore
65 | keyring==23.5.0
66 |     # via twine
67 | markupsafe==2.0.1
68 |     # via
69 |     #   jinja2
70 |     #   moto
71 | moto==3.0.2
72 |     # via -r requirements/ci.in
73 | numpy==1.21.5
74 |     # via pandas
75 | packaging==21.3
76 |     # via
77 |     #   bleach
78 |     #   pytest
79 | pandas==1.3.5
80 |     # via -r requirements/ci.in
81 | pep517==0.12.0
82 |     # via pip-tools
83 | pip-tools==6.4.0
84 |     # via -r requirements/ci.in
85 | pkginfo==1.8.2
86 |     # via twine
87 | pluggy==1.0.0
88 |     # via pytest
89 | py==1.11.0
90 |     # via pytest
91 | py-cpuinfo==8.0.0
92 |     # via pytest-benchmark
93 | pycparser==2.21
94 |     # via cffi
95 | pygments==2.11.2
96 |     # via readme-renderer
97 | pyparsing==3.0.7
98 |     # via packaging
99 | pytest==6.2.5
100 |     # via
101 |     #   -r requirements/ci.in
102 |     #   pytest-benchmark
103 |     #   pytest-cov
104 |     #   pytest-timeout
105 | pytest-benchmark==3.4.1
106 |     # via -r requirements/ci.in
107 | pytest-cov==3.0.0
108 |     # via -r requirements/ci.in
109 | pytest-timeout==2.1.0
110 |     # via -r requirements/ci.in
111 | python-dateutil==2.8.2
112 |     # via
113 |     #   botocore
114 |     #   moto
115 |     #   pandas
116 | pytz==2021.3
117 |     # via
118 |     #   moto
119 |     #   pandas
120 | readme-renderer==32.0
121 |     # via twine
122 | requests==2.27.1
123 |     # via
124 |     #   moto
125 |     #   requests-toolbelt
126 |     #   responses
127 |     #   twine
128 | requests-toolbelt==0.9.1
129 |     # via twine
130 | responses==0.18.0
131 |     # via moto
132 | rfc3986==2.0.0
133 |     # via twine
134 | s3transfer==0.5.1
135 |     # via boto3
136 | secretstorage==3.3.1
137 |     # via keyring
138 | simplejson==3.17.6
139 |     # via -r requirements/ci.in
140 | six==1.16.0
141 |     # via
142 |     #   bleach
143 |     #   python-dateutil
144 | sortedcontainers==2.4.0
145 |     # via hypothesis
146 | toml==0.10.2
147 |     # via pytest
148 | tomli==2.0.0
149 |     # via
150 |     #   coverage
151 |     #   pep517
152 | tqdm==4.62.3
153 |     # via twine
154 | twine==3.8.0
155 |     # via -r requirements/ci.in
156 | typing-extensions==4.0.1
157 |     # via importlib-metadata
158 | urllib3==1.26.8
159 |     # via
160 |     #   botocore
161 |     #   requests
162 |     #   
responses 163 | # twine 164 | webencodings==0.5.1 165 | # via bleach 166 | werkzeug==2.0.2 167 | # via moto 168 | wheel==0.37.1 169 | # via 170 | # -r requirements/ci.in 171 | # pip-tools 172 | xmltodict==0.12.0 173 | # via moto 174 | zipp==3.7.0 175 | # via 176 | # importlib-metadata 177 | # pep517 178 | 179 | # The following packages are considered to be unsafe in a requirements file: 180 | # pip 181 | # setuptools 182 | -------------------------------------------------------------------------------- /tests/test_pd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Core Library 4 | import datetime 5 | 6 | # Third party 7 | import pandas as pd 8 | 9 | # First party 10 | import mpu.pd 11 | 12 | 13 | def test_example_df(): 14 | df = mpu.pd.example_df() 15 | assert list(df.columns) == ["country", "population", "population_time", "EUR"] 16 | assert list(df["country"]) == [ 17 | "Germany", 18 | "France", 19 | "Indonesia", 20 | "Ireland", 21 | "Spain", 22 | "Vatican", 23 | ] 24 | assert list(df["population"])[:5] == [ 25 | 82521653.0, 26 | 66991000.0, 27 | 255461700.0, 28 | 4761865.0, 29 | 46549045.0, 30 | ] 31 | assert df["population_time"].equals( 32 | pd.Series( 33 | [ 34 | datetime.datetime(2016, 12, 1), 35 | datetime.datetime(2017, 1, 1), 36 | datetime.datetime(2017, 1, 1), 37 | None, # Ireland 38 | datetime.datetime(2017, 6, 1), # Spain 39 | None, 40 | ] 41 | ) 42 | ) 43 | assert list(df["EUR"]) == [True, True, False, True, True, True] 44 | 45 | 46 | def test_describe(capsys): 47 | mpu.pd.describe(mpu.pd.example_df()) 48 | captured = capsys.readouterr() 49 | assert ( 50 | captured.out 51 | == """Number of datapoints: 6 52 | 53 | ## Float Columns 54 | Column name Non-nan mean std min 25% 50% 75% max 55 | population 5 91257052.60 96317882.77 4761865.00 46549045.00 66991000.00 82521653.00 255461700.00 56 | 57 | ## Category Columns 58 | Column name Non-nan unique top el top (count) rest 59 | EUR 6 2 False 5 [True] 60 | 61 | ## Time Columns 62 | Column name Non-nan unique top el top (count) min max 63 | population_time 4 4 2016-12-01 00:00:00 2 2016-12-01 00:00:00 2017-06-01 00:00:00 64 | 65 | ## Other Columns 66 | Column name Non-nan unique top (count) rest 67 | country 6 6 France 1 ['Germany', 'Indonesia', 'Ireland', 'Spa 68 | """ 69 | ) 70 | 71 | 72 | def test_describe_int(capsys): 73 | column_info = {"int": ["numbers"]} 74 | df = pd.DataFrame({"numbers": [1, 2, 3, 100, 500]}) 75 | mpu.pd._describe_int(df, column_info) 76 | mpu.pd.describe(df, column_info) 77 | captured = capsys.readouterr() 78 | assert ( 79 | captured.out 80 | == """ 81 | ## Integer Columns 82 | Column name Non-nan mean std min 25% 50% 75% max 83 | numbers 5 121.2 215.96689561134133 1 2.0 3.0 100.0 500 84 | Number of datapoints: 5 85 | 86 | ## Integer Columns 87 | Column name Non-nan mean std min 25% 50% 75% max 88 | numbers 5 121.2 215.96689561134133 1 2.0 3.0 100.0 500 89 | """ 90 | ) 91 | 92 | 93 | def test_get_column_info_suspicious_categorical(): 94 | df = pd.DataFrame({"numbers": [1, 2, 3, 100, 500]}) 95 | mpu.pd._get_column_info(df, []) 96 | 97 | 98 | def test_get_column_info_nonsuspicious_categorical(): 99 | df = pd.DataFrame({"numbers": [i for i in range(200)]}) 100 | mpu.pd._get_column_info(df, []) 101 | 102 | 103 | def test_get_column_info_no_values(): 104 | df = pd.DataFrame({"numbers": []}) 105 | mpu.pd._get_column_info(df, []) 106 | 107 | 108 | def test_get_column_info_mixed_column(): 109 | df = pd.DataFrame({"numbers": [1, 2.3, None, 
"Foobar", (5, 10)]}) 110 | info = mpu.pd._get_column_info(df, []) 111 | 112 | assert set(info[1]["numbers"]["value_list"]) == {(5, 10), 2.3, "Foobar", 1} 113 | info[1]["numbers"]["value_list"] = None 114 | 115 | expected_column_info = { 116 | "category": [], 117 | "float": [], 118 | "int": [], 119 | "other": ["numbers"], 120 | "time": [], 121 | } 122 | expected_column_meta = { 123 | "numbers": {"top_count_val": 1, "value_list": None, "value_count": 4} 124 | } 125 | expected = (expected_column_info, expected_column_meta) 126 | assert info == expected 127 | 128 | 129 | def test_get_column_info_column_unknown_dtype(): 130 | df = pd.DataFrame({"numbers": [datetime.timedelta(days=3)]}) 131 | info = mpu.pd._get_column_info(df, []) 132 | 133 | assert set(info[1]["numbers"]["value_list"]) == {datetime.timedelta(days=3)} 134 | info[1]["numbers"]["value_list"] = None 135 | 136 | expected_column_info = { 137 | "category": [], 138 | "float": [], 139 | "int": [], 140 | "other": [], 141 | "time": [], 142 | } 143 | expected_column_meta = { 144 | "numbers": {"top_count_val": 1, "value_list": None, "value_count": 1} 145 | } 146 | expected = (expected_column_info, expected_column_meta) 147 | assert info == expected 148 | 149 | 150 | def test_countries_global(): 151 | assert len(mpu.pd.countries) == 248 152 | -------------------------------------------------------------------------------- /mpu/datastructures/trie/char_trie.py: -------------------------------------------------------------------------------- 1 | # Core Library 2 | import logging 3 | from typing import Dict, List, Tuple 4 | 5 | # First party 6 | from mpu.datastructures.trie.base import AbstractTrie 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class TrieNode: 12 | def __init__(self, value, is_word=False, count=0, children=None, freeze=False): 13 | if children is None: 14 | children = {} 15 | self._value = value 16 | self.children: Dict[str, TrieNode] = children 17 | self.is_word = is_word 18 | self.count = count 19 | self.is_frozen = freeze 20 | 21 | def get_subtrie(self, search_prefix: str, current_trie_node_prefix: str = ""): 22 | """ 23 | Get the TrieNodes which represents the given prefix. 24 | 25 | If the search_prefix is not in the trie, return ("", EMPTY_NODE). 26 | The found_prefix is a prefix of search_prefix or equal to it. 27 | 28 | Parameters 29 | ---------- 30 | search_prefix : str 31 | current_trie_node_prefix : str, optional (default: "") 32 | 33 | Returns 34 | ------- 35 | found_prefix, subtrie: Tuple[str, TrieNode] 36 | """ 37 | if len(search_prefix) == 0: 38 | return (current_trie_node_prefix, self) 39 | elif search_prefix[0] in self.children: 40 | child = self.children[search_prefix[0]] 41 | remainder = search_prefix[1:] 42 | new_prefix = current_trie_node_prefix + self._value 43 | return child.get_subtrie(remainder, current_trie_node_prefix=new_prefix) 44 | else: 45 | return ("", EMPTY_NODE) 46 | 47 | def push(self, value: str): 48 | if self.is_frozen: 49 | raise RuntimeError("The node is frozen. 
You may not edit it.") 50 | if value == self._value and len(value) == 0: 51 | # This is the root node 52 | self.is_word = True 53 | self.count += 1 54 | return 55 | if len(value) == 0: 56 | raise ValueError("The pushed value should not be empty") 57 | elif len(value) == 1: 58 | char = value[0] 59 | if char not in self.children: 60 | self.children[char] = TrieNode(value=char, is_word=True, count=1) 61 | else: 62 | self.children[char].is_word = True 63 | self.children[char].count += 1 64 | else: 65 | char = value[0] 66 | if char not in self.children: 67 | self.children[char] = TrieNode(value=char, is_word=False, count=0) 68 | self.children[char].push(value[1:]) 69 | 70 | def __iter__(self): 71 | self._iteration_queue: List[Tuple[TrieNode, str]] = [(self, "")] 72 | while self._iteration_queue: 73 | trie_node, prefix = self._iteration_queue.pop() 74 | children = sorted(trie_node.children.items(), key=lambda n: n[0]) 75 | for _, child in children: 76 | self._iteration_queue.append((child, prefix + trie_node._value)) 77 | if trie_node.is_word: 78 | for _ in range(trie_node.count): 79 | yield prefix + trie_node._value 80 | 81 | def print(self, _indent: int = 0): 82 | string = "" 83 | string += " " * _indent + self._value + "\n" 84 | children = sorted(self.children.values(), key=lambda child: child._value) 85 | for child in children: 86 | string += child.print(_indent=_indent + 1) 87 | return string 88 | 89 | def __str__(self): 90 | return f"TrieNode(value='{self._value}', nb_children='{len(self.children)}')" 91 | 92 | __repr__ = __str__ 93 | 94 | 95 | EMPTY_NODE = TrieNode(value="", is_word=False, count=0, freeze=True) 96 | 97 | 98 | class Trie(AbstractTrie): 99 | def __init__(self, container=None): 100 | if container is None: 101 | container = [] 102 | self._root = TrieNode(value="", count=0, is_word=0) 103 | self._length = 0 104 | for element in container: 105 | self.push(element) 106 | 107 | def __len__(self) -> int: 108 | return self._length 109 | 110 | def __contains__(self, element) -> bool: 111 | found_prefix, subtrie = self.get_subtrie(element) 112 | return subtrie.is_word and found_prefix + subtrie._value == element 113 | 114 | def autocomplete(self, prefix): 115 | found_prefix, subtrie = self.get_subtrie(prefix) 116 | for word in subtrie: 117 | yield found_prefix + word 118 | 119 | def get_subtrie(self, prefix) -> Tuple[str, TrieNode]: 120 | return self._root.get_subtrie(prefix) 121 | 122 | def __iter__(self): 123 | self._iteration_index = -1 124 | self._child_values = [element for element in self._root] 125 | return self 126 | 127 | def __next__(self): 128 | """Return the next value from the Trie.""" 129 | self._iteration_index += 1 130 | if self._iteration_index < self._length: 131 | return self._child_values[self._iteration_index] 132 | raise StopIteration 133 | 134 | def push(self, element: str): 135 | self._root.push(element) 136 | self._length += 1 137 | 138 | def print(self, print_stdout=True) -> str: 139 | string = "Trie\n" 140 | string += self._root.print() 141 | string = string.strip() 142 | if print_stdout: 143 | print(string) 144 | return string 145 | 146 | def __str__(self): 147 | return f"Trie(len={self._length}, {self._root})" 148 | 149 | __repr__ = __str__ 150 | -------------------------------------------------------------------------------- /mpu/aws.py: -------------------------------------------------------------------------------- 1 | """Convenience functions for AWS interactions.""" 2 | 3 | # Core Library 4 | import enum 5 | import os 6 | from collections import 
namedtuple 7 | from tempfile import mkstemp 8 | from typing import List, Optional 9 | 10 | # Third party 11 | import boto3.session 12 | 13 | 14 | def list_files( 15 | bucket: str, prefix: str = "", profile_name: Optional[str] = None 16 | ) -> List[str]: 17 | """ 18 | List up to 1000 files in a bucket. 19 | 20 | Parameters 21 | ---------- 22 | bucket : str 23 | prefix : str 24 | profile_name : str, optional 25 | AWS profile 26 | 27 | Returns 28 | ------- 29 | s3_paths : List[str] 30 | """ 31 | session = boto3.session.Session(profile_name=profile_name) 32 | conn = session.client("s3") 33 | keys = [] 34 | ret = conn.list_objects_v2(Bucket=bucket, Prefix=prefix) 35 | if "Contents" not in ret: 36 | return [] 37 | # Make this a generator in future and use the marker: 38 | # https://boto3.readthedocs.io/en/latest/reference/services/ 39 | # s3.html#S3.Client.list_objects 40 | for key in conn.list_objects_v2(Bucket=bucket, Prefix=prefix)["Contents"]: 41 | keys.append("s3://" + bucket + "/" + key["Key"]) 42 | return keys 43 | 44 | 45 | def s3_read(source: str, profile_name: Optional[str] = None) -> bytes: 46 | """ 47 | Read a file from an S3 source. 48 | 49 | Parameters 50 | ---------- 51 | source : str 52 | Path starting with s3://, e.g. 's3://bucket-name/key/foo.bar' 53 | profile_name : str, optional 54 | AWS profile 55 | 56 | Returns 57 | ------- 58 | content : bytes 59 | 60 | Raises 61 | ------ 62 | botocore.exceptions.NoCredentialsError 63 | Botocore is not able to find your credentials. Either specify 64 | profile_name or add the environment variables AWS_ACCESS_KEY_ID, 65 | AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN. 66 | See https://boto3.readthedocs.io/en/latest/guide/configuration.html 67 | """ 68 | session = boto3.session.Session(profile_name=profile_name) 69 | s3 = session.client("s3") 70 | bucket_name, key = _s3_path_split(source) 71 | s3_object = s3.get_object(Bucket=bucket_name, Key=key) 72 | body = s3_object["Body"] 73 | return body.read() 74 | 75 | 76 | class ExistsStrategy(enum.Enum): 77 | """Strategies what to do when a file already exists.""" 78 | 79 | RAISE = "raise" 80 | REPLACE = "replace" 81 | ABORT = "abort" 82 | 83 | 84 | def s3_download( 85 | source: str, 86 | destination: Optional[str] = None, 87 | exists_strategy: ExistsStrategy = ExistsStrategy.RAISE, 88 | profile_name: Optional[str] = None, 89 | ) -> Optional[str]: 90 | """ 91 | Copy a file from an S3 source to a local destination. 92 | 93 | Parameters 94 | ---------- 95 | source : str 96 | Path starting with s3://, e.g. 's3://bucket-name/key/foo.bar' 97 | destination : str, optional 98 | If none is given, a temporary file is created 99 | exists_strategy : {'raise', 'replace', 'abort'} 100 | What is done when the destination already exists? 101 | * `ExistsStrategy.RAISE` means a RuntimeError is raised, 102 | * `ExistsStrategy.REPLACE` means the local file is replaced, 103 | * `ExistsStrategy.ABORT` means the download is not done. 104 | profile_name : str, optional 105 | AWS profile 106 | 107 | Returns 108 | ------- 109 | download_path : Optional[str] 110 | Path of the downloaded file, if any was downloaded. 111 | 112 | Raises 113 | ------ 114 | botocore.exceptions.NoCredentialsError 115 | Botocore is not able to find your credentials. Either specify 116 | profile_name or add the environment variables AWS_ACCESS_KEY_ID, 117 | AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN. 
118 | See https://boto3.readthedocs.io/en/latest/guide/configuration.html 119 | """ 120 | if not isinstance(exists_strategy, ExistsStrategy): 121 | raise ValueError( 122 | f"exists_strategy '{exists_strategy}' is not in {ExistsStrategy}" 123 | ) 124 | session = boto3.session.Session(profile_name=profile_name) 125 | s3 = session.resource("s3") 126 | bucket_name, key = _s3_path_split(source) 127 | if destination is None: 128 | _, filename = os.path.split(source) 129 | prefix, suffix = os.path.splitext(filename) 130 | _, destination = mkstemp(prefix=prefix, suffix=suffix) 131 | elif os.path.isfile(destination): 132 | if exists_strategy is ExistsStrategy.RAISE: 133 | raise RuntimeError(f"File '{destination}' already exists.") 134 | elif exists_strategy is ExistsStrategy.ABORT: 135 | return None 136 | s3.Bucket(bucket_name).download_file(key, destination) 137 | return destination 138 | 139 | 140 | def s3_upload( 141 | source: str, destination: str, profile_name: Optional[str] = None 142 | ) -> None: 143 | """ 144 | Copy a file from a local source to an S3 destination. 145 | 146 | Parameters 147 | ---------- 148 | source : str 149 | destination : str 150 | Path starting with s3://, e.g. 's3://bucket-name/key/foo.bar' 151 | profile_name : str, optional 152 | AWS profile 153 | """ 154 | session = boto3.session.Session(profile_name=profile_name) 155 | s3 = session.resource("s3") 156 | bucket_name, key = _s3_path_split(destination) 157 | with open(source, "rb") as data: 158 | s3.Bucket(bucket_name).put_object(Key=key, Body=data) 159 | 160 | 161 | S3Path = namedtuple("S3Path", ["bucket_name", "key"]) 162 | 163 | 164 | def _s3_path_split(s3_path: str) -> S3Path: 165 | """ 166 | Split an S3 path into bucket and key. 167 | 168 | Parameters 169 | ---------- 170 | s3_path : str 171 | 172 | Returns 173 | ------- 174 | splitted : S3Path 175 | 176 | Examples 177 | -------- 178 | >>> _s3_path_split('s3://my-bucket/foo/bar.jpg') 179 | S3Path(bucket_name='my-bucket', key='foo/bar.jpg') 180 | """ 181 | if not s3_path.startswith("s3://"): 182 | raise ValueError( 183 | f"s3_path is expected to start with 's3://', but was {s3_path}" 184 | ) 185 | bucket_key = s3_path[len("s3://") :] 186 | bucket_name, key = bucket_key.split("/", 1) 187 | return S3Path(bucket_name, key) 188 | -------------------------------------------------------------------------------- /requirements/dev.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.7 3 | # To update, run: 4 | # 5 | # pip-compile requirements/dev.in 6 | # 7 | attrs==21.4.0 8 | # via 9 | # -r requirements/ci.txt 10 | # hypothesis 11 | # pytest 12 | bleach==4.1.0 13 | # via 14 | # -r requirements/ci.txt 15 | # readme-renderer 16 | boto3==1.20.47 17 | # via 18 | # -r requirements/ci.txt 19 | # moto 20 | botocore==1.23.47 21 | # via 22 | # -r requirements/ci.txt 23 | # boto3 24 | # moto 25 | # s3transfer 26 | certifi==2021.10.8 27 | # via 28 | # -r requirements/ci.txt 29 | # requests 30 | cffi==1.15.0 31 | # via 32 | # -r requirements/ci.txt 33 | # cryptography 34 | cfgv==3.3.1 35 | # via pre-commit 36 | charset-normalizer==2.0.11 37 | # via 38 | # -r requirements/ci.txt 39 | # requests 40 | click==8.0.3 41 | # via 42 | # -r requirements/ci.txt 43 | # pip-tools 44 | colorama==0.4.4 45 | # via 46 | # -r requirements/ci.txt 47 | # twine 48 | coverage[toml]==6.3.1 49 | # via 50 | # -r requirements/ci.txt 51 | # pytest-cov 52 | cryptography==36.0.1 53 | # via 54 | # -r 
requirements/ci.txt 55 | # moto 56 | # secretstorage 57 | distlib==0.3.4 58 | # via virtualenv 59 | docutils==0.18.1 60 | # via 61 | # -r requirements/ci.txt 62 | # readme-renderer 63 | filelock==3.4.2 64 | # via virtualenv 65 | hypothesis==6.36.1 66 | # via -r requirements/ci.txt 67 | identify==2.4.7 68 | # via pre-commit 69 | idna==3.3 70 | # via 71 | # -r requirements/ci.txt 72 | # requests 73 | importlib-metadata==4.10.1 74 | # via 75 | # -r requirements/ci.txt 76 | # click 77 | # keyring 78 | # moto 79 | # pep517 80 | # pluggy 81 | # pre-commit 82 | # pytest 83 | # twine 84 | # virtualenv 85 | iniconfig==1.1.1 86 | # via 87 | # -r requirements/ci.txt 88 | # pytest 89 | jeepney==0.7.1 90 | # via 91 | # -r requirements/ci.txt 92 | # keyring 93 | # secretstorage 94 | jinja2==3.0.3 95 | # via 96 | # -r requirements/ci.txt 97 | # moto 98 | jmespath==0.10.0 99 | # via 100 | # -r requirements/ci.txt 101 | # boto3 102 | # botocore 103 | keyring==23.5.0 104 | # via 105 | # -r requirements/ci.txt 106 | # twine 107 | markupsafe==2.0.1 108 | # via 109 | # -r requirements/ci.txt 110 | # jinja2 111 | # moto 112 | moto==3.0.2 113 | # via -r requirements/ci.txt 114 | nodeenv==1.6.0 115 | # via pre-commit 116 | numpy==1.21.5 117 | # via 118 | # -r requirements/ci.txt 119 | # pandas 120 | packaging==21.3 121 | # via 122 | # -r requirements/ci.txt 123 | # bleach 124 | # pytest 125 | pandas==1.3.5 126 | # via -r requirements/ci.txt 127 | pep517==0.12.0 128 | # via 129 | # -r requirements/ci.txt 130 | # pip-tools 131 | pip-tools==6.4.0 132 | # via 133 | # -r requirements/ci.txt 134 | # -r requirements/dev.in 135 | pkginfo==1.8.2 136 | # via 137 | # -r requirements/ci.txt 138 | # twine 139 | platformdirs==2.4.1 140 | # via virtualenv 141 | pluggy==1.0.0 142 | # via 143 | # -r requirements/ci.txt 144 | # pytest 145 | pre-commit==2.17.0 146 | # via -r requirements/dev.in 147 | py==1.11.0 148 | # via 149 | # -r requirements/ci.txt 150 | # pytest 151 | py-cpuinfo==8.0.0 152 | # via 153 | # -r requirements/ci.txt 154 | # pytest-benchmark 155 | pycparser==2.21 156 | # via 157 | # -r requirements/ci.txt 158 | # cffi 159 | pygments==2.11.2 160 | # via 161 | # -r requirements/ci.txt 162 | # readme-renderer 163 | pyparsing==3.0.7 164 | # via 165 | # -r requirements/ci.txt 166 | # packaging 167 | pytest==6.2.5 168 | # via 169 | # -r requirements/ci.txt 170 | # pytest-benchmark 171 | # pytest-cov 172 | # pytest-timeout 173 | pytest-benchmark==3.4.1 174 | # via -r requirements/ci.txt 175 | pytest-cov==3.0.0 176 | # via -r requirements/ci.txt 177 | pytest-timeout==2.1.0 178 | # via -r requirements/ci.txt 179 | python-dateutil==2.8.2 180 | # via 181 | # -r requirements/ci.txt 182 | # botocore 183 | # moto 184 | # pandas 185 | pytz==2021.3 186 | # via 187 | # -r requirements/ci.txt 188 | # moto 189 | # pandas 190 | pyyaml==6.0 191 | # via pre-commit 192 | readme-renderer==32.0 193 | # via 194 | # -r requirements/ci.txt 195 | # twine 196 | requests==2.27.1 197 | # via 198 | # -r requirements/ci.txt 199 | # moto 200 | # requests-toolbelt 201 | # responses 202 | # twine 203 | requests-toolbelt==0.9.1 204 | # via 205 | # -r requirements/ci.txt 206 | # twine 207 | responses==0.18.0 208 | # via 209 | # -r requirements/ci.txt 210 | # moto 211 | rfc3986==2.0.0 212 | # via 213 | # -r requirements/ci.txt 214 | # twine 215 | s3transfer==0.5.1 216 | # via 217 | # -r requirements/ci.txt 218 | # boto3 219 | secretstorage==3.3.1 220 | # via 221 | # -r requirements/ci.txt 222 | # keyring 223 | simplejson==3.17.6 224 | # via -r 
requirements/ci.txt 225 | six==1.16.0 226 | # via 227 | # -r requirements/ci.txt 228 | # bleach 229 | # python-dateutil 230 | # virtualenv 231 | sortedcontainers==2.4.0 232 | # via 233 | # -r requirements/ci.txt 234 | # hypothesis 235 | toml==0.10.2 236 | # via 237 | # -r requirements/ci.txt 238 | # pre-commit 239 | # pytest 240 | tomli==2.0.0 241 | # via 242 | # -r requirements/ci.txt 243 | # coverage 244 | # pep517 245 | tqdm==4.62.3 246 | # via 247 | # -r requirements/ci.txt 248 | # twine 249 | twine==3.8.0 250 | # via -r requirements/ci.txt 251 | typing-extensions==4.0.1 252 | # via 253 | # -r requirements/ci.txt 254 | # importlib-metadata 255 | urllib3==1.26.8 256 | # via 257 | # -r requirements/ci.txt 258 | # botocore 259 | # requests 260 | # responses 261 | # twine 262 | virtualenv==20.13.0 263 | # via pre-commit 264 | webencodings==0.5.1 265 | # via 266 | # -r requirements/ci.txt 267 | # bleach 268 | werkzeug==2.0.2 269 | # via 270 | # -r requirements/ci.txt 271 | # moto 272 | wheel==0.37.1 273 | # via 274 | # -r requirements/ci.txt 275 | # -r requirements/dev.in 276 | # pip-tools 277 | xmltodict==0.12.0 278 | # via 279 | # -r requirements/ci.txt 280 | # moto 281 | zipp==3.7.0 282 | # via 283 | # -r requirements/ci.txt 284 | # importlib-metadata 285 | # pep517 286 | 287 | # The following packages are considered to be unsafe in a requirements file: 288 | # pip 289 | # setuptools 290 | -------------------------------------------------------------------------------- /mpu/math.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mathematical functions which are not adequately covered by standard libraries. 3 | 4 | Standard libraries are: 5 | 6 | * `math `_ 7 | * `scipy `_ 8 | * `sympy `_ 9 | 10 | """ 11 | 12 | # Core Library 13 | import math as math_stl 14 | import operator 15 | from functools import reduce 16 | from typing import Dict, Iterable, Iterator, List, Optional 17 | 18 | 19 | def generate_primes() -> Iterator[int]: 20 | """ 21 | Generate an infinite sequence of prime numbers. 22 | 23 | The algorithm was originally written by David Eppstein, UC Irvine. See: 24 | http://code.activestate.com/recipes/117119/ 25 | 26 | Examples 27 | -------- 28 | >>> g = generate_primes() 29 | >>> next(g) 30 | 2 31 | >>> next(g) 32 | 3 33 | >>> next(g) 34 | 5 35 | """ 36 | divisors: Dict[int, List[int]] = {} # map number to at least one divisor 37 | 38 | candidate = 2 # next potential prime 39 | 40 | while True: 41 | if candidate in divisors: 42 | # candidate is composite. divisors[candidate] is the list of primes 43 | # that divide it. Since we've reached candidate, we no longer need 44 | # it in the map, but we'll mark the next multiples of its witnesses 45 | # to prepare for larger numbers 46 | for p in divisors[candidate]: 47 | divisors.setdefault(p + candidate, []).append(p) 48 | del divisors[candidate] 49 | else: 50 | # candidate is a new prime 51 | yield candidate 52 | 53 | # mark its first multiple that isn't 54 | # already marked in previous iterations 55 | divisors[candidate * candidate] = [candidate] 56 | 57 | candidate += 1 58 | 59 | 60 | def factorize(number: int) -> List[int]: 61 | """ 62 | Get the prime factors of an integer except for 1. 
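The implementation uses trial division: factors of 2 are stripped first, then
odd candidates up to sqrt(number) are tried, recursing on the remaining
cofactor. Negative inputs yield a leading -1 factor.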
63 | 64 | Parameters 65 | ---------- 66 | number : int 67 | 68 | Returns 69 | ------- 70 | primes : List[int] 71 | 72 | Examples 73 | -------- 74 | >>> factorize(-17) 75 | [-1, 17] 76 | >>> factorize(8) 77 | [2, 2, 2] 78 | >>> factorize(3**25) 79 | [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] 80 | >>> factorize(1) 81 | [1] 82 | """ 83 | if not isinstance(number, int): 84 | raise ValueError(f"integer expected, but type(number)={type(number)}") 85 | if number < 0: 86 | return [-1] + factorize(number * (-1)) 87 | elif number == 0: 88 | raise ValueError("All primes are prime factors of 0.") 89 | else: 90 | factors = [] 91 | factor = 2 92 | while number % factor == 0: 93 | factors.append(factor) 94 | number = number // factor 95 | if number == 1: 96 | if len(factors) > 0: 97 | return factors 98 | else: 99 | return [1] 100 | for factor in range(3, int(math_stl.ceil(number**0.5)) + 1, 2): 101 | if number % factor == 0: 102 | return factors + [factor] + factorize(number // factor) 103 | return factors + [number] 104 | 105 | 106 | def is_prime(number: int) -> bool: 107 | """ 108 | Check if a number is prime. 109 | 110 | Parameters 111 | ---------- 112 | number : int 113 | 114 | Returns 115 | ------- 116 | is_prime_number : bool 117 | 118 | Examples 119 | -------- 120 | >>> is_prime(-17) 121 | False 122 | >>> is_prime(17) 123 | True 124 | >>> is_prime(47055833459) 125 | True 126 | """ 127 | return len(factorize(number)) == 1 128 | 129 | 130 | def product(iterable: Iterable, start: int = 1) -> int: 131 | """ 132 | Calculate the product of the iterables. 133 | 134 | Parameters 135 | ---------- 136 | iterable : iterable 137 | List, tuple or similar which contains numbers 138 | start : number, optional (default: 1) 139 | 140 | Returns 141 | ------- 142 | product : number 143 | 144 | Examples 145 | -------- 146 | >>> product([1, 2, 3, 4, 5]) 147 | 120 148 | >>> product([]) 149 | 1 150 | """ 151 | return reduce(operator.mul, iterable, start) 152 | 153 | 154 | def argmax(iterable: Iterable) -> Optional[int]: 155 | """ 156 | Find the first index of the biggest value in the iterable. 157 | 158 | Parameters 159 | ---------- 160 | iterable : Iterable 161 | 162 | Returns 163 | ------- 164 | argmax : Optional[int] 165 | 166 | Examples 167 | -------- 168 | >>> argmax([0, 0, 0]) 169 | 0 170 | >>> argmax([1, 0, 0]) 171 | 0 172 | >>> argmax([0, 1, 0]) 173 | 1 174 | >>> argmax([]) 175 | """ 176 | max_value = None 177 | max_index = None 178 | for index, value in enumerate(iterable): 179 | if (max_value is None) or max_value < value: 180 | max_value = value 181 | max_index = index 182 | return max_index 183 | 184 | 185 | def round_up(x: float, decimal_places: int) -> float: 186 | """ 187 | Round a float up to decimal_places. 188 | 189 | Parameters 190 | ---------- 191 | x : float 192 | decimal_places : int 193 | 194 | Returns 195 | ------- 196 | rounded_float : float 197 | 198 | Examples 199 | -------- 200 | >>> round_up(1.2344, 3) 201 | 1.235 202 | >>> round_up(1.234, 3) 203 | 1.234 204 | >>> round_up(1.23456, 3) 205 | 1.235 206 | >>> round_up(1.23456, 2) 207 | 1.24 208 | """ 209 | return round(x + 5 * 10 ** (-1 * (decimal_places + 1)), decimal_places) 210 | 211 | 212 | def round_down(x: float, decimal_places: int) -> float: 213 | """ 214 | Round a float down to decimal_places. 
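Equivalent to floor(x * 10**decimal_places) / 10**decimal_places.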
215 | 216 | Parameters 217 | ---------- 218 | x : float 219 | decimal_places : int 220 | 221 | Returns 222 | ------- 223 | rounded_float : float 224 | 225 | Examples 226 | -------- 227 | >>> round_down(1.23456, 3) 228 | 1.234 229 | >>> round_down(1.23456, 2) 230 | 1.23 231 | """ 232 | d = int("1" + ("0" * decimal_places)) 233 | return math_stl.floor(x * d) / d 234 | 235 | 236 | def gcd(a: int, b: int) -> int: 237 | """ 238 | Calculate the greatest common divisor. 239 | 240 | Currently, this uses the Euclidean algorithm. 241 | 242 | Parameters 243 | ---------- 244 | a : int 245 | Non-zero 246 | b : int 247 | Non-zero 248 | 249 | Returns 250 | ------- 251 | greatest_common_divisor : int 252 | 253 | Examples 254 | -------- 255 | >>> gcd(1, 7) 256 | 1 257 | >>> gcd(-1, -1) 258 | 1 259 | >>> gcd(1337, 42) 260 | 7 261 | >>> gcd(-1337, -42) 262 | 7 263 | >>> gcd(120, 364) 264 | 4 265 | >>> gcd(273, 1870) 266 | 1 267 | """ 268 | if a == 0 or b == 0: 269 | raise ValueError(f"gcd(a={a}, b={b}) is undefined") 270 | while b != 0: 271 | a, b = b, a % b 272 | return abs(a) 273 | -------------------------------------------------------------------------------- /tests/test_units.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Test the mpu.units module.""" 4 | 5 | # Third party 6 | import pytest 7 | import simplejson # has for_json 8 | 9 | # First party 10 | from mpu.units import Currency, Money, get_currency 11 | 12 | 13 | def test_get_currency(): 14 | a = Money("0.1", "EUR") 15 | assert str(a) == "0.10 Euro" 16 | b = Money("0.1", "USD") 17 | assert str(b) == "0.10 US Dollar" 18 | with pytest.raises(ValueError): 19 | Money("0.1", "foobar") 20 | c = Money((1, 100), "EUR") 21 | d = Money(5, "ESP") 22 | assert str(c) == "0.01 Euro" 23 | assert repr(c) == "0.01 Euro" 24 | assert str(d) == "5.00 Spanish Peseta" 25 | with pytest.raises(ValueError): 26 | Money((5, 100, 42), "EUR") 27 | with pytest.raises(ValueError): 28 | Money(0.1, "EUR") 29 | non_currency = Money("0.1", None) 30 | assert str(non_currency) == "0.10" 31 | with pytest.raises(ValueError): 32 | Money(1, a) 33 | 34 | 35 | def test_currency_for_json(): 36 | usd = get_currency("USD") 37 | dump = simplejson.dumps(usd, for_json=True) 38 | dict_ = simplejson.loads(dump) 39 | undump = Currency.from_json(dict_) 40 | assert usd == undump 41 | 42 | 43 | def test_money_json_magic(): 44 | usd = Money("0.1", "USD") 45 | usd_dict = usd.__json__() 46 | dump = simplejson.dumps(usd_dict) 47 | dict_ = simplejson.loads(dump) 48 | undump = Money.from_json(dict_) 49 | assert usd == undump 50 | 51 | 52 | def test_money_json_magic_none(): 53 | usd = Money("0.1", None) 54 | usd_dict = usd.__json__() 55 | dump = simplejson.dumps(usd_dict) 56 | dict_ = simplejson.loads(dump) 57 | undump = Money.from_json(dict_) 58 | assert usd == undump 59 | 60 | 61 | def test_money_conversion_float(): 62 | """Test if one can convert Money instances to float.""" 63 | a = Money("1337.00", None) 64 | assert float(a) == 1337.0 65 | b = Money("42.00", "USD") 66 | assert float(b) == 42.0 67 | 68 | 69 | def test_money_floatingpoint_issue1(): 70 | """The test is the reason why one should not use float for money.""" 71 | a = Money("10.00", None) 72 | b = Money("1.2", None) 73 | assert str(a + b - a) == str(b) 74 | 75 | 76 | def test_money_floatingpoint_issue2(): 77 | """The test is the reason why one should not use float for money.""" 78 | a = Money("10.00", None) 79 | b = Money("1.2", None) 80 | assert str((a + b - a) * 
10**14 - b * 10**14) == "0.00" 81 | 82 | 83 | def test_currency_operations(): 84 | a = Money("0.5", "EUR") 85 | aneg = Money("-0.5", "EUR") 86 | b = Money("0.1", "EUR") 87 | c = Money("0.1", "USD") 88 | d = Money("0.5", "EUR") 89 | assert (a == b) is False 90 | with pytest.raises(ValueError): 91 | a == 0.5 92 | assert a == d 93 | with pytest.raises(ValueError): 94 | a == c 95 | assert a != b 96 | assert (a != d) is False 97 | with pytest.raises(ValueError): 98 | a != c 99 | assert str(a - b) == "0.40 Euro" 100 | assert -a == aneg 101 | assert +a == a 102 | with pytest.raises(ValueError): 103 | a - c 104 | with pytest.raises(ValueError): 105 | a - 2 106 | with pytest.raises(ValueError): 107 | a - 2.0 108 | assert str(a + b) == "0.60 Euro" 109 | with pytest.raises(ValueError): 110 | a + c 111 | with pytest.raises(ValueError): 112 | a + 2 113 | with pytest.raises(ValueError): 114 | a + 2.0 115 | assert str(2 * a) == "1.00 Euro" 116 | assert str(a / b) == "5" 117 | with pytest.raises(ValueError): 118 | a / c 119 | with pytest.raises(ValueError): 120 | a * 3.141 121 | with pytest.raises(ValueError): 122 | 3.141 * a 123 | with pytest.raises(ValueError): 124 | a / "0.1" 125 | assert str(a / 2) == "0.25 Euro" 126 | 127 | 128 | def test_currency_comperators(): 129 | a = Money("0.5", "EUR") 130 | b = Money("0.1", "EUR") 131 | c = Money("0.5", "EUR") 132 | d = Money("0.5", "USD") 133 | assert a > b 134 | assert (a < b) is False 135 | assert a >= b 136 | assert (a <= b) is False 137 | assert (a > c) is False 138 | assert (a < c) is False 139 | assert a >= c 140 | assert a <= c 141 | 142 | with pytest.raises(ValueError): 143 | is_smaller = c < d 144 | with pytest.raises(ValueError): 145 | is_smaller = c < d 146 | with pytest.raises(ValueError): 147 | is_equal = c == d 148 | assert (c < 1) is False 149 | assert (c > 1) is False 150 | 151 | 152 | def test_currency(): 153 | eur = Currency( 154 | name="Euro", 155 | code="EUR", 156 | numeric_code=123, 157 | symbol="€", 158 | exponent=2, 159 | entities=["Germany"], 160 | withdrawal_date=None, 161 | subunits=2, 162 | ) 163 | usd = Currency( 164 | name="US Dollar", 165 | code="USD", 166 | numeric_code=456, 167 | symbol="$", 168 | exponent=2, 169 | entities=["United States of America"], 170 | withdrawal_date=None, 171 | subunits=2, 172 | ) 173 | repr(eur) 174 | assert (eur == usd) is False 175 | assert (eur == 2) is False 176 | assert eur != usd 177 | with pytest.raises(ValueError): 178 | Currency( 179 | name=2, 180 | code="EUR", 181 | numeric_code=123, 182 | symbol="€", 183 | exponent=2, 184 | entities=["Germany"], 185 | withdrawal_date=None, 186 | subunits=2, 187 | ) 188 | with pytest.raises(ValueError): 189 | Currency( 190 | name="Euro", 191 | code=2, 192 | numeric_code=123, 193 | symbol="€", 194 | exponent=2, 195 | entities=["Germany"], 196 | withdrawal_date=None, 197 | subunits=2, 198 | ) 199 | with pytest.raises(ValueError): 200 | Currency( 201 | name="Euro", 202 | code="EUR", 203 | numeric_code=123, 204 | symbol="€", 205 | exponent="2", 206 | entities=["Germany"], 207 | withdrawal_date=None, 208 | subunits=2, 209 | ) 210 | 211 | 212 | def test_formatting(): 213 | non_currency = Money("12.2", None) 214 | assert f"{non_currency}" == "12.20" 215 | assert f"{non_currency:0.2f,symbol}" == "12.20" 216 | assert f"{non_currency:0.2f,postsymbol}" == "12.20" 217 | assert f"{non_currency:0.2f,shortcode}" == "12.20" 218 | assert f"{non_currency:0.2f,postshortcode}" == "12.20" 219 | 220 | a = Money("12.20", "USD") 221 | assert f"{a}" == "12.20 USD" 222 | assert 
f"{a:0.2f,symbol}" == "$12.20" 223 | assert f"{a:0.2f,postsymbol}" == "12.20$" 224 | assert f"{a:0.2f,shortcode}" == "USD 12.20" 225 | assert f"{a:0.2f,postshortcode}" == "12.20 USD" 226 | 227 | 228 | def test_gt_other_currency_fail(): 229 | a = Money("12.45", "USD") 230 | b = Money("67.89", "EUR") 231 | with pytest.raises(ValueError) as exinfo: 232 | a > b 233 | error_msg = ( 234 | "Left has currency=US Dollar, right has currency=Euro. " 235 | "You need to convert to the same currency first." 236 | ) 237 | assert str(exinfo.value) == error_msg 238 | -------------------------------------------------------------------------------- /mpu/__init__.py: -------------------------------------------------------------------------------- 1 | """mpu: Martins Python Utilities.""" 2 | 3 | 4 | # Core Library 5 | import logging 6 | import math as math_stl 7 | import multiprocessing.pool 8 | import random 9 | import traceback 10 | from contextlib import closing 11 | from types import TracebackType 12 | from typing import Any, Callable, List, Optional, Tuple, TypeVar, Union 13 | 14 | # First party 15 | from mpu import io, shell, string, units # noqa 16 | from mpu._version import __version__ # noqa 17 | from mpu.type import Comparable 18 | 19 | T = TypeVar("T") 20 | 21 | 22 | def parallel_for( 23 | loop_function: Callable[[Any], T], 24 | parameters: List[Tuple[Any, ...]], 25 | nb_threads: int = 100, 26 | ) -> List[T]: 27 | """ 28 | Execute the loop body in parallel. 29 | 30 | .. note:: Race-Conditions 31 | Executing code in parallel can cause an error class called 32 | "race-condition". 33 | 34 | Parameters 35 | ---------- 36 | loop_function : Callable 37 | Python function which takes a tuple as input 38 | parameters : List[Tuple] 39 | Each element here should be executed in parallel. 40 | nb_threads : int (default: 100) 41 | The number of threads to use. 42 | 43 | Returns 44 | ------- 45 | return_values : list of return values 46 | """ 47 | with closing(multiprocessing.pool.ThreadPool(nb_threads)) as pool: 48 | return pool.map(loop_function, parameters) 49 | 50 | 51 | def clip( 52 | number: Union[int, float], 53 | lowest: Union[None, int, float] = None, 54 | highest: Union[None, int, float] = None, 55 | ) -> Union[int, float]: 56 | """ 57 | Clip a number to a given lowest / highest value. 58 | 59 | Parameters 60 | ---------- 61 | number : number 62 | lowest : number, optional 63 | highest : number, optional 64 | 65 | Returns 66 | ------- 67 | clipped_number : number 68 | 69 | Examples 70 | -------- 71 | >>> clip(42, lowest=0, highest=10) 72 | 10 73 | """ 74 | if lowest is not None: 75 | number = max(number, lowest) 76 | if highest is not None: 77 | number = min(number, highest) 78 | return number 79 | 80 | 81 | def consistent_shuffle(*lists: List[List[Any]]) -> Tuple[List[Any], ...]: 82 | """ 83 | Shuffle lists consistently. 
84 | 85 | Parameters 86 | ---------- 87 | *lists 88 | Variable length number of lists 89 | 90 | Returns 91 | ------- 92 | shuffled_lists : tuple of lists 93 | All of the lists are shuffled consistently 94 | 95 | Examples 96 | -------- 97 | >>> import mpu, random; random.seed(8) 98 | >>> mpu.consistent_shuffle([1,2,3], ['a', 'b', 'c'], ['A', 'B', 'C']) 99 | ([3, 2, 1], ['c', 'b', 'a'], ['C', 'B', 'A']) 100 | """ 101 | LEN = len(lists[0]) 102 | if any(len(l) != LEN for l in lists): 103 | raise ValueError("All lists need to have the same length") 104 | perm = list(range(LEN)) 105 | random.shuffle(perm) 106 | lists = tuple([sublist[index] for index in perm] for sublist in lists) 107 | return lists 108 | 109 | 110 | class Location: 111 | """ 112 | Define a single point. 113 | 114 | Parameters 115 | ---------- 116 | latitude : float 117 | in [-90, 90] - from North to South 118 | longitude : float 119 | in [-180, 180] - from West to East 120 | """ 121 | 122 | MIN_LATITUDE = -90 123 | MAX_LATITUDE = 90 124 | MIN_LONGITUDE = -180 125 | MAX_LONGITUDE = 180 126 | 127 | def __init__(self, latitude: float, longitude: float): 128 | self.latitude = latitude 129 | self.longitude = longitude 130 | 131 | @property 132 | def latitude(self) -> float: 133 | """Getter for latitude.""" 134 | return self._latitude 135 | 136 | @latitude.setter 137 | def latitude(self, latitude: float) -> None: 138 | """Setter for latitude.""" 139 | if not (Location.MIN_LATITUDE <= latitude <= Location.MAX_LATITUDE): 140 | raise ValueError(f"latitude was {latitude}, but has to be in [-90, 90]") 141 | self._latitude = latitude 142 | 143 | @property 144 | def longitude(self) -> float: 145 | """Getter for longitude.""" 146 | return self._longitude 147 | 148 | @longitude.setter 149 | def longitude(self, longitude: float) -> None: 150 | """Setter for longitude.""" 151 | if not (Location.MIN_LONGITUDE <= longitude <= Location.MAX_LONGITUDE): 152 | raise ValueError(f"longitude was {longitude}, but has to be in [-180, 180]") 153 | self._longitude = longitude 154 | 155 | def get_google_maps_link(self) -> str: 156 | """Get a Google Maps link to this location.""" 157 | return f"https://www.google.com/maps/place/{self.latitude},{self.longitude}" 158 | 159 | def distance(self, there: "Location") -> float: 160 | """ 161 | Calculate the distance from this location to there. 162 | 163 | Parameters 164 | ---------- 165 | there : Location 166 | 167 | Returns 168 | ------- 169 | distance_in_m : float 170 | """ 171 | return haversine_distance( 172 | (self.latitude, self.longitude), (there.latitude, there.longitude) 173 | ) 174 | 175 | def __repr__(self) -> str: 176 | """Get an unambiguous representation.""" 177 | return f"Location({self.latitude}, {self.longitude})" 178 | 179 | __str__ = __repr__ 180 | 181 | 182 | def haversine_distance( 183 | origin: Tuple[float, float], destination: Tuple[float, float] 184 | ) -> float: 185 | """ 186 | Calculate the Haversine distance. 
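The great-circle distance is computed on a sphere with mean Earth radius 6371 km; compared to ellipsoidal models such as WGS84 the result can deviate by a few tenths of a percent.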
187 | 188 | Parameters 189 | ---------- 190 | origin : Tuple[float, float] 191 | (lat, long) 192 | destination : Tuple[float, float] 193 | (lat, long) 194 | 195 | Returns 196 | ------- 197 | distance_in_km : float 198 | 199 | Examples 200 | -------- 201 | >>> munich = (48.1372, 11.5756) 202 | >>> berlin = (52.5186, 13.4083) 203 | >>> round(haversine_distance(munich, berlin), 1) 204 | 504.2 205 | 206 | >>> new_york_city = (40.712777777778, -74.005833333333) # NYC 207 | >>> round(haversine_distance(berlin, new_york_city), 1) 208 | 6385.3 209 | """ 210 | lat1, lon1 = origin 211 | lat2, lon2 = destination 212 | if not (Location.MIN_LATITUDE <= lat1 <= Location.MAX_LATITUDE): 213 | raise ValueError(f"lat1={lat1:2.2f}, but must be in [-90,+90]") 214 | if not (Location.MIN_LATITUDE <= lat2 <= Location.MAX_LATITUDE): 215 | raise ValueError(f"lat2={lat2:2.2f}, but must be in [-90,+90]") 216 | if not (Location.MIN_LONGITUDE <= lon1 <= Location.MAX_LONGITUDE): 217 | raise ValueError(f"lon1={lon1:2.2f}, but must be in [-180,+180]") 218 | if not (Location.MIN_LONGITUDE <= lon2 <= Location.MAX_LONGITUDE): 219 | raise ValueError(f"lon2={lon2:2.2f}, but must be in [-180,+180]") 220 | radius = 6371 # km 221 | 222 | dlat = math_stl.radians(lat2 - lat1) 223 | dlon = math_stl.radians(lon2 - lon1) 224 | a = math_stl.sin(dlat / 2) * math_stl.sin(dlat / 2) + math_stl.cos( 225 | math_stl.radians(lat1) 226 | ) * math_stl.cos(math_stl.radians(lat2)) * math_stl.sin(dlon / 2) * math_stl.sin( 227 | dlon / 2 228 | ) 229 | c = 2 * math_stl.atan2(math_stl.sqrt(a), math_stl.sqrt(1 - a)) 230 | d = radius * c 231 | 232 | return d 233 | 234 | 235 | def is_in_interval( 236 | value: Comparable, 237 | min_value: Comparable, 238 | max_value: Comparable, 239 | name: str = "variable", 240 | ) -> None: 241 | """ 242 | Raise an exception if value is not in an interval. 243 | 244 | Parameters 245 | ---------- 246 | value : Comparable 247 | min_value : Comparable 248 | max_value : Comparable 249 | name : str 250 | Name of the variable to print in exception. 251 | """ 252 | if not (min_value <= value <= max_value): 253 | raise ValueError(f"{name}={value} is not in [{min_value}, {max_value}]") 254 | 255 | 256 | def exception_logging(exctype: Any, value: Any, tb: Optional[TracebackType]) -> None: 257 | """ 258 | Log exception by using the root logger. 259 | 260 | Use it as `sys.excepthook = exception_logging`. 261 | 262 | Parameters 263 | ---------- 264 | exctype : type 265 | value : NameError 266 | tb : traceback 267 | """ 268 | write_val = { 269 | "exception_type": str(exctype), 270 | "message": str(traceback.format_tb(tb, 10)), 271 | } 272 | logging.exception(str(write_val)) 273 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH.
If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " epub3 to make an epub3" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | @echo " dummy to check syntax errors of document sources" 51 | 52 | .PHONY: clean 53 | clean: 54 | rm -rf $(BUILDDIR)/* 55 | 56 | .PHONY: html 57 | html: 58 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 61 | 62 | .PHONY: dirhtml 63 | dirhtml: 64 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 65 | @echo 66 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 67 | 68 | .PHONY: singlehtml 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | .PHONY: pickle 75 | pickle: 76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 77 | @echo 78 | @echo "Build finished; now you can process the pickle files." 79 | 80 | .PHONY: json 81 | json: 82 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 83 | @echo 84 | @echo "Build finished; now you can process the JSON files." 85 | 86 | .PHONY: htmlhelp 87 | htmlhelp: 88 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 89 | @echo 90 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 91 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
92 | 93 | .PHONY: qthelp 94 | qthelp: 95 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 96 | @echo 97 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 98 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 99 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/mpu.qhcp" 100 | @echo "To view the help file:" 101 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/mpu.qhc" 102 | 103 | .PHONY: applehelp 104 | applehelp: 105 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 106 | @echo 107 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 108 | @echo "N.B. You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/mpu" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/mpu" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. 
The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 201 | 202 | .PHONY: doctest 203 | doctest: 204 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 205 | @echo "Testing of doctests in the sources finished, look at the " \ 206 | "results in $(BUILDDIR)/doctest/output.txt." 207 | 208 | .PHONY: coverage 209 | coverage: 210 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 211 | @echo "Testing of coverage in the sources finished, look at the " \ 212 | "results in $(BUILDDIR)/coverage/python.txt." 213 | 214 | .PHONY: xml 215 | xml: 216 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 217 | @echo 218 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 219 | 220 | .PHONY: pseudoxml 221 | pseudoxml: 222 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 223 | @echo 224 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 225 | 226 | .PHONY: dummy 227 | dummy: 228 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 229 | @echo 230 | @echo "Build finished. Dummy builder generates no files." 231 | 232 | apidoc: 233 | sphinx-apidoc -o source/ ../mpu 234 | -------------------------------------------------------------------------------- /mpu/datastructures/trie/string_trie.py: -------------------------------------------------------------------------------- 1 | """Implementation of a trie which has multi-character strings as node elements.""" 2 | 3 | # Core Library 4 | import logging 5 | from typing import List, Set, Tuple 6 | 7 | # First party 8 | from mpu.datastructures.trie.base import AbstractTrie 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class TrieNode: 14 | def __init__( 15 | self, 16 | value, 17 | is_word: bool = False, 18 | count: int = 0, 19 | children=None, 20 | freeze: bool = False, 21 | ): 22 | if children is None: 23 | children = set() 24 | self._value = value 25 | self.children: Set[TrieNode] = children 26 | self.is_word = is_word 27 | self.count = count 28 | self.is_frozen = freeze 29 | 30 | def get_subtrie( 31 | self, search_prefix: str, current_trie_node_prefix: str = "" 32 | ) -> Tuple[str, "TrieNode"]: 33 | """ 34 | Get the TrieNodes which represents the given prefix. 35 | 36 | If the search_prefix is not in the trie, return ("", EMPTY_NODE). 37 | The found_prefix is a prefix of search_prefix or equal to it. 
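If search_prefix fits within this node's value, the current node is returned together with the prefix accumulated so far. If instead this node's value is a proper prefix of search_prefix, the remainder is matched against the children and the search recurses into the child that continues it.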
38 | 39 | Parameters 40 | ---------- 41 | search_prefix : str 42 | current_trie_node_prefix : str, optional (default: "") 43 | 44 | Returns 45 | ------- 46 | found_prefix, subtrie: Tuple[str, TrieNode] 47 | """ 48 | if search_prefix == self._value[: len(search_prefix)]: 49 | # search_prefix is a prefix of the current node (or equal to it) 50 | return (current_trie_node_prefix, self) 51 | elif self._value == search_prefix[: len(self._value)]: 52 | # The current node is a prefix of the search_prefix 53 | remainder = search_prefix[len(self._value) :] 54 | children = sorted(self.children, key=lambda node: node._value) 55 | for child in children: 56 | if child._value == remainder[: len(child._value)]: 57 | new_prefix = current_trie_node_prefix + self._value 58 | return child.get_subtrie( 59 | remainder, current_trie_node_prefix=new_prefix 60 | ) 61 | elif remainder == child._value[: len(remainder)]: 62 | # The remainder is a prefix of the child 63 | return (current_trie_node_prefix, child) 64 | return ("", EMPTY_NODE) 65 | 66 | def push(self, value: str): 67 | if self.is_frozen: 68 | raise RuntimeError("The node is frozen. You may not edit it.") 69 | if value == self._value: 70 | logger.debug("The inserted value is the value of the current node") 71 | self.count += 1 72 | self.is_word = True 73 | return 74 | shared_prefix = get_shared_prefix(self._value, value) 75 | 76 | if len(value) == len(shared_prefix): 77 | logger.debug("The new value is a prefix of the current node") 78 | new_child = TrieNode( 79 | self._value[len(shared_prefix) :], 80 | is_word=self.is_word, 81 | count=self.count, 82 | children=self.children, 83 | ) 84 | self._value = shared_prefix 85 | self.count = 1 86 | self.is_word = True 87 | self.children = {new_child} 88 | elif len(shared_prefix) == len(self._value): 89 | logger.debug( 90 | f"The current node={self._value} is a prefix " 91 | f"of the new value={value}" 92 | ) 93 | # Do I have a child which also is a prefix of this? 
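# Sibling nodes never share a first character, so at most one child
# can continue the remainder: recurse into it if it exists, otherwise
# attach the remainder as a new leaf below this node.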
94 | remainder = value[len(shared_prefix) :] 95 | for child_trie in self.children: 96 | if len(get_shared_prefix(child_trie._value, remainder)) > 0: 97 | child_trie.push(remainder) 98 | return 99 | trie_node = TrieNode(value[len(shared_prefix) :], is_word=True, count=1) 100 | self.children.add(trie_node) 101 | else: 102 | logger.debug(f"No shared prefix for {self._value} and {value}") 103 | # Current node will become its child 104 | old_data = TrieNode( 105 | self._value[len(shared_prefix) :], 106 | is_word=self.is_word, 107 | count=self.count, 108 | children=self.children, 109 | ) 110 | 111 | # New data 112 | new_data = TrieNode(value[len(shared_prefix) :], is_word=True, count=1) 113 | 114 | # Clean up current node 115 | self.is_word = False 116 | self.count = 0 117 | self._value = shared_prefix 118 | self.children = {old_data, new_data} 119 | 120 | def __iter__(self): 121 | self._iteration_queue: List[Tuple[TrieNode, str]] = [(self, "")] 122 | while self._iteration_queue: 123 | trie_node, prefix = self._iteration_queue.pop() 124 | for child in trie_node.children: 125 | self._iteration_queue.append((child, prefix + trie_node._value)) 126 | if trie_node.is_word: 127 | for _ in range(trie_node.count): 128 | yield prefix + trie_node._value 129 | 130 | def print(self, _indent: int = 0): 131 | string = "" 132 | if self.is_word: 133 | string += " " * _indent + self._value + "\n" 134 | children = sorted(self.children, key=lambda child: child._value) 135 | for i, child in enumerate(children): 136 | if i < len(self.children) - 1: 137 | string += child.print(_indent=_indent + 1) 138 | else: 139 | string += child.print(_indent=_indent + 1) 140 | return string 141 | 142 | def __str__(self): 143 | return f"TrieNode(value='{self._value}', nb_children='{len(self.children)}')" 144 | 145 | __repr__ = __str__ 146 | 147 | 148 | EMPTY_NODE = TrieNode(value="", is_word=False, count=0, freeze=True) 149 | 150 | 151 | class Trie(AbstractTrie): 152 | def __init__(self, container=None): 153 | if container is None: 154 | container = [] 155 | self._root = None 156 | self._length = 0 157 | for element in container: 158 | self.push(element) 159 | 160 | def __len__(self): 161 | return self._length 162 | 163 | def __contains__(self, element) -> bool: 164 | found_prefix, subtrie = self.get_subtrie(element) 165 | return subtrie.is_word and found_prefix + subtrie._value == element 166 | 167 | def autocomplete(self, prefix): 168 | found_prefix, subtrie = self.get_subtrie(prefix) 169 | for word in subtrie: 170 | yield found_prefix + word 171 | 172 | def get_subtrie( 173 | self, prefix 174 | ) -> Tuple[str, TrieNode]: # TODO: Should this be private? 
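# An empty trie has no root node, so report a miss the same way
# TrieNode.get_subtrie does.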
175 | if self._root is None: 176 | return ("", EMPTY_NODE) 177 | return self._root.get_subtrie(prefix) 178 | 179 | def __iter__(self): 180 | self._iteration_index = -1 181 | self._child_values = [] 182 | if self._root is not None: 183 | self._child_values = [element for element in self._root] 184 | return self 185 | 186 | def __next__(self): 187 | """Return the next value from the Trie.""" 188 | self._iteration_index += 1 189 | if self._iteration_index < self._length: 190 | return self._child_values[self._iteration_index] 191 | raise StopIteration 192 | 193 | def push(self, element: str): 194 | if self._root is None: 195 | self._root = TrieNode(value=element, is_word=True, count=1) 196 | else: 197 | self._root.push(element) 198 | self._length += 1 199 | 200 | def print(self, print_stdout=True) -> str: 201 | string = "Trie\n" 202 | if self._root is not None: 203 | string += self._root.print() 204 | string = string.strip() 205 | if print_stdout: 206 | print(string) 207 | return string 208 | 209 | def __str__(self): 210 | return f"Trie(len={self._length}, {self._root})" 211 | 212 | __repr__ = __str__ 213 | 214 | 215 | def get_shared_prefix(word1: str, word2: str) -> str: 216 | """ 217 | Get the substring in the beginning of word1 and word2 which both share. 218 | 219 | Parameters 220 | ---------- 221 | word1 : str 222 | word2 : str 223 | 224 | Returns 225 | ------- 226 | shared_prefix : str 227 | 228 | Examples 229 | -------- 230 | >>> get_shared_prefix("foo", "bar") 231 | '' 232 | >>> get_shared_prefix("foobar", "bar") 233 | '' 234 | >>> get_shared_prefix("foobar", "foo") 235 | 'foo' 236 | >>> get_shared_prefix("steamship", "steampowered") 237 | 'steam' 238 | """ 239 | shared_prefix = "" 240 | for char1, char2 in zip(word1, word2): 241 | if char1 == char2: 242 | shared_prefix += char1 243 | else: 244 | break 245 | return shared_prefix 246 | -------------------------------------------------------------------------------- /mpu/pd.py: -------------------------------------------------------------------------------- 1 | """Pandas utility functions.""" 2 | 3 | # Core Library 4 | import datetime as dt 5 | import logging 6 | from typing import Any, Dict, List, Optional, Tuple 7 | 8 | # Third party 9 | import pandas as pd 10 | import pkg_resources 11 | 12 | # First party 13 | import mpu.shell 14 | 15 | countries_file = pkg_resources.resource_filename("mpu", "data/countries.csv") 16 | countries = pd.read_csv(countries_file) 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | def example_df() -> pd.DataFrame: 21 | """Create an example dataframe.""" 22 | country_names = ["Germany", "France", "Indonesia", "Ireland", "Spain", "Vatican"] 23 | population = [82521653, 66991000, 255461700, 4761865, 46549045, None] 24 | population_time = [ 25 | dt.datetime(2016, 12, 1), 26 | dt.datetime(2017, 1, 1), 27 | dt.datetime(2017, 1, 1), 28 | None, # Ireland 29 | dt.datetime(2017, 6, 1), # Spain 30 | None, 31 | ] 32 | euro = [True, True, False, True, True, True] 33 | df = pd.DataFrame( 34 | { 35 | "country": country_names, 36 | "population": population, 37 | "population_time": population_time, 38 | "EUR": euro, 39 | } 40 | ) 41 | df = df[["country", "population", "population_time", "EUR"]] 42 | return df 43 | 44 | 45 | def describe(df: pd.DataFrame, dtype: Optional[Dict] = None) -> Dict: 46 | """ 47 | Print a description of a Pandas dataframe. 
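One summary table is printed per type bucket; the return value maps each column name to the detected type ('int', 'float', 'category', 'time' or 'str').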
48 | 49 | Parameters 50 | ---------- 51 | df : pd.DataFrame 52 | dtype : Optional[Dict] 53 | Maps column names to types 54 | """ 55 | if dtype is None: 56 | dtype = {} 57 | print(f"Number of datapoints: {len(df)}") 58 | column_info, column_info_meta = _get_column_info(df, dtype) 59 | 60 | if len(column_info["int"]) > 0: 61 | _describe_int(df, column_info) 62 | 63 | if len(column_info["float"]) > 0: 64 | _describe_float(df, column_info) 65 | 66 | if len(column_info["category"]) > 0: 67 | _describe_category(df, column_info, column_info_meta) 68 | 69 | if len(column_info["time"]) > 0: 70 | _describe_time(df, column_info, column_info_meta) 71 | 72 | if len(column_info["other"]) > 0: 73 | _describe_other(df, column_info, column_info_meta) 74 | 75 | column_types = {} 76 | for column_type, columns in column_info.items(): 77 | for column_name in columns: 78 | if column_type == "other": 79 | column_type = "str" 80 | column_types[column_name] = column_type 81 | return column_types 82 | 83 | 84 | def _get_column_info(df: pd.DataFrame, dtype: Dict[str, str]) -> Tuple[Dict, Dict]: 85 | column_info: Dict[str, List[str]] = { 86 | "int": [], 87 | "float": [], 88 | "category": [], 89 | "other": [], 90 | "time": [], 91 | } 92 | float_types = ["float64"] 93 | integer_types = ["int64", "uint8"] 94 | time_types = ["datetime64[ns]"] 95 | other_types = ["object", "category"] 96 | column_info_meta: Dict[str, Dict[str, Any]] = {} 97 | for column_name in df: 98 | column_info_meta[column_name] = {} 99 | counter_obj = df[column_name].value_counts() 100 | value_list = [ 101 | key 102 | for key, value in sorted( 103 | counter_obj.items(), key=lambda n: (str(n[1]), str(n[0])) 104 | ) 105 | ] 106 | value_count = len(value_list) 107 | is_suspicious_cat = ( 108 | value_count <= 50 109 | and str(df[column_name].dtype) != "category" 110 | and column_name not in dtype 111 | ) 112 | if is_suspicious_cat: 113 | logger.warning( 114 | f"Column '{column_name}' has only {value_count} different " 115 | f"values ({value_list}). 
" 116 | "You might want to make it a 'category'" 117 | ) 118 | if len(value_list) > 0: 119 | top_count_val = counter_obj.tolist()[0] 120 | else: 121 | top_count_val = None 122 | column_info_meta[column_name]["top_count_val"] = top_count_val 123 | column_info_meta[column_name]["value_list"] = value_list 124 | column_info_meta[column_name]["value_count"] = value_count 125 | is_int_type = ( 126 | df[column_name].dtype in integer_types 127 | or column_name in dtype 128 | and dtype[column_name] in integer_types 129 | ) 130 | is_float_type = ( 131 | df[column_name].dtype in float_types 132 | or column_name in dtype 133 | and dtype[column_name] in float_types 134 | ) 135 | is_cat_type = ( 136 | str(df[column_name].dtype) in ["category", "bool"] 137 | or column_name in dtype 138 | and dtype[column_name] in ["category", "bool"] 139 | ) 140 | is_time_type = str(df[column_name].dtype) in time_types 141 | is_other_type = ( 142 | str(df[column_name].dtype) in other_types 143 | or column_name in dtype 144 | and dtype[column_name] in other_types 145 | ) 146 | if is_int_type: 147 | column_info["int"].append(column_name) 148 | elif is_float_type: 149 | column_info["float"].append(column_name) 150 | elif is_cat_type: 151 | column_info["category"].append(column_name) 152 | elif is_other_type: 153 | column_info["other"].append(column_name) 154 | elif is_time_type: 155 | column_info["time"].append(column_name) 156 | else: 157 | logger.warning( 158 | f"mpu.pd.describe does not know type '{df[column_name].dtype}'" 159 | ) 160 | return column_info, column_info_meta 161 | 162 | 163 | def _describe_int(df: pd.DataFrame, column_info: Dict) -> None: 164 | print("\n## Integer Columns") 165 | table = [ 166 | ["Column name", "Non-nan", "mean", "std", "min", "25%", "50%", "75%", "max"] 167 | ] 168 | for column_name in column_info["int"]: 169 | row = [] 170 | row.append(column_name) 171 | row.append(sum(df[column_name].notnull())) 172 | row.append(df[column_name].mean()) 173 | row.append(df[column_name].std()) 174 | row.append(df[column_name].min()) 175 | row.append(df[column_name].quantile(0.25)) 176 | row.append(df[column_name].quantile(0.50)) 177 | row.append(df[column_name].quantile(0.75)) 178 | row.append(max(df[column_name])) 179 | table.append(row) 180 | mpu.shell.print_table(table) 181 | 182 | 183 | def _describe_float(df: pd.DataFrame, column_info: Dict) -> None: 184 | print("\n## Float Columns") 185 | table = [ 186 | ["Column name", "Non-nan", "mean", "std", "min", "25%", "50%", "75%", "max"] 187 | ] 188 | for column_name in column_info["float"]: 189 | row = [] 190 | row.append(column_name) 191 | row.append(sum(df[column_name].notnull())) 192 | row.append(f"{df[column_name].mean():0.2f}") 193 | row.append(f"{df[column_name].std():0.2f}") 194 | row.append(f"{df[column_name].min():0.2f}") 195 | row.append(f"{df[column_name].quantile(0.25):0.2f}") 196 | row.append(f"{df[column_name].quantile(0.50):0.2f}") 197 | row.append(f"{df[column_name].quantile(0.75):0.2f}") 198 | row.append(f"{max(df[column_name]):0.2f}") 199 | table.append(row) 200 | mpu.shell.print_table(table) 201 | 202 | 203 | def _describe_category( 204 | df: pd.DataFrame, column_info: Dict, column_info_meta: Dict 205 | ) -> None: 206 | print("\n## Category Columns") 207 | table = [["Column name", "Non-nan", "unique", "top el", "top (count)", "rest"]] 208 | for column_name in column_info["category"]: 209 | row = [] 210 | row.append(column_name) 211 | row.append(sum(df[column_name].notnull())) 212 | row.append(len(df[column_name].unique())) 213 | 
row.append(column_info_meta[column_name]["value_list"][0]) 214 | row.append(column_info_meta[column_name]["top_count_val"]) 215 | rest = str(column_info_meta[column_name]["value_list"][1:])[:40] 216 | row.append(rest) 217 | table.append(row) 218 | mpu.shell.print_table(table) 219 | 220 | 221 | def _describe_time(df: pd.DataFrame, column_info: Dict, column_info_meta: Dict) -> None: 222 | print("\n## Time Columns") 223 | table = [ 224 | ["Column name", "Non-nan", "unique", "top el", "top (count)", "min", "max"] 225 | ] 226 | for column_name in column_info["time"]: 227 | row = [] 228 | row.append(column_name) 229 | row.append(sum(df[column_name].notnull())) 230 | row.append(len(df[column_name].unique())) 231 | row.append(column_info_meta[column_name]["value_list"][0]) 232 | row.append(column_info_meta[column_name]["top_count_val"]) 233 | row.append(df[column_name].min()) 234 | row.append(df[column_name].max()) 235 | table.append(row) 236 | mpu.shell.print_table(table) 237 | 238 | 239 | def _describe_other( 240 | df: pd.DataFrame, column_info: Dict, column_info_meta: Dict 241 | ) -> None: 242 | print("\n## Other Columns") 243 | table = [["Column name", "Non-nan", "unique", "top", "(count)", "rest"]] 244 | for column_name in column_info["other"]: 245 | row = [] 246 | row.append(column_name) 247 | row.append(sum(df[column_name].notnull())) 248 | row.append(len(df[column_name].unique())) 249 | row.append(column_info_meta[column_name]["value_list"][0]) 250 | row.append(column_info_meta[column_name]["top_count_val"]) 251 | rest = str(column_info_meta[column_name]["value_list"][1:])[:40] 252 | row.append(rest) 253 | table.append(row) 254 | mpu.shell.print_table(table) 255 | -------------------------------------------------------------------------------- /tests/test_io.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Test the mpu.io module.""" 4 | 5 | # Core Library 6 | import datetime 7 | import os 8 | import sys 9 | from unittest import mock 10 | 11 | # Third party 12 | import pkg_resources 13 | import pytest 14 | 15 | # First party 16 | import mpu.io 17 | from mpu.io import ( 18 | _write_jsonl, 19 | download, 20 | get_file_meta, 21 | gzip_file, 22 | read, 23 | urlread, 24 | write, 25 | ) 26 | 27 | 28 | def test_download_with_path(jpg_tempfile): 29 | source = ( 30 | "https://upload.wikimedia.org/wikipedia/commons/e/e9/" 31 | "Aurelia-aurita-3-1-style.jpg" 32 | ) 33 | download(source, jpg_tempfile) 34 | assert os.path.getsize(jpg_tempfile) == 116087 35 | 36 | 37 | def test_get_file_meta(): 38 | path = "files/example.json" 39 | source = pkg_resources.resource_filename(__name__, path) 40 | with mock.patch.dict(sys.modules, {"magic": None}): 41 | meta = get_file_meta(source) 42 | meta["filepath"] = None 43 | meta["last_access_datetime"] = None 44 | meta["modification_datetime"] = None 45 | 46 | # Exists on Windows, but not on Linux 47 | meta["creation_datetime"] = None 48 | 49 | expected = { 50 | "filepath": None, 51 | "creation_datetime": None, 52 | "last_access_datetime": None, 53 | "modification_datetime": None, 54 | } 55 | assert meta == expected 56 | 57 | 58 | def test_urlread(): 59 | url = "http://example.com" 60 | sample = urlread(url) 61 | assert sample.startswith("") 62 | 63 | 64 | def test_download_without_path(): 65 | source = ( 66 | "https://upload.wikimedia.org/wikipedia/commons/e/e9/" 67 | "Aurelia-aurita-3-1-style.jpg" 68 | ) 69 | sink = download(source) 70 | download(source, sink) 71 | assert os.path.getsize(sink) 
== 116087 72 | os.remove(sink) # cleanup of mkstemp 73 | 74 | 75 | def test_read_csv(): 76 | path = "files/example.csv" 77 | source = pkg_resources.resource_filename(__name__, path) 78 | data_real = read(source) 79 | data_exp = [ 80 | ["a", "b", "c"], # 0 81 | ["1", "A towel,", "1.0"], # 1 82 | ["42", " it says, ", "2.0"], # 2 83 | ["1337", "is about the most ", "-1"], # 3 84 | ["0", "massively useful thing ", "123"], # 4 85 | ["-2", "an interstellar hitchhiker can have.\n", "3"], # 5 86 | ["3.141", "Special char test: €üößł", "2.7"], # 6 87 | ] 88 | assert len(data_real) == len(data_exp) 89 | assert data_real[0] == data_exp[0] 90 | assert data_real[1] == data_exp[1] 91 | assert data_real[2] == data_exp[2] 92 | assert data_real[3] == data_exp[3] 93 | assert data_real[4] == data_exp[4] 94 | assert data_real[5] == data_exp[5] 95 | assert data_real[6] == data_exp[6] 96 | assert data_real == data_exp 97 | data_real = read(source, skiprows=1) 98 | assert data_real == data_exp[1:] 99 | data_real = read(source, skiprows=1, delimiter=",", quotechar='"') 100 | assert data_real == data_exp[1:] 101 | 102 | 103 | def test_read_csv_dicts(): 104 | path = "files/example.csv" 105 | source = pkg_resources.resource_filename(__name__, path) 106 | data_real = read(source, format="dicts") 107 | data_exp = [ 108 | {"a": "1", "b": "A towel,", "c": "1.0"}, 109 | {"a": "42", "b": " it says, ", "c": "2.0"}, 110 | {"a": "1337", "b": "is about the most ", "c": "-1"}, 111 | {"a": "0", "b": "massively useful thing ", "c": "123"}, 112 | {"a": "-2", "b": "an interstellar hitchhiker can have.\n", "c": "3"}, 113 | {"a": "3.141", "b": "Special char test: €üößł", "c": "2.7"}, 114 | ] 115 | assert len(data_real) == len(data_exp) 116 | assert data_real[0] == data_exp[0] 117 | assert data_real == data_exp 118 | 119 | 120 | def test_write_csv(csv_tempfile): 121 | newline = "\n" 122 | data = [ 123 | ["1", "A towel,", "1.0"], 124 | ["42", " it says, ", "2.0"], 125 | ["1337", "is about the most ", "-1"], 126 | ["0", "massively useful thing ", "123"], 127 | ["-2", "an interstellar hitchhiker can have.\n", "3"], 128 | ] 129 | write(csv_tempfile, data, newline=newline) 130 | data_read = read(csv_tempfile, newline=newline) 131 | assert data == data_read 132 | 133 | 134 | def test_write_h5(hdf5_tempfile): 135 | data = [ 136 | ["1", "A towel,", "1.0"], 137 | ["42", " it says, ", "2.0"], 138 | ["1337", "is about the most ", "-1"], 139 | ["0", "massively useful thing ", "123"], 140 | ["-2", "an interstellar hitchhiker can have.\n", "3"], 141 | ] 142 | with pytest.raises(NotImplementedError): 143 | write(hdf5_tempfile, data) 144 | 145 | 146 | def test_write_csv_params(csv_tempfile): 147 | data = [ 148 | ["1", "A towel,", "1.0"], 149 | ["42", " it says, ", "2.0"], 150 | ["1337", "is about the most ", "-1"], 151 | ["0", "massively useful thing ", "123"], 152 | ["-2", "an interstellar hitchhiker can have.\n", "3"], 153 | ] 154 | newline = "\n" 155 | write(csv_tempfile, data, delimiter=",", quotechar='"', newline=newline) 156 | data_read = read(csv_tempfile, delimiter=",", quotechar='"', newline=newline) 157 | assert data == data_read 158 | 159 | 160 | def test_read_hdf5(): 161 | path = "files/example.hdf5" 162 | source = pkg_resources.resource_filename(__name__, path) 163 | with pytest.raises(NotImplementedError): 164 | read(source) 165 | 166 | 167 | def test_read_json(): 168 | path = "files/example.json" 169 | source = pkg_resources.resource_filename(__name__, path) 170 | data_real = read(source) 171 | 172 | data_exp = { 173 | "a list": [1, 
42, 3.141, 1337, "help", "€"], 174 | "a string": "bla", 175 | "another dict": {"foo": "bar", "key": "value", "the answer": 42}, 176 | } 177 | assert data_real == data_exp 178 | 179 | 180 | def test_read_jsonl(): 181 | path = "files/example.jsonl" 182 | source = pkg_resources.resource_filename(__name__, path) 183 | data_real = read(source) 184 | data_exp = [ 185 | {"some": "thing"}, 186 | {"foo": 17, "bar": False, "quux": True}, 187 | {"may": {"include": "nested", "objects": ["and", "arrays"]}}, 188 | ] 189 | assert len(data_real) == len(data_exp) 190 | for real, exp_ in zip(data_real, data_exp): 191 | assert real == exp_ 192 | 193 | 194 | def test_read_pickle(): 195 | path = "files/example.pickle" 196 | source = pkg_resources.resource_filename(__name__, path) 197 | data_real = read(source) 198 | 199 | data_exp = { 200 | "a list": [1, 42, 3.141, 1337, "help", "€"], 201 | "a string": "bla", 202 | "another dict": {"foo": "bar", "key": "value", "the answer": 42}, 203 | } 204 | assert data_real == data_exp 205 | 206 | 207 | def test_write_json(json_tempfile): 208 | data = { 209 | "a list": [1, 42, 3.141, 1337, "help", "€"], 210 | "a string": "bla", 211 | "another dict": {"foo": "bar", "key": "value", "the answer": 42}, 212 | } 213 | write(json_tempfile, data) 214 | data_read = read(json_tempfile) 215 | assert data == data_read 216 | 217 | 218 | def test_write_jsonl(jsonl_tempfile): 219 | data = [ 220 | {"some": "thing"}, 221 | {"foo": 17, "bar": False, "quux": True}, 222 | {"may": {"include": "nested", "objects": ["and", "arrays"]}}, 223 | ] 224 | write(jsonl_tempfile, data) 225 | data_read = read(jsonl_tempfile) 226 | assert data == data_read 227 | 228 | 229 | def test_write_jsonl_all_params(jsonl_tempfile): 230 | data = [ 231 | {"some": "thing"}, 232 | {"foo": 17, "bar": False, "quux": True}, 233 | {"may": {"include": "nested", "objects": ["and", "arrays"]}}, 234 | ] 235 | _write_jsonl( 236 | jsonl_tempfile, 237 | data, 238 | kwargs={"sort_keys": True, "separators": (",", ": "), "ensure_ascii": True}, 239 | ) 240 | data_read = read(jsonl_tempfile) 241 | assert data == data_read 242 | 243 | 244 | def test_write_json_params(json_tempfile): 245 | data = { 246 | "a list": [1, 42, 3.141, 1337, "help", "€"], 247 | "a string": "bla", 248 | "another dict": {"foo": "bar", "key": "value", "the answer": 42}, 249 | } 250 | write( 251 | json_tempfile, 252 | data, 253 | indent=4, 254 | sort_keys=True, 255 | separators=(",", ":"), 256 | ensure_ascii=False, 257 | ) 258 | data_read = read(json_tempfile) 259 | assert data == data_read 260 | 261 | 262 | def test_write_pickle(pickle_tempfile): 263 | data = { 264 | "a list": [1, 42, 3.141, 1337, "help", "€"], 265 | "a string": "bla", 266 | "another dict": {"foo": "bar", "key": "value", "the answer": 42}, 267 | } 268 | write(pickle_tempfile, data) 269 | data_read = read(pickle_tempfile) 270 | assert data == data_read 271 | 272 | 273 | def test_write_pickle_protocol(pickle_tempfile): 274 | data = { 275 | "a list": [1, 42, 3.141, 1337, "help", "€"], 276 | "a string": "bla", 277 | "another dict": {"foo": "bar", "key": "value", "the answer": 42}, 278 | } 279 | write(pickle_tempfile, data, protocol=0) 280 | data_read = read(pickle_tempfile) 281 | assert data == data_read 282 | 283 | 284 | def test_read_h5(): 285 | source = pkg_resources.resource_filename("mpu", "io.py") 286 | with pytest.raises(NotImplementedError): 287 | read(source) 288 | 289 | 290 | def test_gzip(pickle_tempfile): 291 | path = "files/example.csv" 292 | source = 
pkg_resources.resource_filename(__name__, path) 293 | gzip_file(source, pickle_tempfile) 294 | 295 | 296 | def test_hash(): 297 | path = "files/example.pickle" 298 | source = pkg_resources.resource_filename(__name__, path) 299 | assert mpu.io.hash(source) == "e845794fde22e7a33dd389ed0f5381ae042154c1" 300 | expected_hash_md5 = "c59db499d09531a5937c2ae2342cb18b" 301 | assert mpu.io.hash(source, method="md5") == expected_hash_md5 302 | 303 | 304 | def test_get_creation_datetime(): 305 | ret_val = mpu.io.get_creation_datetime(__file__) 306 | assert isinstance(ret_val, datetime.datetime) or ret_val is None 307 | 308 | 309 | def test_get_creation_datetime_windows(): 310 | with mock.patch("platform.system", mock.MagicMock(return_value="Windows")): 311 | ret_val = mpu.io.get_creation_datetime(__file__) 312 | assert isinstance(ret_val, datetime.datetime) or ret_val is None 313 | 314 | 315 | def test_get_modification_datetime(): 316 | ret_val = mpu.io.get_modification_datetime(__file__) 317 | assert isinstance(ret_val, datetime.datetime) 318 | 319 | 320 | def test_get_access_datetime(): 321 | ret_val = mpu.io.get_access_datetime(__file__) 322 | assert isinstance(ret_val, datetime.datetime) 323 | -------------------------------------------------------------------------------- /mpu/geometry.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create and manipulate two-dimensional geometrical entities such as lines. 3 | 4 | For more advanced use cases, see: 5 | 6 | * `sympy.geometry <https://docs.sympy.org/latest/modules/geometry/index.html>`_ 7 | * `Shapely <https://shapely.readthedocs.io/>`_ 8 | """ 9 | 10 | from __future__ import annotations 11 | 12 | # Core Library 13 | import math 14 | from typing import Any, cast 15 | 16 | # First party 17 | from mpu.datastructures import Interval 18 | 19 | EPSILON = 0.000001 20 | FULL_ROTATION = 360 21 | 22 | 23 | class Point: 24 | """ 25 | A point in a 2-dimensional Euclidean space. 26 | 27 | Parameters 28 | ---------- 29 | x : float 30 | y : float 31 | """ 32 | 33 | def __init__(self, x: float, y: float): 34 | self.x = x 35 | self.y = y 36 | 37 | def __str__(self) -> str: 38 | return f"({self.x}|{self.y})" 39 | 40 | __repr__ = __str__ 41 | 42 | def __eq__(self, other: Any) -> bool: 43 | if not isinstance(other, Point): 44 | return False 45 | return self.x == other.x and self.y == other.y 46 | 47 | def __hash__(self) -> int: 48 | return hash((self.x, self.y)) 49 | 50 | def simplify(self) -> Point: 51 | return self 52 | 53 | 54 | class LineSegment: 55 | """ 56 | A line segment in a 2-dimensional Euclidean space. 57 | 58 | Parameters 59 | ---------- 60 | p1 : Point 61 | p2 : Point 62 | """ 63 | 64 | def __init__(self, p1: Point, p2: Point, name: str = "LineSegment"): 65 | self.p1 = p1 66 | self.p2 = p2 67 | self.name = name 68 | 69 | def length(self) -> float: 70 | """Get the length of this line segment.""" 71 | return ((self.p1.x - self.p2.x) ** 2 + (self.p1.y - self.p2.y) ** 2) ** 0.5 72 | 73 | def is_point(self) -> bool: 74 | """Check if this LineSegment is a point.""" 75 | return self.p1 == self.p2 76 | 77 | def angle(self) -> float: 78 | """Get the angle of this line.""" 79 | dx = self.p2.x - self.p1.x 80 | dy = self.p2.y - self.p1.y 81 | theta = math.atan2(dy, dx) 82 | angle = math.degrees(theta) # angle is in (-180, 180] 83 | if angle < 0: 84 | angle = FULL_ROTATION + angle 85 | return angle 86 | 87 | def _get_equation_parameters(self) -> tuple[float, float]: 88 | """ 89 | Get the slope and the intercept of a line.
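Vertical segments have no slope-intercept form, so a ValueError is raised when both points share the same x-coordinate.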
90 | 91 | y1 = m*x1 + t 92 | y2 = m*x2 + t 93 | => y1 = m*x1 + (y2-m*x2) 94 | <=> m = (y1 - y2) /(x1-x2) 95 | t = y1 - m*x1 96 | """ 97 | if self.p1.x == self.p2.x: 98 | raise ValueError("The given points have the same x-coordinate") 99 | 100 | y1 = self.p1.y 101 | y2 = self.p2.y 102 | x1 = self.p1.x 103 | x2 = self.p2.x 104 | m = (y1 - y2) / (x1 - x2) 105 | t = y1 - m * x1 106 | return m, t 107 | 108 | def simplify(self) -> Point | LineSegment: 109 | """Simplify this line segment to a point, if possible.""" 110 | if self.is_point(): 111 | return self.p1 112 | if self.p1.x > self.p2.x: 113 | return LineSegment(p1=self.p2, p2=self.p1) 114 | else: 115 | return self 116 | 117 | def intersect(self, other: LineSegment) -> None | LineSegment | Point: 118 | """ 119 | Get the intersection between this LineSegment and another LineSegment. 120 | 121 | Parameters 122 | ---------- 123 | other : LineSegment 124 | 125 | Returns 126 | ------- 127 | intersection : None | LineSegment | Point 128 | """ 129 | if not do_lines_intersect(self, other): 130 | return None 131 | if self.is_point(): 132 | p1 = self.simplify() 133 | return p1 # we know they intersect 134 | elif other.is_point(): 135 | return other.intersect(self) 136 | elif self.angle() == other.angle(): 137 | # The overlap is a line segment or a point! 138 | if self.angle() in [90, 270]: 139 | # The line segment is not a function 140 | x = self.p1.x 141 | return _get_straight_line_intersection( 142 | x, other.p1.y, other.p2.y, self.p1.y, self.p2.y 143 | ) 144 | else: 145 | # The LineSegment is a function 146 | x_start = max(min(self.p1.x, self.p2.x), min(other.p1.x, other.p2.x)) 147 | x_end = min(max(self.p1.x, self.p2.x), max(other.p1.x, other.p2.x)) 148 | m, t = self._get_equation_parameters() 149 | p1 = Point(x_start, m * x_start + t) 150 | p2 = Point(x_end, m * x_end + t) 151 | return LineSegment(p1, p2) 152 | else: 153 | # We know that we have two real line segments, that they intersect 154 | # and that their angle is different. Hence the return value 155 | # must be a point 156 | if self.angle() in [90, 270]: 157 | x = self.p1.x 158 | 159 | if other.angle() in [90, 270]: 160 | return _get_straight_line_intersection( 161 | x, other.p1.y, other.p2.y, self.p1.y, self.p2.y 162 | ) 163 | else: 164 | m, t = other._get_equation_parameters() 165 | y = m * x + t 166 | return Point(x, y) 167 | elif other.angle() in [90, 270]: 168 | x = other.p1.x 169 | m, t = self._get_equation_parameters() 170 | y = m * x + t 171 | return Point(x, y) 172 | else: 173 | # The overlap is a point 174 | m1, t1 = self._get_equation_parameters() 175 | m2, t2 = other._get_equation_parameters() 176 | # m1 * x + t1 = m2 * x + t2 177 | # <=> (m1 - m2) * x = t2 - t1 178 | # <=> x = (t2 - t1) / (m1 - m2) 179 | x = (t2 - t1) / (m1 - m2) 180 | y = m1 * x + t1 181 | return Point(x, y) 182 | 183 | def bounding_box(self) -> tuple[Point, Point]: 184 | """ 185 | Get the bounding box of this line represented by two points. 186 | 187 | The p1 point is in the lower left corner, the p2 one at the 188 | upper right corner.
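
Examples
--------
>>> LineSegment(Point(2, 3), Point(0, 1)).bounding_box()
((0|1), (2|3))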
189 | """ 190 | result = ( 191 | Point(min(self.p1.x, self.p2.x), min(self.p1.y, self.p2.y)), 192 | Point(max(self.p1.x, self.p2.x), max(self.p1.y, self.p2.y)), 193 | ) 194 | return result 195 | 196 | def __str__(self) -> str: 197 | if self.name == "LineSegment": 198 | return f"LineSegment [{self.p1} to {self.p2}]" 199 | else: 200 | return self.name 201 | 202 | __repr__ = __str__ 203 | 204 | def __hash__(self) -> int: 205 | return hash((self.p1, self.p2, self.name)) 206 | 207 | def __eq__(self, other: Any) -> bool: 208 | if not isinstance(other, LineSegment): 209 | return False 210 | return self.name == other.name and ( 211 | (self.p1 == other.p1 and self.p2 == other.p2) 212 | or (self.p1 == other.p2 and self.p2 == other.p1) 213 | ) 214 | 215 | 216 | def _get_straight_line_intersection( 217 | x: float, other_y1: float, other_y2: float, self_y1: float, self_y2: float 218 | ) -> Point | LineSegment: 219 | """Get the intersection point of two straight vertical lines.""" 220 | self_y = Interval(left=min(self_y1, self_y2), right=max(self_y1, self_y2)) 221 | other_y = Interval(left=min(other_y1, other_y2), right=max(other_y1, other_y2)) 222 | 223 | intersection = self_y.intersection(other_y) 224 | if intersection.left == intersection.right: 225 | return Point(x, cast(float, intersection.left)) 226 | else: 227 | return LineSegment( 228 | Point(x, cast(float, intersection.left)), 229 | Point(x, cast(float, intersection.right)), 230 | ) 231 | 232 | 233 | def do_bounding_boxes_intersect(a: tuple[Point, Point], b: tuple[Point, Point]) -> bool: 234 | """ 235 | Check if bounding boxes do intersect. 236 | 237 | If one bounding box touches the other, they do intersect. 238 | """ 239 | return ( 240 | a[0].x <= b[1].x and a[1].x >= b[0].x and a[0].y <= b[1].y and a[1].y >= b[0].y 241 | ) 242 | 243 | 244 | def crossproduct(a: Point, b: Point) -> float: 245 | """Get the cross product of two points.""" 246 | return a.x * b.y - b.x * a.y 247 | 248 | 249 | def is_point_on_line(a: LineSegment, b: Point) -> bool: 250 | """Check if point b is on LineSegment a.""" 251 | # Move the image, so that a.p1 is on (0|0) 252 | p2 = Point(a.p2.x - a.p1.x, a.p2.y - a.p1.y) 253 | a_tmp = LineSegment(Point(0, 0), p2) 254 | b_tmp = Point(b.x - a.p1.x, b.y - a.p1.y) 255 | r = crossproduct(a_tmp.p2, b_tmp) 256 | return abs(r) < EPSILON 257 | 258 | 259 | def is_point_right_of_line(a: LineSegment, b: Point) -> bool: 260 | """Check if point b is right of line a.""" 261 | # Move the image, so that a.p1 is on (0|0) 262 | a_tmp = LineSegment(Point(0, 0), Point(a.p2.x - a.p1.x, a.p2.y - a.p1.y)) 263 | b_tmp = Point(b.x - a.p1.x, b.y - a.p1.y) 264 | return crossproduct(a_tmp.p2, b_tmp) < 0 265 | 266 | 267 | def line_segment_touches_or_crosses_line(a: LineSegment, b: LineSegment) -> bool: 268 | """Check if line segment a touches or crosses line segment b.""" 269 | return ( 270 | is_point_on_line(a, b.p1) 271 | or is_point_on_line(a, b.p2) 272 | or (is_point_right_of_line(a, b.p1) ^ is_point_right_of_line(a, b.p2)) 273 | ) 274 | 275 | 276 | def do_lines_intersect(a: LineSegment, b: LineSegment) -> bool: 277 | """Check if LineSegments a and b intersect.""" 278 | box1 = a.bounding_box() 279 | box2 = b.bounding_box() 280 | return ( 281 | do_bounding_boxes_intersect(box1, box2) 282 | and line_segment_touches_or_crosses_line(a, b) 283 | and line_segment_touches_or_crosses_line(b, a) 284 | ) 285 | 286 | 287 | def get_all_intersecting_lines_by_brute_force( 288 | lines: list[LineSegment], 289 | ) -> set[frozenset[LineSegment]]: 290 | """ 291 | Get 
292 | 
293 |     Parameters
294 |     ----------
295 |     lines : all lines you want to check, in no order
296 | 
297 |     Returns
298 |     -------
299 |     intersections : a set of frozensets, each containing a pair of intersecting lines
300 |     """
301 |     intersections: set[frozenset[LineSegment]] = set()
302 | 
303 |     for i in range(len(lines)):
304 |         for j in range(i + 1, len(lines)):
305 |             if do_lines_intersect(lines[i], lines[j]):
306 |                 tmp = frozenset({lines[i], lines[j]})
307 |                 intersections.add(tmp)
308 |     return intersections
309 | 
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # mpu documentation build configuration file, created by
4 | # sphinx-quickstart on Wed May 2 22:11:51 2018.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 | 
15 | # Core Library
16 | import os
17 | import sys
18 | from typing import Any, Dict, List
19 | 
20 | # If extensions (or modules to document with autodoc) are in another directory,
21 | # add these directories to sys.path here. If the directory is relative to the
22 | # documentation root, use os.path.abspath to make it absolute, like shown here.
23 | sys.path.insert(0, os.path.abspath("."))
24 | sys.path.insert(0, os.path.abspath("../"))
25 | sys.path.insert(0, os.path.abspath("../../"))
26 | 
27 | # The version info for the project you're documenting, acts as replacement for
28 | # |version| and |release|, also used in various other places throughout the
29 | # built documents.
30 | import mpu  # isort:skip # noqa
31 | 
32 | # -- General configuration ------------------------------------------------
33 | 
34 | # If your documentation needs a minimal Sphinx version, state it here.
35 | #
36 | # needs_sphinx = '1.0'
37 | 
38 | # Add any Sphinx extension module names here, as strings. They can be
39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
40 | # ones.
41 | extensions = [
42 |     "sphinx.ext.autodoc",
43 |     "sphinx.ext.autosummary",
44 |     "sphinx.ext.coverage",
45 |     "sphinx.ext.mathjax",
46 |     "sphinx.ext.viewcode",
47 |     "sphinx.ext.napoleon",
48 | ]
49 | 
50 | # Add any paths that contain templates here, relative to this directory.
51 | templates_path = ["_templates"]
52 | 
53 | # The suffix(es) of source filenames.
54 | # You can specify multiple suffixes as a list of strings:
55 | #
56 | # source_suffix = ['.rst', '.md']
57 | source_suffix = ".rst"
58 | 
59 | # The encoding of source files.
60 | # source_encoding = 'utf-8-sig'
61 | 
62 | # The master toctree document.
63 | master_doc = "index"
64 | 
65 | # General information about the project.
66 | project = "mpu"
67 | copyright = "2018, Martin Thoma"
68 | author = "Martin Thoma"
69 | 
70 | # The short X.Y version.
71 | version = ".".join(mpu.__version__.split(".", 2)[:2])
72 | # The full version, including alpha/beta/rc tags.
73 | release = mpu.__version__
74 | 
75 | # The language for content autogenerated by Sphinx. Refer to documentation
76 | # for a list of supported languages.
77 | #
78 | # This is also used if you do content translation via gettext catalogs.
79 | # Usually you set "language" from the command line for these cases.
80 | language = None
81 | 
82 | # There are two options for replacing |today|: either, you set today to some
83 | # non-false value, then it is used:
84 | # today = ''
85 | # Else, today_fmt is used as the format for a strftime call.
86 | # today_fmt = '%B %d, %Y'
87 | 
88 | # List of patterns, relative to source directory, that match files and
89 | # directories to ignore when looking for source files.
90 | # These patterns also affect html_static_path and html_extra_path
91 | exclude_patterns: List[Any] = []
92 | 
93 | # The reST default role (used for this markup: `text`) to use for all
94 | # documents.
95 | # default_role = None
96 | 
97 | # If true, '()' will be appended to :func: etc. cross-reference text.
98 | # add_function_parentheses = True
99 | 
100 | # If true, the current module name will be prepended to all description
101 | # unit titles (such as .. function::).
102 | # add_module_names = True
103 | 
104 | # If true, sectionauthor and moduleauthor directives will be shown in the
105 | # output. They are ignored by default.
106 | # show_authors = False
107 | 
108 | # The name of the Pygments (syntax highlighting) style to use.
109 | pygments_style = "sphinx"
110 | 
111 | # A list of ignored prefixes for module index sorting.
112 | # modindex_common_prefix = []
113 | 
114 | # If true, keep warnings as "system message" paragraphs in the built documents.
115 | # keep_warnings = False
116 | 
117 | # If true, `todo` and `todoList` produce output, else they produce nothing.
118 | todo_include_todos = False
119 | 
120 | 
121 | # -- Options for HTML output ----------------------------------------------
122 | 
123 | # The theme to use for HTML and HTML Help pages. See the documentation for
124 | # a list of builtin themes.
125 | html_theme = "sphinx_rtd_theme"
126 | 
127 | # Theme options are theme-specific and customize the look and feel of a theme
128 | # further. For a list of options available for each theme, see the
129 | # documentation.
130 | html_theme_options = {
131 |     "canonical_url": "",
132 |     "analytics_id": "",
133 |     "logo_only": False,
134 |     "display_version": True,
135 |     "prev_next_buttons_location": "bottom",
136 |     "style_external_links": False,
137 |     # Toc options
138 |     "collapse_navigation": True,
139 |     "sticky_navigation": True,
140 |     "navigation_depth": 4,
141 |     "includehidden": True,
142 |     "titles_only": False,
143 | }
144 | 
145 | # Add any paths that contain custom themes here, relative to this directory.
146 | # html_theme_path = []
147 | 
148 | # The name for this set of Sphinx documents.
149 | # "<project> v<release> documentation" by default.
150 | # html_title = u'mpu v0.1.0'
151 | 
152 | # A shorter title for the navigation bar. Default is the same as html_title.
153 | # html_short_title = None
154 | 
155 | # The name of an image file (relative to this directory) to place at the top
156 | # of the sidebar.
157 | # html_logo = None
158 | 
159 | # The name of an image file (relative to this directory) to use as a favicon of
160 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
161 | # pixels large.
162 | # html_favicon = None
163 | 
164 | # Add any paths that contain custom static files (such as style sheets) here,
165 | # relative to this directory. They are copied after the builtin static files,
166 | # so a file named "default.css" will overwrite the builtin "default.css".
167 | html_static_path = ["_static"]
168 | 
169 | # Add any extra paths that contain custom files (such as robots.txt or
170 | # .htaccess) here, relative to this directory. These files are copied
171 | # directly to the root of the documentation.
172 | # html_extra_path = []
173 | 
174 | # If not None, a 'Last updated on:' timestamp is inserted at every page
175 | # bottom, using the given strftime format.
176 | # The empty string is equivalent to '%b %d, %Y'.
177 | # html_last_updated_fmt = None
178 | 
179 | # If true, SmartyPants will be used to convert quotes and dashes to
180 | # typographically correct entities.
181 | # html_use_smartypants = True
182 | 
183 | # Custom sidebar templates, maps document names to template names.
184 | # html_sidebars = {}
185 | 
186 | # Additional templates that should be rendered to pages, maps page names to
187 | # template names.
188 | # html_additional_pages = {}
189 | 
190 | # If false, no module index is generated.
191 | html_domain_indices = True
192 | 
193 | # If false, no index is generated.
194 | # html_use_index = True
195 | 
196 | # If true, the index is split into individual pages for each letter.
197 | # html_split_index = False
198 | 
199 | # If true, links to the reST sources are added to the pages.
200 | # html_show_sourcelink = True
201 | 
202 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
203 | # html_show_sphinx = True
204 | 
205 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
206 | # html_show_copyright = True
207 | 
208 | # If true, an OpenSearch description file will be output, and all pages will
209 | # contain a <link> tag referring to it. The value of this option must be the
210 | # base URL from which the finished HTML is served.
211 | # html_use_opensearch = ''
212 | 
213 | # This is the file name suffix for HTML files (e.g. ".xhtml").
214 | # html_file_suffix = None
215 | 
216 | # Language to be used for generating the HTML full-text search index.
217 | # Sphinx supports the following languages:
218 | #   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
219 | #   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
220 | # html_search_language = 'en'
221 | 
222 | # A dictionary with options for the search language support, empty by default.
223 | # 'ja' uses this config value.
224 | # 'zh' users can customize the `jieba` dictionary path.
225 | # html_search_options = {'type': 'default'}
226 | 
227 | # The name of a javascript file (relative to the configuration directory) that
228 | # implements a search results scorer. If empty, the default will be used.
229 | # html_search_scorer = 'scorer.js'
230 | 
231 | # Output file base name for HTML help builder.
232 | htmlhelp_basename = "mpudoc"
233 | 
234 | # -- Options for LaTeX output ---------------------------------------------
235 | 
236 | latex_elements: Dict[str, Any] = {
237 |     # The paper size ('letterpaper' or 'a4paper').
238 |     # 'papersize': 'letterpaper',
239 |     # The font size ('10pt', '11pt' or '12pt').
240 |     # 'pointsize': '10pt',
241 |     # Additional stuff for the LaTeX preamble.
242 |     # 'preamble': '',
243 |     # Latex figure (float) alignment
244 |     # 'figure_align': 'htbp',
245 | }
246 | 
247 | # Grouping the document tree into LaTeX files. List of tuples
248 | # (source start file, target name, title,
249 | #  author, documentclass [howto, manual, or own class]).
250 | latex_documents = [
251 |     (master_doc, "mpu.tex", "mpu Documentation", "Martin Thoma", "manual")
252 | ]
253 | 
254 | # The name of an image file (relative to this directory) to place at the top of
255 | # the title page.
256 | # latex_logo = None
257 | 
258 | # For "manual" documents, if this is true, then toplevel headings are parts,
259 | # not chapters.
260 | # latex_use_parts = False
261 | 
262 | # If true, show page references after internal links.
263 | # latex_show_pagerefs = False
264 | 
265 | # If true, show URL addresses after external links.
266 | # latex_show_urls = False
267 | 
268 | # Documents to append as an appendix to all manuals.
269 | # latex_appendices = []
270 | 
271 | # If false, no module index is generated.
272 | # latex_domain_indices = True
273 | 
274 | 
275 | # -- Options for manual page output ---------------------------------------
276 | 
277 | # One entry per manual page. List of tuples
278 | # (source start file, name, description, authors, manual section).
279 | man_pages = [(master_doc, "mpu", "mpu Documentation", [author], 1)]
280 | 
281 | # If true, show URL addresses after external links.
282 | # man_show_urls = False
283 | 
284 | 
285 | # -- Options for Texinfo output -------------------------------------------
286 | 
287 | # Grouping the document tree into Texinfo files. List of tuples
288 | # (source start file, target name, title, author,
289 | #  dir menu entry, description, category)
290 | texinfo_documents = [
291 |     (
292 |         master_doc,
293 |         "mpu",
294 |         "mpu Documentation",
295 |         author,
296 |         "mpu",
297 |         "One line description of project.",
298 |         "Miscellaneous",
299 |     )
300 | ]
301 | 
302 | # Documents to append as an appendix to all manuals.
303 | # texinfo_appendices = []
304 | 
305 | # If false, no module index is generated.
306 | # texinfo_domain_indices = True
307 | 
308 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
309 | # texinfo_show_urls = 'footnote'
310 | 
311 | # If true, do not generate a @detailmenu in the "Top" node's menu.
312 | # texinfo_no_detailmenu = False
313 | 
--------------------------------------------------------------------------------
/mpu/string.py:
--------------------------------------------------------------------------------
1 | """
2 | String manipulation, verification and formatting.
3 | 
4 | For more complex checks, you might want to use the
5 | [validators](http://validators.readthedocs.io) package.
6 | """
7 | 
8 | # Core Library
9 | import socket
10 | from email.utils import parseaddr
11 | from typing import List, Optional, Union
12 | 
13 | # Third party
14 | import pkg_resources
15 | from typing_extensions import Literal  # necessary until 3.8
16 | 
17 | # First party
18 | import mpu.io
19 | 
20 | email_regex = r"[^@]*[^@\.]+@[^@]+\.[^@]+"
21 | 
22 | 
23 | def is_email(potential_email_address: str) -> bool:
24 |     """
25 |     Check if potential_email_address is a valid e-mail address.
26 | 
27 |     Please note that this function has no false-negatives but many
28 |     false-positives. So if it returns that the input is not a valid
29 |     e-mail address, it certainly isn't. If it returns True, it might still be
30 |     invalid. For example, the domain might not be registered.
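
    Internally, email.utils.parseaddr is used: the input must parse
    without a display name, and the part after the first '@' must
    contain a dot.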
31 | 
32 |     Parameters
33 |     ----------
34 |     potential_email_address : str
35 | 
36 |     Returns
37 |     -------
38 |     is_email : bool
39 | 
40 |     Examples
41 |     --------
42 |     >>> is_email('')
43 |     False
44 |     >>> is_email('info@martin-thoma.de')
45 |     True
46 |     >>> is_email('info@math.martin-thoma.de')
47 |     True
48 |     >>> is_email('Martin Thoma <info@martin-thoma.de>')
49 |     False
50 |     >>> is_email('info@martin-thoma')
51 |     False
52 |     >>> is_email('Martin <>')
53 |     False
54 |     """
55 |     context, mail = parseaddr(potential_email_address)
56 |     first_condition = len(context) == 0
57 |     dot_after_at = (
58 |         "@" in potential_email_address and "." in potential_email_address.split("@")[1]
59 |     )
60 |     return first_condition and dot_after_at
61 | 
62 | 
63 | def is_int(potential_int: str) -> bool:
64 |     """
65 |     Check if potential_int is a valid integer.
66 | 
67 |     Parameters
68 |     ----------
69 |     potential_int : str
70 | 
71 |     Returns
72 |     -------
73 |     is_int : bool
74 | 
75 |     Examples
76 |     --------
77 |     >>> is_int('123')
78 |     True
79 |     >>> is_int('1234567890123456789')
80 |     True
81 |     >>> is_int('0')
82 |     True
83 |     >>> is_int('-123')
84 |     True
85 |     >>> is_int('123.45')
86 |     False
87 |     >>> is_int('a')
88 |     False
89 |     >>> is_int('0x8')
90 |     False
91 |     """
92 |     try:
93 |         int(potential_int)
94 |         return True
95 |     except ValueError:
96 |         return False
97 | 
98 | 
99 | def is_float(potential_float: str) -> bool:
100 |     """
101 |     Check if potential_float is a valid float.
102 | 
103 |     Returns
104 |     -------
105 |     is_float : bool
106 | 
107 |     Examples
108 |     --------
109 |     >>> is_float('123')
110 |     True
111 |     >>> is_float('1234567890123456789')
112 |     True
113 |     >>> is_float('0')
114 |     True
115 |     >>> is_float('-123')
116 |     True
117 |     >>> is_float('123.45')
118 |     True
119 |     >>> is_float('a')
120 |     False
121 |     >>> is_float('0x8')
122 |     False
123 |     """
124 |     try:
125 |         float(potential_float)
126 |         return True
127 |     except ValueError:
128 |         return False
129 | 
130 | 
131 | def str2bool(string_: str, default: Union[str, bool] = "raise") -> bool:
132 |     """
133 |     Convert a string to a bool.
134 | 
135 |     Parameters
136 |     ----------
137 |     string_ : str
138 |     default : {'raise', False}
139 |         Default behaviour if none of the "true" strings is detected.
140 | 
141 |     Returns
142 |     -------
143 |     boolean : bool
144 | 
145 |     Examples
146 |     --------
147 |     >>> str2bool('True')
148 |     True
149 |     >>> str2bool('1')
150 |     True
151 |     >>> str2bool('0')
152 |     False
153 |     """
154 |     if default not in ["raise", False]:
155 |         raise ValueError(f"default was '{default}', but should be 'raise' or False")
156 |     true = ["true", "t", "1", "y", "yes", "enabled", "enable", "on"]
157 |     false = ["false", "f", "0", "n", "no", "disabled", "disable", "off"]
158 |     if string_.lower() in true:
159 |         return True
160 |     elif string_.lower() in false or (not default):
161 |         return False
162 |     else:
163 |         raise ValueError(f"The value '{string_}' cannot be mapped to boolean.")
164 | 
165 | 
166 | def str2str_or_none(string_: str) -> Optional[str]:
167 |     """
168 |     Convert a string to a str or to None.
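
    Strings that is_none considers equivalent to None ('none',
    'undefined', 'unknown', 'null' and the empty string,
    case-insensitive) are converted to None; all other strings are
    returned unchanged.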
169 | 
170 |     Parameters
171 |     ----------
172 |     string_ : str
173 | 
174 |     Returns
175 |     -------
176 |     str_or_none : str or None
177 | 
178 |     Examples
179 |     --------
180 |     >>> str2str_or_none('True')
181 |     'True'
182 |     >>> str2str_or_none('1')
183 |     '1'
184 |     >>> str2str_or_none('0')
185 |     '0'
186 |     >>> str2str_or_none('undefined')
187 |     """
188 |     if is_none(string_, default=False):
189 |         return None
190 |     else:
191 |         return string_
192 | 
193 | 
194 | def str2bool_or_none(
195 |     string_: str, default: Literal["raise", False] = "raise"
196 | ) -> Optional[bool]:
197 |     """
198 |     Convert a string to a bool or to None.
199 | 
200 |     Parameters
201 |     ----------
202 |     string_ : str
203 |     default : {'raise', False}
204 |         Default behaviour if none of the "true" or "none" strings is detected.
205 | 
206 |     Returns
207 |     -------
208 |     bool_or_none : bool or None
209 | 
210 |     Examples
211 |     --------
212 |     >>> str2bool_or_none('True')
213 |     True
214 |     >>> str2bool_or_none('1')
215 |     True
216 |     >>> str2bool_or_none('0')
217 |     False
218 |     >>> str2bool_or_none('undefined')
219 |     """
220 |     if default not in ["raise", False]:
221 |         raise ValueError(f"default was '{default}', but should be 'raise' or False")
222 |     if is_none(string_, default=False):
223 |         return None
224 |     else:
225 |         return str2bool(string_, default)
226 | 
227 | 
228 | def str2float_or_none(string_: str) -> Optional[float]:
229 |     """
230 |     Convert a string to a float or to None.
231 | 
232 |     Parameters
233 |     ----------
234 |     string_ : str
235 | 
236 |     Returns
237 |     -------
238 |     float_or_none : float or None
239 | 
240 |     Examples
241 |     --------
242 |     >>> str2float_or_none('1')
243 |     1.0
244 |     >>> str2float_or_none('1.2')
245 |     1.2
246 |     >>> str2float_or_none('undefined')
247 |     """
248 |     if is_none(string_, default=False):
249 |         return None
250 |     else:
251 |         return float(string_)
252 | 
253 | 
254 | def str2int_or_none(string_: str) -> Optional[int]:
255 |     """
256 |     Convert a string to an int or to None.
257 | 
258 |     Parameters
259 |     ----------
260 |     string_ : str
261 | 
262 |     Returns
263 |     -------
264 |     int_or_none : int or None
265 | 
266 |     Examples
267 |     --------
268 |     >>> str2int_or_none('2')
269 |     2
270 |     >>> str2int_or_none('undefined')
271 |     """
272 |     if is_none(string_, default=False):
273 |         return None
274 |     else:
275 |         return int(string_)
276 | 
277 | 
278 | def is_none(string_: str, default: Literal["raise", False] = "raise") -> bool:
279 |     """
280 |     Check if a string is equivalent to None.
281 | 
282 |     Parameters
283 |     ----------
284 |     string_ : str
285 |     default : {'raise', False}
286 |         Default behaviour if none of the "None" strings is detected.
287 | 
288 |     Returns
289 |     -------
290 |     is_none : bool
291 | 
292 |     Examples
293 |     --------
294 |     >>> is_none('2', default=False)
295 |     False
296 |     >>> is_none('undefined', default=False)
297 |     True
298 |     """
299 |     if default not in ["raise", False]:
300 |         raise ValueError(f"default was '{default}', but should be 'raise' or False")
301 |     none = ["none", "undefined", "unknown", "null", ""]
302 |     if string_.lower() in none:
303 |         return True
304 |     elif not default:
305 |         return False
306 |     else:
307 |         raise ValueError(f"The value '{string_}' cannot be mapped to none.")
308 | 
309 | 
310 | def is_iban(potential_iban: str) -> bool:
311 |     """
312 |     Check if a string is a valid IBAN.
313 | 
314 |     IBAN is described in ISO 13616-1:2007 Part 1.
315 | 
316 |     Spaces are ignored.
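
    The check verifies that the first two characters match a known
    country code, that the length matches that country's IBAN length
    (from data/iban.csv), and, for German IBANs only, that the check
    digits are correct.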
317 | 
318 |     # CODE
319 |     0 = always zero
320 |     b = BIC or National Bank code
321 |     c = Account number
322 |     i = holder's kennitala (national identification number)
323 |     k = IBAN check digits
324 |     n = Branch number
325 |     t = Account type
326 |     x = National check digit or character
327 | 
328 |     Examples
329 |     --------
330 |     >>> is_iban('DE89 3704 0044 0532 0130 00')
331 |     True
332 |     >>> is_iban('DE89 3704 0044 0532 0130 01')
333 |     False
334 |     """
335 |     path = "data/iban.csv"  # always use slash in Python packages
336 |     filepath = pkg_resources.resource_filename("mpu", path)
337 |     data = mpu.io.read(filepath, delimiter=";", format="dicts")
338 |     potential_iban = potential_iban.replace(" ", "")  # Remove spaces
339 |     if len(potential_iban) < min(int(el["length"]) for el in data):
340 |         return False
341 |     country = None
342 |     for element in data:
343 |         if element["iban_fields"][:2] == potential_iban[:2]:
344 |             country = element
345 |             break
346 |     if country is None:
347 |         return False
348 |     if len(potential_iban) != int(country["length"]):
349 |         return False
350 |     if country["country_en"] == "Germany":
351 |         checksum_vals = [
352 |             value
353 |             for field_type, value in zip(country["iban_fields"], potential_iban)
354 |             if field_type == "k"
355 |         ]
356 |         checksum_val = "".join(checksum_vals)
357 |         checksum_exp = _calculate_german_iban_checksum(
358 |             potential_iban, country["iban_fields"]
359 |         )
360 |         return checksum_val == checksum_exp
361 |     return True
362 | 
363 | 
364 | def is_ipv4(
365 |     potential_ipv4: str,
366 |     allow_leading_zeros: bool = False,
367 |     allow_shortened_addresses: bool = False,
368 | ) -> bool:
369 |     """
370 |     Check if a string is a valid IPv4 address.
371 | 
372 |     Parameters
373 |     ----------
374 |     potential_ipv4 : str
375 |     allow_leading_zeros : bool (default: False)
376 |     allow_shortened_addresses : bool (default: False)
377 | 
378 |     Returns
379 |     -------
380 |     is_valid : bool
381 | 
382 |     Examples
383 |     --------
384 |     >>> is_ipv4("192.168.0.4")
385 |     True
386 |     >>> is_ipv4("192.168..4")
387 |     False
388 |     >>> is_ipv4("192.168.01.4", allow_leading_zeros=True)
389 |     True
390 |     >>> is_ipv4("192.168.01.4", allow_leading_zeros=False)
391 |     False
392 |     >>> is_ipv4("256.168.01.4")
393 |     False
394 |     >>> is_ipv4("4", allow_shortened_addresses=True)
395 |     True
396 |     >>> is_ipv4("4", allow_shortened_addresses=False)
397 |     False
398 |     """
399 |     if not allow_shortened_addresses and potential_ipv4.count(".") != 3:
400 |         return False
401 |     try:
402 |         socket.inet_aton(potential_ipv4)
403 |     except OSError:
404 |         return False
405 |     if allow_leading_zeros:
406 |         return True
407 |     else:
408 |         return all(
409 |             len(block) == 1 or block[0] != "0" for block in potential_ipv4.split(".")
410 |         )
411 | 
412 | 
413 | def _calculate_german_iban_checksum(
414 |     iban: str, iban_fields: str = "DEkkbbbbbbbbcccccccccc"
415 | ) -> str:
416 |     """
417 |     Calculate the checksum of the German IBAN format.
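
    The BBAN digits (fields 'b' and 'c') are followed by the country
    code converted to numbers (D -> 13, E -> 14) and '00'; the check
    digits are 98 minus that number modulo 97 (ISO 7064 MOD-97-10).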
418 | 
419 |     Examples
420 |     --------
421 |     >>> iban = 'DE41500105170123456789'
422 |     >>> _calculate_german_iban_checksum(iban)
423 |     '41'
424 |     """
425 |     numbers: List[str] = [
426 |         value
427 |         for field_type, value in zip(iban_fields, iban)
428 |         if field_type in ["b", "c"]
429 |     ]
430 |     translate = {
431 |         "0": "0",
432 |         "1": "1",
433 |         "2": "2",
434 |         "3": "3",
435 |         "4": "4",
436 |         "5": "5",
437 |         "6": "6",
438 |         "7": "7",
439 |         "8": "8",
440 |         "9": "9",
441 |     }
442 |     for i in range(ord("A"), ord("Z") + 1):
443 |         translate[chr(i)] = str(i - ord("A") + 10)
444 |     for val in "DE00":
445 |         translated = translate[val]
446 |         for char in translated:
447 |             numbers.append(char)
448 |     number = sum(int(value) * 10**i for i, value in enumerate(numbers[::-1]))
449 |     checksum = 98 - (number % 97)
450 |     return str(checksum)
451 | 
452 | 
453 | def human_readable_bytes(nb_bytes: Union[int, float], suffix: str = "B") -> str:
454 |     """
455 |     Convert a byte number into a human readable format.
456 | 
457 |     Parameters
458 |     ----------
459 |     nb_bytes : Union[int, float]
460 |     suffix : str, optional (default: "B")
461 | 
462 |     Returns
463 |     -------
464 |     size_str : str
465 | 
466 |     Examples
467 |     --------
468 |     >>> human_readable_bytes(123)
469 |     '123.0 B'
470 | 
471 |     >>> human_readable_bytes(1025)
472 |     '1.0 KiB'
473 | 
474 |     >>> human_readable_bytes(9671406556917033397649423)
475 |     '8.0 YiB'
476 |     """
477 |     for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
478 |         if abs(nb_bytes) < 1024.0:
479 |             return f"{nb_bytes:3.1f} {unit}{suffix}"
480 |         nb_bytes /= 1024.0
481 |     return f"{nb_bytes:.1f} Yi{suffix}"
482 | 
--------------------------------------------------------------------------------
/mpu/io.py:
--------------------------------------------------------------------------------
1 | """Reading and writing common file formats."""
2 | 
3 | # Core Library
4 | import csv
5 | import hashlib
6 | import json
7 | import os
8 | import pickle
9 | import platform
10 | from datetime import datetime
11 | from typing import Any, Dict, List, Optional, Union
12 | 
13 | # Third party
14 | from typing_extensions import Literal
15 | 
16 | # First party
17 | from mpu.datastructures import EList
18 | 
19 | 
20 | def read(filepath: str, **kwargs: Any) -> Any:
21 |     """
22 |     Read a file.
23 | 
24 |     Supported formats:
25 | 
26 |     * CSV
27 |     * JSON, JSONL
28 |     * pickle
29 | 
30 |     Parameters
31 |     ----------
32 |     filepath : str
33 |         Path to the file that should be read. This method's action depends
34 |         mainly on the file extension.
35 |     kwargs : Dict
36 |         Any keywords for the specific file format. For CSV, this is
37 |         'delimiter', 'quotechar', 'skiprows', 'format'
38 | 
39 |     Returns
40 |     -------
41 |     data : Union[str, bytes] or other (e.g. format=dicts)
42 |     """
43 |     supported_formats = [".csv", ".json", ".jsonl", ".pickle"]
44 |     if filepath.lower().endswith(".csv"):
45 |         return _read_csv(filepath, kwargs)
46 |     elif filepath.lower().endswith(".json"):
47 |         with open(filepath, encoding="utf8") as data_file:
48 |             data: Any = json.load(data_file, **kwargs)
49 |         return data
50 |     elif filepath.lower().endswith(".jsonl"):
51 |         return _read_jsonl(filepath, kwargs)
52 |     elif filepath.lower().endswith(".pickle"):
53 |         with open(filepath, "rb") as handle:
54 |             data_pkl = pickle.load(handle)
55 |         return data_pkl
56 |     elif filepath.lower().endswith(".yml") or filepath.lower().endswith(".yaml"):
57 |         raise NotImplementedError(
58 |             "YAML is not supported because you need "
59 |             "PyYAML in Python 3. "
" 60 | "See " 61 | "https://stackoverflow.com/a/42054860/562769" 62 | " as a guide how to use it." 63 | ) 64 | elif filepath.lower().endswith(".h5") or filepath.lower().endswith(".hdf5"): 65 | raise NotImplementedError( 66 | "HDF5 is not supported. See " 67 | "https://stackoverflow.com/a/41586571/562769" 68 | " as a guide how to use it." 69 | ) 70 | else: 71 | raise NotImplementedError( 72 | f"File '{filepath}' does not end with one " 73 | f"of the supported file name extensions. " 74 | f"Supported are: {supported_formats}" 75 | ) 76 | 77 | 78 | def _read_csv(filepath: str, kwargs: Dict) -> Union[List, Dict]: 79 | """See documentation of mpu.io.read.""" 80 | if "delimiter" not in kwargs: 81 | kwargs["delimiter"] = "," 82 | if "quotechar" not in kwargs: 83 | kwargs["quotechar"] = '"' 84 | if "skiprows" not in kwargs: 85 | kwargs["skiprows"] = [] 86 | if isinstance(kwargs["skiprows"], int): 87 | kwargs["skiprows"] = list(range(kwargs["skiprows"])) 88 | if "format" in kwargs: 89 | format_ = kwargs["format"] 90 | kwargs.pop("format", None) 91 | else: 92 | format_ = "default" 93 | skiprows = kwargs["skiprows"] 94 | kwargs.pop("skiprows", None) 95 | 96 | newline = None 97 | if "newline" in kwargs: 98 | newline = kwargs["newline"] 99 | del kwargs["newline"] 100 | 101 | with open(filepath, encoding="utf8", newline=newline) as fp: 102 | if format_ == "default": 103 | reader = csv.reader(fp, **kwargs) 104 | data_tmp = EList(list(reader)) 105 | data: Union[List, Dict] = data_tmp.remove_indices(skiprows) 106 | elif format_ == "dicts": 107 | reader_list = csv.DictReader(fp, **kwargs) 108 | data = list(reader_list) 109 | else: 110 | raise NotImplementedError(f"Format '{format_}' unknown") 111 | return data 112 | 113 | 114 | def _read_jsonl(filepath: str, kwargs: Dict) -> List: 115 | """See documentation of mpu.io.read.""" 116 | with open(filepath, encoding="utf8") as data_file: 117 | data = [json.loads(line, **kwargs) for line in data_file if len(line) > 0] 118 | return data 119 | 120 | 121 | def write(filepath: str, data: Union[Dict, List], **kwargs: Any) -> Any: 122 | """ 123 | Write a file. 124 | 125 | Supported formats: 126 | 127 | * CSV 128 | * JSON, JSONL 129 | * pickle 130 | 131 | Parameters 132 | ---------- 133 | filepath : str 134 | Path to the file that should be read. This methods action depends 135 | mainly on the file extension. Make sure that it ends in .csv, .json, 136 | .jsonl, or .pickle. 137 | data : Union[Dict, List] 138 | Content that should be written 139 | kwargs : Dict 140 | Any keywords for the specific file format. 141 | 142 | Returns 143 | ------- 144 | data : str or bytes 145 | """ 146 | supported_formats = [".csv", ".json", ".jsonl", ".pickle"] 147 | if filepath.lower().endswith(".csv"): 148 | return _write_csv(filepath, data, kwargs) 149 | elif filepath.lower().endswith(".json"): 150 | return _write_json(filepath, data, kwargs) 151 | elif filepath.lower().endswith(".jsonl"): 152 | return _write_jsonl(filepath, data, kwargs) 153 | elif filepath.lower().endswith(".pickle"): 154 | return _write_pickle(filepath, data, kwargs) 155 | elif filepath.lower().endswith(".yml") or filepath.lower().endswith(".yaml"): 156 | raise NotImplementedError( 157 | "YAML is not supported, because you need " 158 | "PyYAML in Python3. " 159 | "See " 160 | "https://stackoverflow.com/a/42054860/562769" 161 | " as a guide how to use it." 162 | ) 163 | elif filepath.lower().endswith(".h5") or filepath.lower().endswith(".hdf5"): 164 | raise NotImplementedError( 165 | "HDF5 is not supported. 
See " 166 | "https://stackoverflow.com/a/41586571/562769" 167 | " as a guide how to use it." 168 | ) 169 | else: 170 | raise NotImplementedError( 171 | f"File '{filepath}' does not end in one of the " 172 | f"supported formats. Supported are: {supported_formats}" 173 | ) 174 | 175 | 176 | def _write_csv(filepath: str, data: Any, kwargs: Dict) -> Any: 177 | """See documentation of mpu.io.write.""" 178 | newline = None 179 | if "newline" in kwargs: 180 | newline = kwargs["newline"] 181 | del kwargs["newline"] 182 | with open(filepath, "w", encoding="utf8", newline=newline) as fp: 183 | if "delimiter" not in kwargs: 184 | kwargs["delimiter"] = "," 185 | if "quotechar" not in kwargs: 186 | kwargs["quotechar"] = '"' 187 | writer = csv.writer(fp, **kwargs) 188 | writer.writerows(data) 189 | return data 190 | 191 | 192 | def _write_json(filepath: str, data: Any, kwargs: Dict) -> Any: 193 | """See documentation of mpu.io.write.""" 194 | with open(filepath, "w", encoding="utf8") as outfile: 195 | if "indent" not in kwargs: 196 | kwargs["indent"] = 4 197 | if "sort_keys" not in kwargs: 198 | kwargs["sort_keys"] = True 199 | if "separators" not in kwargs: 200 | kwargs["separators"] = (",", ": ") 201 | if "ensure_ascii" not in kwargs: 202 | kwargs["ensure_ascii"] = False 203 | str_ = json.dumps(data, **kwargs) 204 | outfile.write(str_) 205 | return data 206 | 207 | 208 | def _write_jsonl(filepath: str, data: Any, kwargs: Dict) -> Any: 209 | """See documentation of mpu.io.write.""" 210 | with open(filepath, "w", encoding="utf8") as outfile: 211 | kwargs["indent"] = None # JSON has to be on one line! 212 | if "sort_keys" not in kwargs: 213 | kwargs["sort_keys"] = True 214 | if "separators" not in kwargs: 215 | kwargs["separators"] = (",", ": ") 216 | if "ensure_ascii" not in kwargs: 217 | kwargs["ensure_ascii"] = False 218 | for line in data: 219 | str_ = json.dumps(line, **kwargs) 220 | outfile.write(str_) 221 | outfile.write("\n") 222 | return data 223 | 224 | 225 | def _write_pickle(filepath: str, data: Any, kwargs: Dict) -> Any: 226 | """See documentation of mpu.io.write.""" 227 | if "protocol" not in kwargs: 228 | kwargs["protocol"] = pickle.HIGHEST_PROTOCOL 229 | with open(filepath, "wb") as handle: 230 | pickle.dump(data, handle, **kwargs) 231 | return data 232 | 233 | 234 | def urlread(url: str, encoding: str = "utf8") -> str: 235 | """ 236 | Read the content of an URL. 237 | 238 | Parameters 239 | ---------- 240 | url : str 241 | encoding : str (default: "utf8") 242 | 243 | Returns 244 | ------- 245 | content : str 246 | """ 247 | # Core Library 248 | from urllib.request import urlopen 249 | 250 | response = urlopen(url) 251 | content = response.read() 252 | content = content.decode(encoding) 253 | return content 254 | 255 | 256 | def download(source: str, sink: Optional[str] = None) -> str: 257 | """ 258 | Download a file. 259 | 260 | Parameters 261 | ---------- 262 | source : str 263 | Where the file comes from. Some URL. 264 | sink : str, optional (default: same filename in current directory) 265 | Where the file gets stored. Some filepath in the local file system. 266 | """ 267 | # Core Library 268 | from urllib.request import urlretrieve 269 | 270 | if sink is None: 271 | sink = os.path.abspath(os.path.split(source)[1]) 272 | urlretrieve(source, sink) 273 | return sink 274 | 275 | 276 | def hash( 277 | filepath: str, method: Literal["sha1", "md5"] = "sha1", buffer_size: int = 65536 278 | ) -> str: 279 | """ 280 | Calculate a hash of a local file. 
281 | 
282 |     Parameters
283 |     ----------
284 |     filepath : str
285 |     method : {'sha1', 'md5'}
286 |     buffer_size : int, optional (default: 65536 bytes = 64 KiB)
287 |         in bytes
288 | 
289 |     Returns
290 |     -------
291 |     hash : str
292 |     """
293 |     if method == "sha1":
294 |         hash_function = hashlib.sha1()
295 |     elif method == "md5":
296 |         hash_function = hashlib.md5()
297 |     else:
298 |         raise NotImplementedError(
299 |             f"Only md5 and sha1 hashes are known, but '{method}' was specified."
300 |         )
301 | 
302 |     with open(filepath, "rb") as fp:
303 |         while True:
304 |             data = fp.read(buffer_size)
305 |             if not data:
306 |                 break
307 |             hash_function.update(data)
308 |     return hash_function.hexdigest()
309 | 
310 | 
311 | def get_creation_datetime(filepath: str) -> Optional[datetime]:
312 |     """
313 |     Get the date that a file was created.
314 | 
315 |     Parameters
316 |     ----------
317 |     filepath : str
318 | 
319 |     Returns
320 |     -------
321 |     creation_datetime : Optional[datetime]
322 |     """
323 |     if platform.system() == "Windows":
324 |         return datetime.fromtimestamp(os.path.getctime(filepath))
325 |     else:
326 |         stat = os.stat(filepath)
327 |         try:
328 |             return datetime.fromtimestamp(stat.st_birthtime)
329 |         except AttributeError:
330 |             # We're probably on Linux. There is no easy way to get
331 |             # creation dates here, so we return None instead.
332 |             return None
333 | 
334 | 
335 | def get_modification_datetime(filepath: str) -> datetime:
336 |     """
337 |     Get the datetime that a file was last modified.
338 | 
339 |     Parameters
340 |     ----------
341 |     filepath : str
342 | 
343 |     Returns
344 |     -------
345 |     modification_datetime : datetime
346 | 
347 |     """
348 |     # Third party
349 |     import tzlocal
350 | 
351 |     timezone = tzlocal.get_localzone()
352 |     mtime = datetime.fromtimestamp(os.path.getmtime(filepath))
353 |     return mtime.replace(tzinfo=timezone)
354 | 
355 | 
356 | def get_access_datetime(filepath: str) -> datetime:
357 |     """
358 |     Get the last time filepath was accessed.
359 | 
360 |     Parameters
361 |     ----------
362 |     filepath : str
363 | 
364 |     Returns
365 |     -------
366 |     access_datetime : datetime
367 |     """
368 |     # Third party
369 |     import tzlocal
370 | 
371 |     tz = tzlocal.get_localzone()
372 |     mtime = datetime.fromtimestamp(os.path.getatime(filepath))
373 |     return mtime.replace(tzinfo=tz)
374 | 
375 | 
376 | def get_file_meta(filepath: str) -> Dict[str, Any]:
377 |     """
378 |     Get meta-information about a file.
379 | 
380 |     Parameters
381 |     ----------
382 |     filepath : str
383 | 
384 |     Returns
385 |     -------
386 |     meta : dict
387 |     """
388 |     meta: Dict[str, Any] = {
389 |         "filepath": os.path.abspath(filepath),
390 |         "creation_datetime": get_creation_datetime(filepath),
391 |         "last_access_datetime": get_access_datetime(filepath),
392 |         "modification_datetime": get_modification_datetime(filepath),
393 |     }
394 |     try:
395 |         # Third party
396 |         import magic
397 | 
398 |         f_mime = magic.Magic(mime=True, uncompress=True)
399 |         f_other = magic.Magic(mime=False, uncompress=True)
400 |         meta["mime"] = f_mime.from_file(meta["filepath"])
401 |         meta["magic-type"] = f_other.from_file(meta["filepath"])
402 |     except ImportError:
403 |         pass
404 |     return meta
405 | 
406 | 
407 | def gzip_file(source: str, sink: str) -> None:
408 |     """
409 |     Create a GZIP file from a source file.
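
    The source file is read in binary mode and its content is written
    gzip-compressed to sink; the source file itself is left unchanged.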
410 | 
411 |     Parameters
412 |     ----------
413 |     source : str
414 |         Filepath
415 |     sink : str
416 |         Filepath
417 |     """
418 |     # Core Library
419 |     import gzip
420 | 
421 |     with open(source, "rb") as f_in, gzip.open(sink, "wb") as f_out:
422 |         f_out.writelines(f_in)
423 | 
--------------------------------------------------------------------------------