├── requirements ├── prod.txt ├── dev.in ├── ci.in ├── lint.in ├── lint.txt ├── ci.txt └── dev.txt ├── mutmut-test.sh ├── .gitattributes ├── tests ├── files │ ├── example.png │ ├── example.pickle │ ├── example.jsonl │ ├── example.csv │ └── example.json ├── test_ml.py ├── test_path.py ├── test_decorators.py ├── test_shell.py ├── test_image.py ├── test_string_trie.py ├── test_mutmut_killers.py ├── conftest.py ├── test_char_trie.py ├── test_datetime.py ├── test_aws.py ├── test_string.py ├── test_trie.py ├── test_main.py ├── test_math.py ├── test_nodebased_trie.py ├── test_pd.py ├── test_units.py └── test_io.py ├── docs ├── requirements.in ├── source │ ├── mpu.rst │ ├── io.rst │ ├── ml.rst │ ├── pd.rst │ ├── aws.rst │ ├── image.rst │ ├── math.rst │ ├── path.rst │ ├── shell.rst │ ├── type.rst │ ├── string.rst │ ├── datetime.rst │ ├── geometry.rst │ ├── decorators.rst │ ├── datastructures.rst │ ├── index.rst │ ├── units.rst │ └── conf.py ├── requirements.txt └── Makefile ├── mpu ├── data │ ├── laguages.csv.gz │ ├── sources.txt │ └── iban.csv ├── _version.py ├── datastructures │ └── trie │ │ ├── base.py │ │ ├── full_prefix_dict.py │ │ ├── __init__.py │ │ ├── char_trie.py │ │ └── string_trie.py ├── image.py ├── type.py ├── decorators.py ├── path.py ├── ml.py ├── datetime.py ├── shell.py ├── aws.py ├── math.py ├── __init__.py ├── pd.py ├── geometry.py ├── string.py └── io.py ├── requirements.txt ├── .pylintrc ├── create_package.sh ├── .travis.yml ├── tox.ini ├── .isort.cfg ├── .circleci └── config.yml ├── .readthedocs.yml ├── .coveragerc ├── setup.py ├── azure-pipelines.yml ├── Makefile ├── LICENSE ├── .github └── workflows │ └── python-package.yml ├── .pre-commit-config.yaml ├── .gitignore ├── setup.cfg └── README.md /requirements/prod.txt: -------------------------------------------------------------------------------- 1 | -r ../requirements.txt 2 | -------------------------------------------------------------------------------- /mutmut-test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | mypy mpu/ 3 | pytest -x 4 | -------------------------------------------------------------------------------- /requirements/dev.in: -------------------------------------------------------------------------------- 1 | -r ci.txt 2 | pip-tools 3 | pre-commit 4 | wheel 5 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py encoding=utf-8 2 | *.json encoding=utf-8 3 | *.csv encoding=utf-8 4 | -------------------------------------------------------------------------------- /tests/files/example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/mpu/HEAD/tests/files/example.png -------------------------------------------------------------------------------- /docs/requirements.in: -------------------------------------------------------------------------------- 1 | pandas 2 | sphinx_rtd_theme>=0.3.1 3 | boto3 4 | Sphinx 5 | typing_extensions 6 | -------------------------------------------------------------------------------- /mpu/data/laguages.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/mpu/HEAD/mpu/data/laguages.csv.gz -------------------------------------------------------------------------------- /tests/files/example.pickle: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MartinThoma/mpu/HEAD/tests/files/example.pickle -------------------------------------------------------------------------------- /docs/source/mpu.rst: -------------------------------------------------------------------------------- 1 | mpu 2 | --- 3 | 4 | .. automodule:: mpu 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /mpu/data/sources.txt: -------------------------------------------------------------------------------- 1 | ## languages.csv.gz 2 | 3 | Credits to https://github.com/annexare/Countries/blob/master/data/languages.json 4 | -------------------------------------------------------------------------------- /docs/source/io.rst: -------------------------------------------------------------------------------- 1 | mpu.io 2 | ------ 3 | 4 | .. automodule:: mpu.io 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/ml.rst: -------------------------------------------------------------------------------- 1 | mpu.ml 2 | ------ 3 | 4 | .. automodule:: mpu.ml 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/pd.rst: -------------------------------------------------------------------------------- 1 | mpu.pd 2 | ------ 3 | 4 | .. automodule:: mpu.pd 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /mpu/_version.py: -------------------------------------------------------------------------------- 1 | """Store the version for setup.py and the module itself.""" 2 | __version__ = "0.23.0" # keep in sync with ../setup.py 3 | -------------------------------------------------------------------------------- /docs/source/aws.rst: -------------------------------------------------------------------------------- 1 | mpu.aws 2 | -------- 3 | 4 | .. automodule:: mpu.aws 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /tests/files/example.jsonl: -------------------------------------------------------------------------------- 1 | {"some":"thing"} 2 | {"foo":17,"bar":false,"quux":true} 3 | {"may":{"include":"nested","objects":["and","arrays"]}} 4 | -------------------------------------------------------------------------------- /docs/source/image.rst: -------------------------------------------------------------------------------- 1 | mpu.image 2 | --------- 3 | 4 | .. automodule:: mpu.image 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/math.rst: -------------------------------------------------------------------------------- 1 | mpu.math 2 | -------- 3 | 4 | .. automodule:: mpu.math 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/path.rst: -------------------------------------------------------------------------------- 1 | mpu.path 2 | -------- 3 | 4 | .. 
automodule:: mpu.path 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/shell.rst: -------------------------------------------------------------------------------- 1 | mpu.shell 2 | --------- 3 | 4 | .. automodule:: mpu.shell 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/type.rst: -------------------------------------------------------------------------------- 1 | mpu.type 2 | -------- 3 | 4 | .. automodule:: mpu.type 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.7 3 | # To update, run: 4 | # 5 | # pip-compile setup.py 6 | # 7 | -------------------------------------------------------------------------------- /docs/source/string.rst: -------------------------------------------------------------------------------- 1 | mpu.string 2 | ---------- 3 | 4 | .. automodule:: mpu.string 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/datetime.rst: -------------------------------------------------------------------------------- 1 | mpu.datetime 2 | ------------ 3 | 4 | .. automodule:: mpu.datetime 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/geometry.rst: -------------------------------------------------------------------------------- 1 | mpu.geometry 2 | ------------ 3 | 4 | .. automodule:: mpu.geometry 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/decorators.rst: -------------------------------------------------------------------------------- 1 | mpu.decorators 2 | -------------- 3 | 4 | .. automodule:: mpu.decorators 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | load-plugins=pylint.extensions.mccabe 3 | 4 | [MESSAGES CONTROL] 5 | disable=R0205,R1705,C0411,C0413,C0103,C1801,C0325,bad-continuation,logging-fstring-interpolation 6 | -------------------------------------------------------------------------------- /docs/source/datastructures.rst: -------------------------------------------------------------------------------- 1 | mpu.datastructures 2 | ------------------ 3 | 4 | .. 
automodule:: mpu.datastructures 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /requirements/ci.in: -------------------------------------------------------------------------------- 1 | -r prod.txt 2 | boto3>=1.7.84 3 | hypothesis 4 | moto>=1.3.3 5 | pandas 6 | pip-tools 7 | pytest 8 | pytest-cov 9 | pytest-timeout 10 | pytest-benchmark 11 | simplejson 12 | twine 13 | wheel 14 | -------------------------------------------------------------------------------- /tests/files/example.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 1,"A towel,",1.0 3 | 42," it says, ",2.0 4 | 1337,is about the most ,-1 5 | 0,massively useful thing ,123 6 | -2,"an interstellar hitchhiker can have. 7 | ",3 8 | 3.141,Special char test: €üößł,2.7 9 | -------------------------------------------------------------------------------- /create_package.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm -rf lambda.zip; 4 | rm -rf venv-lambda && python3 -m venv venv-lambda && source venv-lambda/bin/activate && pip install . --upgrade && deactivate 5 | cd venv-lambda/lib/python3.8/site-packages/; 6 | zip -ur -D ../../../../lambda.zip mpu; 7 | -------------------------------------------------------------------------------- /tests/files/example.json: -------------------------------------------------------------------------------- 1 | { 2 | "a list": [ 3 | 1, 4 | 42, 5 | 3.141, 6 | 1337, 7 | "help", 8 | "€" 9 | ], 10 | "a string": "bla", 11 | "another dict": { 12 | "foo": "bar", 13 | "key": "value", 14 | "the answer": 42 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.7 4 | - 3.8 5 | - 3.9 6 | before_install: 7 | - sudo rm -f /etc/boto.cfg # https://github.com/travis-ci/travis-ci/issues/7940#issuecomment-310759657 8 | install: 9 | - pip install coveralls tox-travis 10 | script: 11 | - tox 12 | after_success: 13 | - coveralls 14 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = linter,py37,py38,py39 3 | 4 | [testenv] 5 | deps = 6 | -r requirements/ci.txt 7 | commands = 8 | pip install -e .[all] 9 | pytest . 10 | 11 | [testenv:linter] 12 | deps = 13 | -r requirements/lint.txt 14 | commands = 15 | flake8 16 | black --check . 17 | pydocstyle 18 | mypy . 
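The fixture files shown a little earlier (tests/files/example.json, example.csv, example.jsonl, example.pickle) appear to exist to exercise `mpu.io`, which picks a (de)serializer based on the file extension. A minimal round-trip sketch; the exact `read`/`write` call shape is an assumption, since mpu/io.py's body is not part of this dump:

```python
import mpu.io

# mpu.io dispatches on the extension: .json here; .csv/.jsonl/.pickle work alike.
data = mpu.io.read("tests/files/example.json")  # parsed into a dict
mpu.io.write("example-roundtrip.json", data)    # serialized back to disk
```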
19 | -------------------------------------------------------------------------------- /requirements/lint.in: -------------------------------------------------------------------------------- 1 | black 2 | flake8 3 | flake8_implicit_str_concat 4 | flake8-assert-msg 5 | flake8-bugbear 6 | flake8-builtins 7 | flake8-comprehensions 8 | flake8-eradicate 9 | flake8-executable 10 | flake8-isort 11 | flake8-pytest-style 12 | flake8-raise 13 | flake8-simplify 14 | flake8-string-format 15 | mccabe 16 | mypy 17 | pydocstyle 18 | -------------------------------------------------------------------------------- /mpu/datastructures/trie/base.py: -------------------------------------------------------------------------------- 1 | # Core Library 2 | from abc import abstractmethod 3 | from collections.abc import Collection 4 | from typing import List 5 | 6 | 7 | class AbstractTrie(Collection): 8 | @abstractmethod 9 | def autocomplete(self, prefix: str) -> List[str]: 10 | """Return a list of all words with the given prefix.""" 11 | -------------------------------------------------------------------------------- /tests/test_ml.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | # Third party 5 | import pytest 6 | 7 | # First party 8 | import mpu.ml 9 | 10 | 11 | def test_negative_class_number(): 12 | with pytest.raises(ValueError): 13 | mpu.ml.indices2one_hot([0, 1, 1], 0) 14 | 15 | 16 | def test_indices2one_hot(): 17 | assert mpu.ml.indices2one_hot([0, 0], 1) == [[1], [1]] 18 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | line_length=79 3 | indent=' ' 4 | multi_line_output=3 5 | length_sort=0 6 | import_heading_stdlib=Core Library 7 | import_heading_firstparty=First party 8 | import_heading_thirdparty=Third party 9 | import_heading_localfolder=Local 10 | known_third_party = boto3,hypothesis,moto,pandas,pkg_resources,pytest,pytz,setuptools,simplejson,typing_extensions 11 | include_trailing_comma=True 12 | -------------------------------------------------------------------------------- /tests/test_path.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Third party 4 | import pkg_resources 5 | 6 | # First party 7 | from mpu.path import get_all_files, get_from_package 8 | 9 | 10 | def test_get_meta(): 11 | path = "files" 12 | root = pkg_resources.resource_filename(__name__, path) 13 | meta = get_all_files(root) 14 | assert len(meta) == 5 15 | 16 | 17 | def test_get_from_package(): 18 | get_from_package("mpu", "data/iban.csv") 19 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | orbs: 4 | python: circleci/python@0.2.1 5 | 6 | jobs: 7 | build-and-test: 8 | executor: python/default 9 | steps: 10 | - checkout 11 | - run: 12 | command: pip install -r requirements/ci.txt 13 | name: Install Test requirements-dev 14 | - run: 15 | command: pip install -e .[all] 16 | name: Install Package 17 | - run: 18 | command: pytest 19 | name: Test 20 | 21 | workflows: 22 | main: 23 | jobs: 24 | - build-and-test 25 | -------------------------------------------------------------------------------- /.readthedocs.yml: 
-------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | configuration: docs/source/conf.py 11 | 12 | # Build these additional formats 13 | formats: 14 | - htmlzip 15 | - pdf 16 | 17 | # Optionally set the version of Python and requirements required to build your docs 18 | python: 19 | version: 3.8 20 | install: 21 | - requirements: docs/requirements.txt 22 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = mpu 3 | branch = True 4 | 5 | [report] 6 | # Regexes for lines to exclude from consideration 7 | exclude_lines = 8 | # Have to re-enable the standard pragma 9 | pragma: no cover 10 | @overload 11 | 12 | # Don't complain about missing debug-only code: 13 | def __repr__ 14 | def __str__ 15 | if self\.debug 16 | 17 | # Don't complain if tests don't hit defensive assertion code: 18 | raise AssertionError 19 | raise NotImplementedError 20 | 21 | # Don't complain if non-runnable code isn't run: 22 | if 0: 23 | if __name__ == .__main__.: 24 | -------------------------------------------------------------------------------- /tests/test_decorators.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Core Library 4 | import warnings 5 | 6 | # First party 7 | from mpu.decorators import deprecated, timing 8 | 9 | 10 | def test_timing(): 11 | @timing 12 | def fib(n): 13 | if n < 1: 14 | return n 15 | else: 16 | return fib(n - 1) + fib(n - 2) 17 | 18 | fib(2) 19 | 20 | 21 | def test_deprecated(): 22 | with warnings.catch_warnings(record=True): 23 | 24 | @deprecated 25 | def fib(n): 26 | if n < 1: 27 | return n 28 | else: 29 | return fib(n - 1) + fib(n - 2) 30 | 31 | fib(2) 32 | -------------------------------------------------------------------------------- /tests/test_shell.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Core Library 4 | import sys 5 | from io import StringIO 6 | 7 | # First party 8 | from mpu.shell import Codes, text_input 9 | 10 | 11 | def test_codes(): 12 | s = Codes.BOLD + Codes.GREEN + "WORKS!"
+ Codes.RESET_ALL 13 | assert isinstance(s, str) 14 | 15 | 16 | def test_codes_start_with_esc(): 17 | ESC = "\033" # https://askubuntu.com/q/831971/10425 18 | codes_obj = Codes() 19 | codes = [a for a in dir(codes_obj) if not a.startswith("__")] 20 | for code in codes: 21 | assert Codes.__dict__[code].startswith(ESC) 22 | 23 | 24 | def test_text_input(): 25 | sys.stdin = StringIO("foo\nbar") 26 | text_input("foo") 27 | -------------------------------------------------------------------------------- /tests/test_image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Core Library 4 | import sys 5 | from unittest import mock 6 | 7 | # Third party 8 | import pkg_resources 9 | 10 | # First party 11 | from mpu.image import get_meta 12 | 13 | # def test_get_meta(): 14 | # path = "files/example.png" 15 | # source = pkg_resources.resource_filename(__name__, path) 16 | # meta = get_meta(source) 17 | # meta["file"] = None 18 | # assert meta == {"width": 252, "height": 167, "channels": 4, "file": None} 19 | 20 | 21 | # def test_import_error(): 22 | # path = "files/example.png" 23 | # source = pkg_resources.resource_filename(__name__, path) 24 | # with mock.patch.dict(sys.modules, {"PIL": None}): 25 | # meta = get_meta(source) 26 | # meta["file"] = None 27 | # assert meta == {"file": None} 28 | -------------------------------------------------------------------------------- /mpu/image.py: -------------------------------------------------------------------------------- 1 | """Image manipulation.""" 2 | 3 | # Core Library 4 | from typing import Dict 5 | 6 | # First party 7 | import mpu 8 | 9 | 10 | def get_meta(filepath: str) -> Dict: 11 | """ 12 | Get meta-information of an image. 13 | 14 | Parameters 15 | ---------- 16 | filepath : str 17 | 18 | Returns 19 | ------- 20 | meta : Dict 21 | """ 22 | meta = {} 23 | try: 24 | # Third party 25 | from PIL import Image 26 | 27 | with Image.open(filepath) as img: 28 | width, height = img.size 29 | meta["width"] = width 30 | meta["height"] = height 31 | meta["channels"] = len(img.mode) # RGB, RGBA - does this always work? 
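            # Editor's note (answering the question above): len(img.mode) matches
            # the band count for common modes ("L" is 1, "RGB" is 3, "RGBA" is 4),
            # but not for every mode: "YCbCr" has 3 bands yet 5 characters, so
            # len(img.getbands()) would be exact.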
32 | except ImportError: 33 | pass 34 | 35 | # Get times - creation, last edit, last open 36 | meta["file"] = mpu.io.get_file_meta(filepath) 37 | return meta 38 | -------------------------------------------------------------------------------- /mpu/type.py: -------------------------------------------------------------------------------- 1 | """Helpers for type annotations.""" 2 | 3 | # Core Library 4 | import typing 5 | from typing import Any 6 | 7 | # Third party 8 | from typing_extensions import Protocol 9 | 10 | C = typing.TypeVar("C", bound="Comparable") 11 | 12 | 13 | class Comparable(Protocol): 14 | """Type for a function which is comparable to other instances.""" 15 | 16 | def __eq__(self, other: Any) -> bool: 17 | """Check if the comparable is equal to other.""" 18 | 19 | def __lt__(self: C, other: C) -> bool: 20 | """Check if the comparable is less than other.""" 21 | 22 | def __gt__(self: C, other: C) -> bool: 23 | """Check if the comparable is greater than other.""" 24 | 25 | def __le__(self: C, other: C) -> bool: 26 | """Check if the comparable is less than or equal to other.""" 27 | 28 | def __ge__(self: C, other: C) -> bool: 29 | """Check if the comparable is greater than or equal to other.""" 30 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """mpu: Martins Python Utilities.""" 2 | 3 | # Third party 4 | from setuptools import setup 5 | 6 | requires_datetime = ["pytz"] 7 | requires_image = ["Pillow"] 8 | requires_io = ["pytz", "tzlocal"] 9 | requires_aws = ["boto3"] 10 | requires_tests = [ 11 | "pytest", 12 | "pytest-cov", 13 | "pytest-mccabe", 14 | "pytest-flake8", 15 | "simplejson", 16 | ] 17 | requires_all = ( 18 | ["pandas", "python-magic", "typing_extensions"] 19 | + requires_datetime 20 | + requires_image 21 | + requires_io 22 | + requires_aws 23 | + requires_tests 24 | ) 25 | 26 | setup( 27 | package_data={"mpu": ["units/currencies.csv", "data/*", "package/templates/*"]}, 28 | extras_require={ 29 | "all": requires_all, 30 | "aws": requires_aws, 31 | "datetime": requires_datetime, 32 | "image": requires_image, 33 | "io": requires_io, 34 | "tests": requires_tests, 35 | }, 36 | tests_require=requires_tests, 37 | ) 38 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | # Python package 2 | # Create and test a Python package on multiple Python versions. 
3 | # Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: 4 | # https://docs.microsoft.com/azure/devops/pipelines/languages/python 5 | 6 | trigger: 7 | - master 8 | 9 | pool: 10 | vmImage: 'VS2017-Win2016' 11 | strategy: 12 | matrix: 13 | Python37: 14 | python.version: '3.7' 15 | Python38: 16 | python.version: '3.8' 17 | 18 | steps: 19 | - task: UsePythonVersion@0 20 | inputs: 21 | versionSpec: '$(python.version)' 22 | displayName: 'Use Python $(python.version)' 23 | 24 | - script: | 25 | python -m pip install --upgrade pip 26 | pip install -r requirements/ci.txt 27 | displayName: 'Install dependencies' 28 | 29 | - script: | 30 | pip install .[all] 31 | displayName: 'Install package' 32 | 33 | - script: | 34 | pip install pytest pytest-azurepipelines 35 | pytest -vv 36 | displayName: 'pytest' 37 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | maint: 2 | pip install -r requirements/dev.txt 3 | pre-commit autoupdate && pre-commit run --all-files 4 | pip-compile -U docs/requirements.in 5 | pip-compile -U setup.py 6 | pip-compile -U requirements/ci.in 7 | pip-compile -U requirements/dev.in 8 | 9 | upload: 10 | make clean 11 | python setup.py sdist bdist_wheel && twine upload -s dist/* 12 | 13 | clean: 14 | python setup.py clean --all 15 | pyclean . 16 | rm -rf *.pyc build dist tests/reports docs/build .pytest_cache .tox .coverage html/ 17 | rm -rf mpu.egg-info lambda.zip venv-lambda 18 | rm -rf __pycache__ mpu/datastructures/trie/__pycache__ mpu/__pycache__ mpu/units/__pycache__ tests/__pycache__ 19 | 20 | package: 21 | make clean 22 | ./create_package.sh 23 | 24 | mutation-test: 25 | mutmut run 26 | 27 | mutmut-results: 28 | mutmut junitxml --suspicious-policy=ignore --untested-policy=ignore > mutmut-results.xml 29 | junit2html mutmut-results.xml mutmut-results.html 30 | 31 | bandit: 32 | # Python3 only: B322 is safe 33 | bandit -r mpu -s B322 34 | -------------------------------------------------------------------------------- /tests/test_string_trie.py: -------------------------------------------------------------------------------- 1 | # First party 2 | from mpu.datastructures.trie.string_trie import Trie 3 | 4 | 5 | def test_trie_print(): 6 | data = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 7 | trie = Trie(data) 8 | trie_data = trie.print(print_stdout=True) 9 | trie_data = trie.print(print_stdout=False) 10 | expected = """Trie 11 | cat 12 | tle 13 | d 14 | og 15 | tom 16 | atoe 17 | cat""" 18 | assert trie_data == expected 19 | trie.print(print_stdout=True) 20 | 21 | 22 | def test_trie_creation_prefix_search(): 23 | data = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 24 | trie = Trie(data) 25 | expected = {"tom", "tomcat", "tomatoe"} 26 | prefix, subtrie = trie.get_subtrie("tom") 27 | assert {prefix + element for element in subtrie} == expected 28 | 29 | 30 | def test_get_subtrie_direct_hit2(): 31 | trie = Trie(["foobar"]) 32 | assert [word for subtrie in trie.get_subtrie("foobar") for word in subtrie] == [ 33 | "foobar" 34 | ] 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Martin Thoma 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and
associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /mpu/datastructures/trie/full_prefix_dict.py: -------------------------------------------------------------------------------- 1 | # Core Library 2 | from collections import defaultdict 3 | 4 | # First party 5 | from mpu.datastructures.trie.base import AbstractTrie 6 | 7 | 8 | class FullPrefixDict(AbstractTrie): 9 | def __init__(self, container=None): 10 | if container is None: 11 | container = [] 12 | self._prefix2words = defaultdict(list) # Prefix to list of words 13 | self._len = 0 14 | for element in container: 15 | self.push(element) 16 | 17 | def __len__(self): 18 | return self._len 19 | 20 | def __iter__(self): 21 | yield from self._prefix2words[""] 22 | 23 | def push(self, element): 24 | self._len += 1 25 | for i in range(0, len(element) + 1): 26 | prefix = element[:i] 27 | self._prefix2words[prefix].append(element) 28 | self._prefix2words[prefix] = sorted(self._prefix2words[prefix]) 29 | 30 | def autocomplete(self, prefix): 31 | return self._prefix2words[prefix] 32 | 33 | def __contains__(self, element): 34 | return element in self._prefix2words[element] 35 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ master ] 9 | pull_request: 10 | branches: [ master ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: [3.7, 3.8, 3.9] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | pip install -r requirements/ci.txt 30 | pip install -r requirements/lint.txt 31 | pip install .[all] 32 | - name: Test with pytest 33 | run: | 34 | pytest 35 | - name: Test with mypy 36 | run: | 37 | mypy . 
--exclude=build 38 | -------------------------------------------------------------------------------- /tests/test_mutmut_killers.py: -------------------------------------------------------------------------------- 1 | # Third party 2 | import pytest 3 | 4 | # First party 5 | from mpu.datastructures import Interval 6 | from mpu.units import Money 7 | 8 | 9 | def test_eq_exception_msg(): 10 | a = Money("0.1", "EUR") 11 | with pytest.raises(ValueError) as excinfo: 12 | a == 0.5 13 | assert "XX" not in str(excinfo) 14 | 15 | 16 | def test_interval_exception_msg(): 17 | with pytest.raises(RuntimeError) as excinfo: 18 | Interval(None, 3) 19 | assert "XX" not in str(excinfo) 20 | 21 | 22 | def test_interval_left_bigger_right_exception_msg(): 23 | with pytest.raises(RuntimeError) as excinfo: 24 | Interval(5, 3) 25 | assert "XX" not in str(excinfo) 26 | 27 | 28 | def test_interval_invalid_issubset(): 29 | class Impossible: 30 | def __init__(self): 31 | self.left = -1 32 | self.right = float("nan") 33 | 34 | def is_empty(self): 35 | return False 36 | 37 | interval = Interval(0, 1) 38 | other = Impossible() 39 | with pytest.raises(RuntimeError) as excinfo: 40 | interval.issubset(other) 41 | assert "XX" not in str(excinfo) 42 | -------------------------------------------------------------------------------- /mpu/datastructures/trie/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | A trie is a prefix-tree. It allows efficient search by prefixes. 3 | 4 | There are three different prefix-trees implemented in mpu: 5 | 6 | * FullPrefixTrie: Every prefix of every word is stored 7 | * CharTrie: Every single character is a node 8 | * StringTrie: Every node stores a substring of the word which is as long as 9 | possible. 10 | 11 | | | FullPrefixTrie | CharTrie | StringTrie | 12 | | ----------------------------- | -------------- | -------- | ---------- | 13 | | Insert word with w characters | O(w) | O(w) | ? | 14 | | Lookup word with w characters | O(1) | O(w) | ? | 15 | 16 | Typically, the FullPrefixTrie is the fastest solution and uses by far the most 17 | memory.
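A minimal usage sketch of the three variants named above; it assumes only what this dump shows, namely that `FullPrefixDict`, `CharTrie`, and `StringTrie` are importable from `mpu.datastructures.trie` and implement `AbstractTrie.autocomplete`:

```python
from mpu.datastructures.trie import CharTrie, FullPrefixDict, StringTrie

words = ["dog", "tom", "tomcat", "tomato"]

# All three implement the AbstractTrie interface, so they are interchangeable here.
for trie_class in (FullPrefixDict, CharTrie, StringTrie):
    trie = trie_class(words)
    print(sorted(trie.autocomplete("tom")))  # ['tom', 'tomato', 'tomcat']
```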
18 | 19 | See also 20 | -------- 21 | * [Should a prefix tree (trie) node store only a single character or a 22 | string?](https://cs.stackexchange.com/q/121937/2914) 23 | """ 24 | 25 | # First party 26 | from mpu.datastructures.trie.char_trie import Trie as CharTrie # noqa 27 | from mpu.datastructures.trie.full_prefix_dict import FullPrefixDict # noqa 28 | from mpu.datastructures.trie.string_trie import Trie as StringTrie # noqa 29 | 30 | Trie = FullPrefixDict 31 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Core Library 2 | import os 3 | from tempfile import mkstemp 4 | 5 | # Third party 6 | import pytest 7 | 8 | 9 | def create_tempfile(suffix=None, prefix=None): 10 | """Create a named temporary file, close its OS handle, and return its path.""" 11 | handle, pathname = mkstemp(suffix=suffix, prefix=prefix) 12 | os.close(handle) 13 | return pathname 14 | 15 | 16 | @pytest.fixture 17 | def json_tempfile(): 18 | pathname = create_tempfile(suffix=".json") 19 | yield pathname 20 | os.remove(pathname) 21 | 22 | 23 | @pytest.fixture 24 | def jsonl_tempfile(): 25 | pathname = create_tempfile(suffix=".jsonl") 26 | yield pathname 27 | os.remove(pathname) 28 | 29 | 30 | @pytest.fixture 31 | def jpg_tempfile(): 32 | pathname = create_tempfile(suffix=".jpg") 33 | yield pathname 34 | os.remove(pathname) 35 | 36 | 37 | @pytest.fixture 38 | def pickle_tempfile(): 39 | pathname = create_tempfile(suffix=".pickle") 40 | yield pathname 41 | os.remove(pathname) 42 | 43 | 44 | @pytest.fixture 45 | def csv_tempfile(): 46 | pathname = create_tempfile(suffix=".csv") 47 | yield pathname 48 | os.remove(pathname) 49 | 50 | 51 | @pytest.fixture 52 | def hdf5_tempfile(): 53 | pathname = create_tempfile(suffix=".hdf5") 54 | yield pathname 55 | os.remove(pathname) 56 | -------------------------------------------------------------------------------- /mpu/decorators.py: -------------------------------------------------------------------------------- 1 | """Decorators which are not in `functools`.""" 2 | 3 | # Core Library 4 | import functools 5 | import warnings 6 | from time import time 7 | from typing import Callable, Dict, List 8 | 9 | 10 | def timing(func: Callable) -> Callable: 11 | """Measure the execution time of a function call and print the result.""" 12 | 13 | @functools.wraps(func) 14 | def wrap(*args: List, **kw: Dict) -> Callable: 15 | t0 = time() 16 | result = func(*args, **kw) 17 | t1 = time() 18 | print( 19 | f"func:{func.__name__!r} args:[{args!r}, {kw!r}] took: " 20 | f"{t1 - t0:2.4f} sec" 21 | ) 22 | return result 23 | 24 | return wrap 25 | 26 | 27 | def deprecated(func: Callable) -> Callable: 28 | """ 29 | Mark functions as deprecated. 30 | 31 | It will result in a warning being emitted when the function is used.
32 | """ 33 | 34 | @functools.wraps(func) 35 | def new_func(*args: List, **kwargs: Dict) -> Callable: 36 | warnings.warn_explicit( 37 | f"Call to deprecated function {func.__name__}.", 38 | category=DeprecationWarning, 39 | filename=func.__code__.co_filename, 40 | lineno=func.__code__.co_firstlineno + 1, 41 | ) 42 | return func(*args, **kwargs) 43 | 44 | return new_func 45 | -------------------------------------------------------------------------------- /mpu/path.py: -------------------------------------------------------------------------------- 1 | """Functions for path manipulation and retrieval of files.""" 2 | 3 | # Core Library 4 | import os 5 | from typing import List 6 | 7 | # Third party 8 | import pkg_resources 9 | 10 | 11 | def get_all_files(root: str, followlinks: bool = False) -> List: 12 | """ 13 | Get all files within the given root directory. 14 | 15 | Note that this list is not ordered. 16 | 17 | Parameters 18 | ---------- 19 | root : str 20 | Path to a directory 21 | followlinks : bool, optional (default: False) 22 | 23 | Returns 24 | ------- 25 | filepaths : List 26 | List of absolute paths to files 27 | """ 28 | filepaths = [] 29 | for path, _, files in os.walk(root, followlinks=followlinks): 30 | for name in files: 31 | filepaths.append(os.path.abspath(os.path.join(path, name))) 32 | return filepaths 33 | 34 | 35 | def get_from_package(package_name: str, path: str) -> str: 36 | """ 37 | Get the absolute path to a file in a package. 38 | 39 | Parameters 40 | ---------- 41 | package_name : str 42 | e.g. 'mpu' 43 | path : str 44 | Path within a package 45 | 46 | Returns 47 | ------- 48 | filepath : str 49 | """ 50 | filepath = pkg_resources.resource_filename(package_name, path) 51 | return os.path.abspath(filepath) 52 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. mpu documentation master file, created by 2 | sphinx-quickstart on Wed May 2 22:11:51 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to mpu's documentation! 7 | =============================== 8 | 9 | 10 | This package contains various small functions and classes. All of the 11 | functionality is not offered by any mayor package. 12 | 13 | Core design principles are: 14 | 15 | * **Lightweight**: mpu does not bring unexpected dependencies. You have 16 | fine-grained control via extras. 17 | * **Documentation**: Every parameter is properly documented. For each opened 18 | issue or question I will think about adding the information to the docs 19 | * **Testing**: >90% test coverage. For each issue found I will think about 20 | creating a test which could have shown the issue. 21 | 22 | Please note that this is not in version 1.0 yet. So there will likely be 23 | breaking changes. 24 | 25 | 26 | Contents: 27 | 28 | .. 
toctree:: 29 | :maxdepth: 2 30 | 31 | mpu 32 | aws 33 | datastructures 34 | datetime 35 | decorators 36 | geometry 37 | image 38 | io 39 | math 40 | ml 41 | path 42 | pd 43 | shell 44 | string 45 | type 46 | units 47 | 48 | 49 | 50 | Indices and tables 51 | ------------------ 52 | 53 | * :ref:`modindex` 54 | * :ref:`search` 55 | -------------------------------------------------------------------------------- /tests/test_char_trie.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Test the mpu.datastructures.char_trie module.""" 4 | 5 | # Third party 6 | import pytest 7 | 8 | # First party 9 | from mpu.datastructures.trie.char_trie import EMPTY_NODE, Trie, TrieNode 10 | 11 | 12 | def test_trie_print(): 13 | data = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 14 | trie = Trie(data) 15 | trie_data = trie.print(print_stdout=False) 16 | expected = """Trie 17 | 18 | c 19 | a 20 | t 21 | t 22 | l 23 | e 24 | d 25 | o 26 | g 27 | t 28 | o 29 | m 30 | a 31 | t 32 | o 33 | e 34 | c 35 | a 36 | t""" 37 | assert trie_data == expected 38 | trie.print(print_stdout=True) 39 | 40 | 41 | def test_create_trie_node_with_children(): 42 | TrieNode("b", children={"a": TrieNode("a")}) 43 | 44 | 45 | def test_trie_node_push(): 46 | node = TrieNode(value="a") 47 | with pytest.raises(ValueError) as exinfo: 48 | node.push("") 49 | assert str(exinfo.value) == "The pushed value should not be empty" 50 | 51 | 52 | def test_get_subtrie_from_empty(): 53 | node = Trie() 54 | prefix, node = node.get_subtrie("") 55 | assert prefix == "" 56 | assert node._value == EMPTY_NODE._value 57 | assert node.is_word == EMPTY_NODE.is_word 58 | assert node.count == EMPTY_NODE.count 59 | assert node.children == EMPTY_NODE.children 60 | -------------------------------------------------------------------------------- /docs/source/units.rst: -------------------------------------------------------------------------------- 1 | mpu.units 2 | ========= 3 | 4 | Module contents 5 | --------------- 6 | 7 | .. 
automodule:: mpu.units 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | Allowed operations with Money 13 | ----------------------------- 14 | Here you can see which operations are allowed by two Money objects of 15 | currencies (A and B): 16 | 17 | +---------+----------------------+----------+---------+---------------+ 18 | | Money A | Operator | Money A | Money B | int, Fraction | 19 | +=========+======================+==========+=========+===============+ 20 | | | `+` , `-` | Money A | N/A | N/A | 21 | +---------+----------------------+----------+---------+---------------+ 22 | | | `*` | N/A | N/A | Money A | 23 | +---------+----------------------+----------+---------+---------------+ 24 | | | `/` | N/A | N/A | N/A | 25 | +---------+----------------------+----------+---------+---------------+ 26 | | | `//` | Fraction | N/A | Money A | 27 | +---------+----------------------+----------+---------+---------------+ 28 | | | `>`, `>=`, `<`, `<=` | Bool | N/A | N/A | 29 | +---------+----------------------+----------+---------+---------------+ 30 | | | == | Bool | False | False | 31 | +---------+----------------------+----------+---------+---------------+ 32 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # pre-commit run --all-files 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v4.1.0 5 | hooks: 6 | - id: check-ast 7 | - id: check-byte-order-marker 8 | - id: check-case-conflict 9 | - id: check-docstring-first 10 | - id: check-executables-have-shebangs 11 | - id: check-json 12 | - id: check-yaml 13 | - id: debug-statements 14 | - id: detect-private-key 15 | - id: end-of-file-fixer 16 | - id: trailing-whitespace 17 | - id: mixed-line-ending 18 | - repo: https://github.com/MartinThoma/check-pip-compile 19 | rev: 0.1.0 20 | hooks: 21 | - id: check-pip-compile 22 | args: ['requirements/ci.in', 'requirements/lint.in'] 23 | - repo: https://github.com/pre-commit/mirrors-mypy 24 | rev: v0.931 25 | hooks: 26 | - id: mypy 27 | args: [--ignore-missing-imports] 28 | additional_dependencies: [lxml, types-simplejson, types-pytz, types-tzlocal, types-setuptools] 29 | - repo: https://github.com/asottile/seed-isort-config 30 | rev: v2.2.0 31 | hooks: 32 | - id: seed-isort-config 33 | - repo: https://github.com/pre-commit/mirrors-isort 34 | rev: v5.10.1 35 | hooks: 36 | - id: isort 37 | - repo: https://github.com/psf/black 38 | rev: 22.1.0 39 | hooks: 40 | - id: black 41 | - repo: https://github.com/asottile/pyupgrade 42 | rev: v2.31.0 43 | hooks: 44 | - id: pyupgrade 45 | args: [--py37-plus] 46 | - repo: https://github.com/asottile/blacken-docs 47 | rev: v1.12.1 48 | hooks: 49 | - id: blacken-docs 50 | additional_dependencies: [black==20.8b1] 51 | -------------------------------------------------------------------------------- /mpu/ml.py: -------------------------------------------------------------------------------- 1 | """Machine Learning functions.""" 2 | 3 | # Core Library 4 | from typing import Iterable, List 5 | 6 | # First party 7 | from mpu.math import argmax 8 | 9 | 10 | def indices2one_hot(indices: Iterable, nb_classes: int) -> List: 11 | """ 12 | Convert an iterable of indices to one-hot encoded list. 
13 | 14 | You might also be interested in sklearn.preprocessing.OneHotEncoder 15 | 16 | Parameters 17 | ---------- 18 | indices : Iterable 19 | iterable of indices 20 | nb_classes : int 21 | Number of classes 22 | 23 | Returns 24 | ------- 25 | one_hot : List 26 | 27 | Examples 28 | -------- 29 | >>> indices2one_hot([0, 1, 1], 3) 30 | [[1, 0, 0], [0, 1, 0], [0, 1, 0]] 31 | >>> indices2one_hot([0, 1, 1], 2) 32 | [[1, 0], [0, 1], [0, 1]] 33 | """ 34 | if nb_classes < 1: 35 | raise ValueError(f"nb_classes={nb_classes}, but positive number expected") 36 | 37 | one_hot = [] 38 | for index in indices: 39 | one_hot.append([0] * nb_classes) 40 | one_hot[-1][index] = 1 41 | return one_hot 42 | 43 | 44 | def one_hot2indices(one_hots: List) -> List: 45 | """ 46 | Convert an iterable of one-hot encoded targets to a list of indices. 47 | 48 | Parameters 49 | ---------- 50 | one_hots : List 51 | 52 | Returns 53 | ------- 54 | indices : List 55 | 56 | Examples 57 | -------- 58 | >>> one_hot2indices([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 59 | [0, 1, 2] 60 | 61 | >>> one_hot2indices([[1, 0], [1, 0], [0, 1]]) 62 | [0, 0, 1] 63 | """ 64 | indices = [] 65 | for one_hot in one_hots: 66 | indices.append(argmax(one_hot)) 67 | return indices 68 | -------------------------------------------------------------------------------- /tests/test_datetime.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | # Core Library 5 | from datetime import datetime 6 | 7 | # Third party 8 | import pytest 9 | import pytz 10 | 11 | # First party 12 | import mpu.datetime 13 | 14 | 15 | def test_add_hour(): 16 | tz = pytz.timezone("Europe/Berlin") 17 | out = mpu.datetime.add_time( 18 | datetime(1918, 4, 15, 0, 0, tzinfo=pytz.utc).astimezone(tz), hours=1 19 | ).isoformat() 20 | assert out == "1918-04-15T03:00:00+02:00" 21 | 22 | 23 | def test_add_day(): 24 | tz = pytz.timezone("Europe/Berlin") 25 | out = mpu.datetime.add_time( 26 | datetime(1918, 4, 15, 0, 0, tzinfo=pytz.utc).astimezone(tz), 27 | days=1, 28 | ).isoformat() 29 | assert out == "1918-04-16T02:00:00+02:00" 30 | 31 | 32 | def test_add_time_neutral(): 33 | """Call add_time without any specified time to add.""" 34 | tz = pytz.timezone("Europe/Berlin") 35 | out = mpu.datetime.add_time( 36 | datetime(1918, 4, 15, 0, 0, tzinfo=pytz.utc).astimezone(tz) 37 | ).isoformat() 38 | assert out == "1918-04-15T01:00:00+01:00" 39 | 40 | 41 | def test_add_time_all(): 42 | """Call add_time without any specified time to add.""" 43 | tz = pytz.timezone("Europe/Berlin") 44 | out = mpu.datetime.add_time( 45 | datetime(1918, 4, 15, 0, 0, tzinfo=pytz.utc).astimezone(tz), 46 | seconds=1, 47 | minutes=2, 48 | hours=3, 49 | ).isoformat() 50 | assert out == "1918-04-15T05:02:01+02:00" 51 | 52 | 53 | def test_generate_fail(): 54 | with pytest.raises(ValueError): 55 | mpu.datetime.generate(datetime(2018, 1, 1), datetime(2018, 1, 1)) 56 | 57 | 58 | def test_generate(): 59 | start = datetime(2018, 1, 1) 60 | end = datetime(2018, 2, 1) 61 | generated = mpu.datetime.generate(start, end) 62 | assert start <= generated <= end 63 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.7 3 | # To update, run: 4 | # 5 | # pip-compile docs/requirements.in 6 | # 7 | alabaster==0.7.12 8 | # via sphinx 9 | babel==2.9.1 10 | # via sphinx 11 | boto3==1.20.47 
12 | # via -r requirements.in 13 | botocore==1.23.47 14 | # via 15 | # boto3 16 | # s3transfer 17 | certifi==2021.10.8 18 | # via requests 19 | charset-normalizer==2.0.11 20 | # via requests 21 | docutils==0.17.1 22 | # via 23 | # sphinx 24 | # sphinx-rtd-theme 25 | idna==3.3 26 | # via requests 27 | imagesize==1.3.0 28 | # via sphinx 29 | jinja2==3.0.3 30 | # via sphinx 31 | jmespath==0.10.0 32 | # via 33 | # boto3 34 | # botocore 35 | markupsafe==2.0.1 36 | # via jinja2 37 | numpy==1.22.0 38 | # via pandas 39 | packaging==21.3 40 | # via sphinx 41 | pandas==1.3.5 42 | # via -r requirements.in 43 | pygments==2.11.2 44 | # via sphinx 45 | pyparsing==3.0.7 46 | # via packaging 47 | python-dateutil==2.8.2 48 | # via 49 | # botocore 50 | # pandas 51 | pytz==2021.3 52 | # via 53 | # babel 54 | # pandas 55 | requests==2.27.1 56 | # via sphinx 57 | s3transfer==0.5.1 58 | # via boto3 59 | six==1.16.0 60 | # via python-dateutil 61 | snowballstemmer==2.2.0 62 | # via sphinx 63 | sphinx==4.4.0 64 | # via 65 | # -r requirements.in 66 | # sphinx-rtd-theme 67 | sphinx-rtd-theme==1.0.0 68 | # via -r requirements.in 69 | sphinxcontrib-applehelp==1.0.2 70 | # via sphinx 71 | sphinxcontrib-devhelp==1.0.2 72 | # via sphinx 73 | sphinxcontrib-htmlhelp==2.0.0 74 | # via sphinx 75 | sphinxcontrib-jsmath==1.0.1 76 | # via sphinx 77 | sphinxcontrib-qthelp==1.0.3 78 | # via sphinx 79 | sphinxcontrib-serializinghtml==1.1.5 80 | # via sphinx 81 | typing-extensions==4.0.1 82 | # via -r requirements.in 83 | urllib3==1.26.8 84 | # via 85 | # botocore 86 | # requests 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | mypy-html/ 103 | 104 | # pytest 105 | .pytest_cache/ 106 | 107 | tests/reports/ 108 | 109 | lambda.zip 110 | venv-lambda/ 111 | report/ 112 | 113 | # https://pypi.org/project/mutmut/ 114 | .mutmut-cache 115 | html/ 116 | report.html 117 | assets/ 118 | 119 | .vscode/ 120 | *.code-workspace 121 | -------------------------------------------------------------------------------- /mpu/datetime.py: -------------------------------------------------------------------------------- 1 | """Datetime related utility functions.""" 2 | 3 | # Core Library 4 | import datetime as dt 5 | import random 6 | 7 | # Third party 8 | import pytz 9 | 10 | local_random = random.Random() 11 | 12 | 13 | def add_time(datetime_obj, days=0, hours=0, minutes=0, seconds=0): 14 | """ 15 | Add time to a timezone-aware datetime object. 16 | 17 | This keeps the timezone correct, even if it changes due to daylight 18 | saving time (DST). 19 | 20 | Parameters 21 | ---------- 22 | datetime_obj : datetime.datetime 23 | days : int 24 | hours : int 25 | minutes : int 26 | seconds : int 27 | 28 | Returns 29 | ------- 30 | datetime : datetime.datetime 31 | """ 32 | seconds += minutes * 60 33 | seconds += hours * 60**2 34 | seconds += days * 24 * 60**2 35 | t14 = datetime_obj + dt.timedelta(seconds=seconds) # Invalid timezone! 36 | t14 = t14.astimezone(pytz.utc).astimezone(t14.tzinfo) # Fix the timezone 37 | return t14 38 | 39 | 40 | def generate(minimum, maximum, local_random=local_random): 41 | """ 42 | Generate a random date. 43 | 44 | The generated dates are uniformly distributed. 
45 | 46 | Parameters 47 | ---------- 48 | minimum : datetime object 49 | maximum : datetime object 50 | local_random : random.Random 51 | 52 | Returns 53 | ------- 54 | generated_date : datetime object 55 | 56 | Examples 57 | -------- 58 | >>> import random; r = random.Random(); r.seed(0) 59 | >>> from datetime import datetime 60 | 61 | >>> generate(datetime(2018, 1, 1), datetime(2018, 1, 2), local_random=r) 62 | datetime.datetime(2018, 1, 1, 20, 15, 58, 47972) 63 | 64 | >>> generate(datetime(2018, 1, 1), datetime(2018, 1, 2), local_random=r) 65 | datetime.datetime(2018, 1, 1, 18, 11, 27, 260414) 66 | """ 67 | if not (minimum < maximum): 68 | raise ValueError(f"{minimum} is not smaller than {maximum}") 69 | 70 | time_d = maximum - minimum 71 | time_d_rand = time_d * local_random.random() 72 | generated = minimum + time_d_rand 73 | return generated 74 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | # https://setuptools.readthedocs.io/en/latest/setuptools.html#configuring-setup-using-setup-cfg-files 3 | name = mpu 4 | 5 | author = Martin Thoma 6 | author_email = info@martin-thoma.de 7 | maintainer = Martin Thoma 8 | maintainer_email = info@martin-thoma.de 9 | 10 | # keep in sync with mpu/_version.py 11 | version = 0.23.1 12 | 13 | description = Martins Python Utilities 14 | long_description = file: README.md 15 | long_description_content_type = text/markdown 16 | keywords = utility, 17 | 18 | platforms = Linux 19 | 20 | url = https://github.com/MartinThoma/mpu 21 | download_url = https://github.com/MartinThoma/mpu 22 | 23 | license = MIT 24 | 25 | # https://pypi.org/pypi?%3Aaction=list_classifiers 26 | classifiers = 27 | Development Status :: 3 - Alpha 28 | Environment :: Console 29 | Intended Audience :: Developers 30 | Intended Audience :: Information Technology 31 | License :: OSI Approved :: MIT License 32 | Natural Language :: English 33 | Operating System :: OS Independent 34 | Programming Language :: Python :: 3 35 | Programming Language :: Python :: 3 :: Only 36 | Programming Language :: Python :: 3.7 37 | Programming Language :: Python :: 3.8 38 | Programming Language :: Python :: 3.9 39 | Topic :: Software Development :: Libraries :: Python Modules 40 | Topic :: Software Development 41 | Topic :: Utilities 42 | 43 | [options] 44 | packages = find: 45 | python_requires = >=3.7 46 | 47 | [tool:pytest] 48 | addopts = --doctest-modules --cov=./mpu --cov-report html:tests/reports/coverage-html --cov-report term-missing --ignore=docs/ --durations=3 --timeout=30 49 | doctest_encoding = utf-8 50 | 51 | [pydocstyle] 52 | match_dir = mpu 53 | ignore = D105, D413, D107, D416, D212, D203, D417 54 | 55 | [flake8] 56 | max-complexity=10 57 | max_line_length = 88 58 | exclude = tests/*,.tox/*,.nox/*,docs/* 59 | ignore = H301,H306,H404,H405,W503,D105,D413,D103,D107,E252,N803,E203,C416,A001,A003,P102,SIM106 60 | 61 | [mutmut] 62 | backup = False 63 | runner = ./mutmut-test.sh 64 | tests_dir = tests/ 65 | 66 | [mypy] 67 | ignore_missing_imports = True 68 | -------------------------------------------------------------------------------- /requirements/lint.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile 3 | # To update, run: 4 | # 5 | # pip-compile requirements/lint.in 6 | # 7 | appdirs==1.4.4 8 | # via black 9 | astor==0.8.1 10 | # via flake8-simplify 11 | 
attrs==20.3.0 12 | # via 13 | # flake8-bugbear 14 | # flake8-eradicate 15 | # flake8-implicit-str-concat 16 | black==20.8b1 17 | # via -r requirements/lint.in 18 | click==7.1.2 19 | # via black 20 | eradicate==2.0.0 21 | # via flake8-eradicate 22 | flake8-assert-msg==1.1.1 23 | # via -r requirements/lint.in 24 | flake8-bugbear==21.3.2 25 | # via -r requirements/lint.in 26 | flake8-builtins==1.5.3 27 | # via -r requirements/lint.in 28 | flake8-comprehensions==3.4.0 29 | # via -r requirements/lint.in 30 | flake8-eradicate==1.0.0 31 | # via -r requirements/lint.in 32 | flake8-executable==2.1.1 33 | # via -r requirements/lint.in 34 | flake8-implicit-str-concat==0.2.0 35 | # via -r requirements/lint.in 36 | flake8-isort==4.0.0 37 | # via -r requirements/lint.in 38 | flake8-plugin-utils==1.3.1 39 | # via flake8-pytest-style 40 | flake8-pytest-style==1.4.0 41 | # via -r requirements/lint.in 42 | flake8-raise==0.0.5 43 | # via -r requirements/lint.in 44 | flake8-simplify==0.14.0 45 | # via -r requirements/lint.in 46 | flake8-string-format==0.3.0 47 | # via -r requirements/lint.in 48 | flake8==3.9.0 49 | # via 50 | # -r requirements/lint.in 51 | # flake8-assert-msg 52 | # flake8-bugbear 53 | # flake8-builtins 54 | # flake8-comprehensions 55 | # flake8-eradicate 56 | # flake8-executable 57 | # flake8-isort 58 | # flake8-raise 59 | # flake8-simplify 60 | # flake8-string-format 61 | isort==5.8.0 62 | # via flake8-isort 63 | mccabe==0.6.1 64 | # via 65 | # -r requirements/lint.in 66 | # flake8 67 | more-itertools==8.7.0 68 | # via flake8-implicit-str-concat 69 | mypy-extensions==0.4.3 70 | # via 71 | # black 72 | # mypy 73 | mypy==0.812 74 | # via -r requirements/lint.in 75 | pathspec==0.8.1 76 | # via black 77 | pycodestyle==2.7.0 78 | # via flake8 79 | pydocstyle==6.0.0 80 | # via -r requirements/lint.in 81 | pyflakes==2.3.0 82 | # via flake8 83 | regex==2021.3.17 84 | # via black 85 | snowballstemmer==2.1.0 86 | # via pydocstyle 87 | testfixtures==6.17.1 88 | # via flake8-isort 89 | toml==0.10.2 90 | # via black 91 | typed-ast==1.4.2 92 | # via 93 | # black 94 | # mypy 95 | typing-extensions==3.7.4.3 96 | # via 97 | # black 98 | # mypy 99 | -------------------------------------------------------------------------------- /tests/test_aws.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Test mpu.aws module.""" 4 | 5 | # Core Library 6 | import filecmp 7 | import os 8 | from tempfile import mkstemp 9 | 10 | # Third party 11 | import boto3 12 | import pkg_resources 13 | import pytest 14 | from moto import mock_s3 15 | 16 | # First party 17 | # internal modules 18 | import mpu.aws 19 | from mpu.aws import ExistsStrategy 20 | 21 | 22 | @pytest.mark.xfail 23 | @mock_s3 24 | def test_list_no_files(): 25 | """Test if listing files of an S3 bucket works.""" 26 | # We need to create the bucket since this is all in Moto's 'virtual' 27 | # AWS account 28 | conn = boto3.resource("s3", region_name="us-east-1") 29 | conn.create_bucket(Bucket="mybucket") 30 | assert mpu.aws.list_files("mybucket") == [] 31 | 32 | # Test upload 33 | path = "files/example.csv" 34 | local_path = pkg_resources.resource_filename(__name__, path) 35 | mpu.aws.s3_upload(local_path, "s3://mybucket/example_test.csv") 36 | assert mpu.aws.list_files("mybucket") == ["s3://mybucket/example_test.csv"] 37 | 38 | # Test download 39 | _, destination = mkstemp(suffix="example.csv") 40 | os.remove(destination) # make sure this file does NOT exist 41 | 
mpu.aws.s3_download("s3://mybucket/example_test.csv", destination)
42 |     assert filecmp.cmp(destination, local_path)
43 |     os.remove(destination)  # cleanup of mkstemp
44 | 
45 |     # Test download without destination
46 |     destination = mpu.aws.s3_download("s3://mybucket/example_test.csv")
47 |     os.remove(destination)
48 | 
49 |     # Test download: File exists
50 |     _, destination = mkstemp(suffix="example.csv")
51 |     with pytest.raises(RuntimeError):
52 |         mpu.aws.s3_download(
53 |             "s3://mybucket/example_test.csv",
54 |             destination,
55 |             exists_strategy=mpu.aws.ExistsStrategy.RAISE,
56 |         )
57 |     with pytest.raises(ValueError):
58 |         mpu.aws.s3_download(
59 |             "s3://mybucket/example_test.csv",
60 |             destination,
61 |             exists_strategy="raise",  # a plain string is not an ExistsStrategy
62 |         )
63 |     mpu.aws.s3_download(
64 |         "s3://mybucket/example_test.csv",
65 |         destination,
66 |         exists_strategy=mpu.aws.ExistsStrategy.ABORT,
67 |     )
68 |     mpu.aws.s3_download(
69 |         "s3://mybucket/example_test.csv",
70 |         destination,
71 |         exists_strategy=mpu.aws.ExistsStrategy.REPLACE,
72 |     )
73 | 
74 |     mpu.aws.s3_read("s3://mybucket/example_test.csv")
75 |     os.remove(destination)  # cleanup of mkstemp
76 | 
77 | 
78 | def test_s3_path_split():
79 |     with pytest.raises(ValueError) as exinfo:
80 |         mpu.aws._s3_path_split("foo/bar")
81 |     assert (
82 |         str(exinfo.value)
83 |         == "s3_path is expected to start with 's3://', but was foo/bar"
84 |     )
85 | 
--------------------------------------------------------------------------------
/tests/test_string.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | # Core Library
4 | import re
5 | 
6 | # Third party
7 | import hypothesis.strategies as st
8 | import pytest
9 | from hypothesis import given
10 | 
11 | # First party
12 | import mpu.string
13 | 
14 | 
15 | def test_str2bool_no_mapping():
16 |     with pytest.raises(ValueError):
17 |         mpu.string.str2bool("foobar")
18 | 
19 | 
20 | @pytest.mark.parametrize("illegal_default", ["foobar", True])
21 | def test_str2bool_illegal_default(illegal_default):
22 |     with pytest.raises(ValueError):
23 |         mpu.string.str2bool("yes", default=illegal_default)
24 | 
25 | 
26 | @pytest.mark.parametrize("illegal_default", ["foobar", True])
27 | def test_str2bool_or_none_illegal_default(illegal_default):
28 |     with pytest.raises(ValueError):
29 |         mpu.string.str2bool_or_none("yes", default=illegal_default)
30 | 
31 | 
32 | def test_is_iban_not():
33 |     assert mpu.string.is_iban("DE12") is False
34 |     assert mpu.string.is_iban("") is False
35 |     assert mpu.string.is_iban("ZZaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") is False
36 |     assert mpu.string.is_iban("DEaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") is False
37 | 
38 | 
39 | def test_is_iban():
40 |     iban = "FR14 2004 1010 0505 0001 3M02 606"
41 |     assert mpu.string.is_iban(iban)
42 | 
43 | 
44 | @pytest.mark.parametrize("illegal_default", ["foobar", True])
45 | def test_is_none_illegal_default(illegal_default):
46 |     with pytest.raises(ValueError):
47 |         mpu.string.is_none("none", default=illegal_default)
48 | 
49 | 
50 | def test_is_none_not():
51 |     with pytest.raises(ValueError):
52 |         mpu.string.is_none("foobar")
53 | 
54 | 
55 | @given(st.emails())
56 | def test_is_email(email):
57 |     assert mpu.string.is_email(email), f"is_email({email}) returned False"
58 | 
59 | 
60 | @given(st.ip_addresses(v=4))
61 | def test_is_ipv4(ip):
62 |     assert mpu.string.is_ipv4(str(ip)), f"is_ipv4({ip}) returned False"
63 | 
64 | 
65 | @pytest.mark.parametrize(
66 |     "valid_mail",
67 |     [
68 |         "noreply@example.com",
69 |         "noreply@example.de",
70 | 
"noreply+foo@gmail.com", 71 | "jon.smith@notice.tuya.co", 72 | "NoRePlY@ExAmPlE.cOm", 73 | "noreply@example.de", 74 | "noreply+foo@gmail.com", 75 | "jon.smith@notice.tuya.co", 76 | ], 77 | ) 78 | def test_email_pattern_positive(valid_mail): 79 | """Check if valid emails are recognized as being valid.""" 80 | email_pattern = re.compile(mpu.string.email_regex) 81 | assert email_pattern.match(valid_mail) 82 | 83 | 84 | @pytest.mark.parametrize( 85 | "invalid_mail", 86 | [ 87 | "noreply.@example.com", 88 | "@example.de", 89 | ], 90 | ) 91 | def test_email_pattern_negative(invalid_mail): 92 | """Check if invalid emails are recognized as being invalid.""" 93 | email_pattern = re.compile(mpu.string.email_regex) 94 | assert email_pattern.match(invalid_mail) is None 95 | -------------------------------------------------------------------------------- /mpu/shell.py: -------------------------------------------------------------------------------- 1 | """Enhancing printed terminal output.""" 2 | 3 | # Core Library 4 | from typing import List 5 | 6 | 7 | class Codes: 8 | """Escape sequences for enhanced shell output.""" 9 | 10 | RESET_ALL = "\033[0m" 11 | 12 | BOLD = "\033[1m" 13 | DIM = "\033[2m" 14 | UNDERLINED = "\033[4m" 15 | BLINK = "\033[5m" 16 | REVERSE = "\033[7m" 17 | HIDDEN = "\033[8m" 18 | 19 | RESET_BOLD = "\033[21m" 20 | RESET_DIM = "\033[22m" 21 | RESET_UNDERLINED = "\033[24m" 22 | RESET_BLINK = "\033[25m" 23 | RESET_REVERSE = "\033[27m" 24 | RESET_HIDDEN = "\033[28m" 25 | 26 | DEFAULT = "\033[39m" 27 | BLACK = "\033[30m" 28 | RED = "\033[31m" 29 | GREEN = "\033[32m" 30 | YELLOW = "\033[33m" 31 | BLUE = "\033[34m" 32 | MAGENTA = "\033[35m" 33 | CYAN = "\033[36m" 34 | LIGHT_GRAY = "\033[37m" 35 | DARK_GRAY = "\033[90m" 36 | LIGHT_RED = "\033[91m" 37 | LIGHT_GREEN = "\033[92m" 38 | LIGHT_YELLOW = "\033[93m" 39 | LIGHT_BLUE = "\033[94m" 40 | LIGHT_MAGENTA = "\033[95m" 41 | LIGHT_CYAN = "\033[96m" 42 | WHITE = "\033[97m" 43 | 44 | BACKGROUND_DEFAULT = "\033[49m" 45 | BACKGROUND_BLACK = "\033[40m" 46 | BACKGROUND_RED = "\033[41m" 47 | BACKGROUND_GREEN = "\033[42m" 48 | BACKGROUND_YELLOW = "\033[43m" 49 | BACKGROUND_BLUE = "\033[44m" 50 | BACKGROUND_MAGENTA = "\033[45m" 51 | BACKGROUND_CYAN = "\033[46m" 52 | BACKGROUND_LIGHT_GRAY = "\033[47m" 53 | BACKGROUND_DARK_GRAY = "\033[100m" 54 | BACKGROUND_LIGHT_RED = "\033[101m" 55 | BACKGROUND_LIGHT_GREEN = "\033[102m" 56 | BACKGROUND_LIGHT_YELLOW = "\033[103m" 57 | BACKGROUND_LIGHT_BLUE = "\033[104m" 58 | BACKGROUND_LIGHT_MAGENTA = "\033[105m" 59 | BACKGROUND_LIGHT_CYAN = "\033[106m" 60 | BACKGROUND_WHITE = "\033[107m" 61 | 62 | 63 | def print_table(table: List) -> None: 64 | """ 65 | Print as a table. 66 | 67 | I recommend looking at [`tabulate`](https://pypi.org/project/tabulate/). 68 | 69 | Parameters 70 | ---------- 71 | table : List 72 | 73 | Examples 74 | -------- 75 | >>> print_table([[1, 2, 3], [41, 0, 1]]) 76 | 1 2 3 77 | 41 0 1 78 | """ 79 | table = [[str(cell) for cell in row] for row in table] 80 | column_widths = [len(cell) for cell in table[0]] 81 | for row in table: 82 | for x, cell in enumerate(row): 83 | column_widths[x] = max(column_widths[x], len(cell)) 84 | 85 | formatters = [] 86 | for width in column_widths: 87 | formatters.append("{:>" + str(width) + "}") 88 | formatter = " ".join(formatters) 89 | for row in table: 90 | print(formatter.format(*row)) 91 | 92 | 93 | def text_input(text: str) -> str: 94 | """ 95 | Ask the user for textual input. 96 | 97 | Parameters 98 | ---------- 99 | text : str 100 | What the user sees. 
101 | 102 | Returns 103 | ------- 104 | entered_text : str 105 | What the user wrote. 106 | """ 107 | return input(text) 108 | -------------------------------------------------------------------------------- /tests/test_trie.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Test the mpu.datastructures.trie module.""" 4 | 5 | # Third party 6 | import pytest 7 | 8 | # First party 9 | from mpu.datastructures.trie import Trie as DefaultTrie 10 | from mpu.datastructures.trie.base import AbstractTrie 11 | from mpu.datastructures.trie.char_trie import Trie as CharTrie 12 | from mpu.datastructures.trie.full_prefix_dict import FullPrefixDict 13 | from mpu.datastructures.trie.string_trie import Trie as StringTrie 14 | 15 | all_tries = [CharTrie, StringTrie, FullPrefixDict, DefaultTrie] 16 | 17 | 18 | def test_abstract_trie(): 19 | with pytest.raises(TypeError) as exinfo: 20 | trie = AbstractTrie() 21 | msg = ( 22 | "Can't instantiate abstract class AbstractTrie with abstract " 23 | "methods __contains__, __iter__, __len__, autocomplete" 24 | ) 25 | assert str(exinfo.value) == msg 26 | 27 | 28 | @pytest.mark.parametrize("Trie", all_tries) 29 | def test_trie_creation(Trie): 30 | data = ["dog", "cat", "cattle", "tom", "dinosaur", "tomcat", "tomatoe"] 31 | trie = Trie(data) 32 | assert {element for element in trie} == set(data) 33 | 34 | 35 | @pytest.mark.parametrize("Trie", all_tries) 36 | def test_trie_add_same(Trie): 37 | trie = Trie(["dog", "cat", "dog"]) 38 | assert sorted(word for word in trie) == ["cat", "dog", "dog"] 39 | 40 | 41 | @pytest.mark.parametrize("Trie", all_tries) 42 | def test_empty_trie_iter_empty(Trie): 43 | trie = Trie() 44 | assert [word for word in trie] == [] 45 | 46 | 47 | @pytest.mark.parametrize("Trie", all_tries) 48 | def test_contains(Trie): 49 | words = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 50 | trie = Trie(words) 51 | for word in words: 52 | assert word in trie 53 | 54 | words = [ 55 | "creeker", 56 | "creekfish", 57 | "creekfishes", 58 | "Creeks", 59 | "creekside", 60 | "creekstuff", 61 | "creeky", 62 | ] 63 | trie = Trie(words) 64 | for word in words: 65 | assert word in trie 66 | 67 | 68 | @pytest.mark.parametrize("Trie", all_tries) 69 | def test_len_initialization(Trie): 70 | words = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 71 | trie = Trie(words) 72 | assert len(trie) == len(words) 73 | 74 | 75 | @pytest.mark.parametrize("Trie", all_tries) 76 | def test_len_push(Trie): 77 | words = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 78 | trie = Trie() 79 | for word in words: 80 | trie.push(word) 81 | assert len(trie) == len(words) 82 | 83 | 84 | @pytest.mark.parametrize("Trie", all_tries) 85 | def test_autocomplete_empty(Trie): 86 | trie = Trie() 87 | assert list(trie.autocomplete("")) == [] 88 | 89 | 90 | @pytest.mark.parametrize("Trie", all_tries) 91 | def test_contains_empty_true(Trie): 92 | trie = Trie([""]) 93 | assert "" in trie 94 | 95 | 96 | @pytest.mark.parametrize("Trie", all_tries) 97 | def test_contains_empty_false(Trie): 98 | trie = Trie(["foo"]) 99 | assert "" not in trie 100 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Core Library 4 | import sys 5 | import time 6 | import traceback 7 | 8 | # Third party 9 | import pytest 10 | 11 | # First party 12 | from mpu import 
( 13 | Location, 14 | clip, 15 | consistent_shuffle, 16 | exception_logging, 17 | haversine_distance, 18 | is_in_interval, 19 | parallel_for, 20 | ) 21 | 22 | 23 | def test_clip(): 24 | assert clip(42) == 42 25 | assert clip(42, 0, 100) == 42 26 | assert clip(42, 0, 42.0) == 42 27 | assert clip(42, None, 100) == 42 28 | assert clip(42, 0, None) == 42 29 | assert clip(-42, 0, None) == 0 30 | assert clip(420, None, 100) == 100 31 | 32 | 33 | def test_parallel_for(): 34 | def looping_function(payload): 35 | i, j = payload 36 | time.sleep(1) 37 | return i + j 38 | 39 | parameters = [(i, i + 1) for i in range(50)] 40 | out = parallel_for(looping_function, parameters) 41 | assert out == [2 * i + 1 for i in range(50)] 42 | 43 | 44 | def test_haversine(): 45 | with pytest.raises(ValueError): 46 | haversine_distance((-200, 0), (0, 0)) 47 | with pytest.raises(ValueError): 48 | haversine_distance((0, -200), (0, 0)) 49 | with pytest.raises(ValueError): 50 | haversine_distance((0, 0), (-200, 0)) 51 | with pytest.raises(ValueError): 52 | haversine_distance((0, 0), (0, -200)) 53 | 54 | 55 | def test_is_in_interval_raises(): 56 | with pytest.raises(ValueError): 57 | is_in_interval(10, 20, 100) 58 | 59 | 60 | def test_is_in_interval_ok(): 61 | is_in_interval(10, 10, 100) 62 | 63 | 64 | def test_exception_logging(): 65 | def raise_exception(): 66 | try: 67 | raise Exception 68 | except Exception: 69 | ex_type, ex, tb = sys.exc_info() 70 | traceback.print_tb(tb) 71 | return tb 72 | 73 | exception_logging(exctype="ValueError", value=None, tb=raise_exception()) 74 | 75 | 76 | def test_location_class(): 77 | munich = Location(48.137222222222, 11.575555555556) 78 | berlin = Location(52.518611111111, 13.408333333333) 79 | assert abs(munich.distance(berlin) - 506.7) < 10 80 | assert "google.com" in munich.get_google_maps_link() 81 | assert munich.get_google_maps_link().startswith("http") 82 | assert str(munich) == "Location(48.137222222222, 11.575555555556)" 83 | 84 | 85 | def test_location_value_range(): 86 | with pytest.raises(ValueError): 87 | Location(90.000000001, 42) 88 | with pytest.raises(ValueError): 89 | Location(-90.000000001, 42) 90 | Location(90.0, 42) 91 | Location(-90.0, 42) 92 | with pytest.raises(ValueError): 93 | Location(42, 180.000000001) 94 | with pytest.raises(ValueError): 95 | Location(42, -180.000000001) 96 | Location(42, 180.0) 97 | Location(42, -180.0) 98 | 99 | 100 | def test_consistent_shuffle_single(): 101 | input_list = [[1, 2], [3, 4]] 102 | result = consistent_shuffle(*input_list) 103 | assert result == ([1, 2], [3, 4]) or result == ([2, 1], [4, 3]) 104 | -------------------------------------------------------------------------------- /tests/test_math.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | # Core Library 5 | import itertools 6 | 7 | # Third party 8 | import hypothesis.strategies as st 9 | import pytest 10 | from hypothesis import given 11 | 12 | # First party 13 | import mpu.math 14 | 15 | 16 | def test_factorize_zero(): 17 | with pytest.raises(ValueError) as exinfo: 18 | mpu.math.factorize(0) 19 | assert str(exinfo.value) == "All primes are prime factors of 0." 
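# Context for the assertions in this module: mpu.math.factorize() prepends -1
# for negative inputs (factorize(-12) == [-1, 2, 2, 3]), so multiplying the
# returned factors always reconstructs the input -- the invariant that the
# property-based test_factorize further down relies on.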
20 | 
21 | 
22 | @given(st.floats())
23 | def test_factorize_float(a_float):
24 |     with pytest.raises(ValueError) as exinfo:
25 |         mpu.math.factorize(a_float)
26 |     assert str(exinfo.value) == "integer expected, but type(number)=<class 'float'>"
27 | 
28 | 
29 | def test_factorize_at_border():
30 |     assert mpu.math.factorize(991**2) == [991, 991]
31 | 
32 | 
33 | @given(an_integer=st.integers(min_value=-(10**6), max_value=10**6))
34 | def test_factorize(an_integer):
35 |     if an_integer == 0:
36 |         # This is tested in `test_factorize_zero` and should throw an exception
37 |         return
38 |     factors = mpu.math.factorize(an_integer)
39 |     product = 1
40 |     for factor in factors:
41 |         product *= factor
42 |     assert product == an_integer
43 | 
44 | 
45 | def test_factorize_5():
46 |     assert mpu.math.factorize(5) == [5]
47 | 
48 | 
49 | def test_factorize_1024(benchmark):
50 |     assert benchmark(mpu.math.factorize, 1024) == [2] * 10
51 | 
52 | 
53 | def test_factorize_3072(benchmark):
54 |     assert benchmark(mpu.math.factorize, 3072) == [2] * 10 + [3]
55 | 
56 | 
57 | def test_argmax(benchmark):
58 |     assert benchmark(mpu.math.argmax, [1, 2, 3]) == 2
59 | 
60 | 
61 | @given(st.lists(st.integers(), min_size=1))
62 | def test_argmax_property(integer_list):
63 |     argmax = mpu.math.argmax(integer_list)
64 |     max_value = integer_list[argmax]
65 |     for el in integer_list:
66 |         assert el <= max_value
67 | 
68 | 
69 | def test_gcd_fail():
70 |     with pytest.raises(ValueError) as exinfo:
71 |         mpu.math.gcd(0, 7)
72 |     assert str(exinfo.value) == "gcd(a=0, b=7) is undefined"
73 | 
74 | 
75 | @given(st.integers(), st.integers())
76 | def test_gcd_is_divisor(a, b):
77 |     if a == 0 or b == 0:
78 |         with pytest.raises(ValueError) as exinfo:
79 |             mpu.math.gcd(a, b)
80 |         assert str(exinfo.value) == f"gcd(a={a}, b={b}) is undefined"
81 |     else:
82 |         gcd = mpu.math.gcd(a, b)
83 |         assert a % gcd == 0
84 |         assert b % gcd == 0
85 | 
86 | 
87 | @given(st.integers(), st.integers(), st.integers())
88 | def test_gcd_is_divisor_min_size(a, b, c):
89 |     if a == 0 or b == 0 or c == 0:
90 |         with pytest.raises(ValueError) as exinfo:
91 |             mpu.math.gcd(a * c, b * c)
92 |         assert str(exinfo.value) == f"gcd(a={a*c}, b={b*c}) is undefined"
93 |     else:
94 |         gcd = mpu.math.gcd(a * c, b * c)
95 |         assert (a * c) % gcd == 0
96 |         assert (b * c) % gcd == 0
97 |         assert gcd % c == 0
98 | 
99 | 
100 | def test_generate_primes():
101 |     first_primes = list(itertools.islice(mpu.math.generate_primes(), 10))
102 |     assert first_primes == [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]
103 | 
--------------------------------------------------------------------------------
/mpu/data/iban.csv:
--------------------------------------------------------------------------------
1 | country_en;length;bban_format;iban_fields
2 | Albania;28;8n,16c;ALkkbbbssssxcccccccccccccccc
3 | Andorra;24;8n,12c;ADkkbbbbsssscccccccccccc
4 | Austria;20;16n;ATkkbbbbbccccccccccc
5 | Azerbaijan;28;4c,20n;AZkkbbbbcccccccccccccccccccc
6 | Bahrain;22;4a,14c;BHkkbbbbcccccccccccccc
7 | Belarus;28;4c,20n;BYkkbbbbaaaacccccccccccccccc
8 | Belgium;16;12n;BEkkbbbcccccccxx
9 | Bosnia and Herzegovina;20;16n;BAkkbbbsssccccccccxx
10 | Brazil;29;23n,1a,1c;BRkkbbbbbbbbssssscccccccccctn
11 | Bulgaria;22;4a,6n,8c;BGkkbbbbssssttcccccccc
12 | Costa Rica;22;18n;CRkk0bbbcccccccccccccc
13 | Croatia;21;17n;HRkkbbbbbbbcccccccccc
14 | Cyprus;28;8n,16c;CYkkbbbssssscccccccccccccccc
15 | Czech Republic;24;20n;CZkkbbbbsssssscccccccccc
16 | Denmark;18;14n;DKkkbbbbcccccccccc
17 | Dominican Republic;28;4a,20n;DOkkbbbbcccccccccccccccccccc
18 | East Timor;23;19n;TLkkbbbccccccccccccccxx
19 | 
Estonia;20;16n;EEkkbbsscccccccccccx 20 | Faroe Islands;18;14n;FOkkbbbbcccccccccx 21 | Finland;18;14n;FIkkbbbbbbcccccccx 22 | France;27;10n,11c,2n;FRkkbbbbbssssscccccccccccxx 23 | Georgia;22;2c,16n;GEkkbbcccccccccccccccc 24 | Germany;22;18n;DEkkbbbbbbbbcccccccccc 25 | Gibraltar;23;4a,15c;GIkkbbbbccccccccccccccc 26 | Greece;27;7n,16c;GRkkbbbsssscccccccccccccccc 27 | Greenland;18;14n;GLkkbbbbcccccccccc 28 | Guatemala;28;4c,20c;GTkkbbbbmmttcccccccccccccccc 29 | Hungary;28;24n;HUkkbbbssssxcccccccccccccccx 30 | Iceland;26;22n;ISkkbbbbsscccccciiiiiiiiii 31 | Ireland;22;4c,14n;IEkkaaaabbbbbbcccccccc 32 | Israel;23;19n;ILkkbbbnnnccccccccccccc 33 | Italy;27;1a,10n,12c;ITkkxbbbbbssssscccccccccccc 34 | Jordan;30;4a,22n;JOkkbbbbsssscccccccccccccccccc 35 | Kazakhstan;20;3n,13c;KZkkbbbccccccccccccc 36 | Kosovo;20;4n,10n,2n;XKkkbbbbcccccccccccc 37 | Kuwait;30;4a,22c;KWkkbbbbcccccccccccccccccccccc 38 | Latvia;21;4a,13c;LVkkbbbbccccccccccccc 39 | Lebanon;28;4n,20c;LBkkbbbbcccccccccccccccccccc 40 | Liechtenstein;21;5n,12c;LIkkbbbbbcccccccccccc 41 | Lithuania;20;16n;LTkkbbbbbccccccccccc 42 | Luxembourg;20;3n,13c;LUkkbbbccccccccccccc 43 | Macedonia;19;3n,10c,2n;MKkkbbbccccccccccxx 44 | Malta;31;4a,5n,18c;MTkkbbbbssssscccccccccccccccccc 45 | Mauritania;27;23n;MRkkbbbbbssssscccccccccccxx 46 | Mauritius;30;4a,19n,3a;MUkkbbbbbbsscccccccccccc000mmm 47 | Monaco;27;10n,11c,2n;MCkkbbbbbssssscccccccccccxx 48 | Moldova;24;2c,18c;MDkkbbcccccccccccccccccc 49 | Montenegro;22;18n;MEkkbbbcccccccccccccxx 50 | Netherlands;18;4a,10n;NLkkbbbbcccccccccc 51 | Norway;15;11n;NOkkbbbbccccccx 52 | Pakistan;24;4c,16n;PKkkbbbbcccccccccccccccc 53 | Palestinian territories;29;4c,21n;PSkkbbbbxxxxxxxxxcccccccccccc 54 | Poland;28;24n;PLkkbbbssssxcccccccccccccccc 55 | Portugal;25;21n;PTkkbbbbsssscccccccccccxx 56 | Qatar;29;4a,21c;QAkkbbbbccccccccccccccccccccc 57 | Romania;24;4a,16c;ROkkbbbbcccccccccccccccc 58 | San Marino;27;1a,10n,12c;SMkkxbbbbbssssscccccccccccc 59 | Saudi Arabia;24;2n,18c;SAkkbbcccccccccccccccccc 60 | Serbia;22;18n;RSkkbbbcccccccccccccxx 61 | Slovakia;24;20n;SKkkbbbbsssssscccccccccc 62 | Slovenia;19;15n;SIkkbbsssccccccccxx 63 | Spain;24;20n;ESkkbbbbssssxxcccccccccc 64 | Sweden;24;20n;SEkkbbbccccccccccccccccc 65 | Switzerland;21;5n,12c;CHkkbbbbbcccccccccccc 66 | Tunisia;24;20n;TNkkbbsssccccccccccccccc 67 | Turkey;26;5n,17c;TRkkbbbbbxcccccccccccccccc 68 | United Arab Emirates;23;3n,16n;AEkkbbbcccccccccccccccc 69 | United Kingdom;22;4a,14n;GBkkbbbbsssssscccccccc 70 | Virgin Islands, British;24;4c,16n;VGkkbbbbcccccccccccccccc 71 | -------------------------------------------------------------------------------- /tests/test_nodebased_trie.py: -------------------------------------------------------------------------------- 1 | # Third party 2 | import pytest 3 | 4 | # First party 5 | from mpu.datastructures.trie.char_trie import EMPTY_NODE as CHAR_EMPTY_NODE 6 | from mpu.datastructures.trie.char_trie import Trie as CharTrie 7 | from mpu.datastructures.trie.char_trie import TrieNode as CharTrieNode 8 | from mpu.datastructures.trie.string_trie import EMPTY_NODE as STRING_EMPTY_NODE 9 | from mpu.datastructures.trie.string_trie import Trie as StringTrie 10 | from mpu.datastructures.trie.string_trie import TrieNode as StringTrieNode 11 | 12 | nodebased_tries = [CharTrie, StringTrie] 13 | nodebased_tries_empty_nodes = [ 14 | (CharTrie, CHAR_EMPTY_NODE), 15 | (StringTrie, STRING_EMPTY_NODE), 16 | ] 17 | 18 | 19 | @pytest.mark.parametrize("Trie,EMPTY_NODE", nodebased_tries_empty_nodes) 20 | def test_get_subtrie_prefix_hit_miss(Trie, 
EMPTY_NODE): 21 | trie = Trie(["foo"]) 22 | prefix, subtrie = trie.get_subtrie("foobar") 23 | assert subtrie is EMPTY_NODE 24 | 25 | 26 | @pytest.mark.parametrize("Trie", nodebased_tries) 27 | def test_get_subtrie_prefix_hit_hit(Trie): 28 | trie = Trie(["foo", "foobar"]) 29 | words = [] 30 | prefix, subtrie = trie.get_subtrie("foobar") 31 | for word in subtrie: 32 | words.append(prefix + word) 33 | assert words == ["foobar"] 34 | 35 | 36 | @pytest.mark.parametrize("Trie,EMPTY_NODE", nodebased_tries_empty_nodes) 37 | def test_get_subtrie_direct_miss(Trie, EMPTY_NODE): 38 | trie = Trie(["foo"]) 39 | prefix, subtrie = trie.get_subtrie("bar") 40 | assert subtrie is EMPTY_NODE 41 | 42 | 43 | @pytest.mark.parametrize("Trie", nodebased_tries) 44 | def test_trie_autocomplete(Trie): 45 | data = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 46 | trie = Trie(data) 47 | assert list(trie.autocomplete("d")) == ["d", "dog"] 48 | expected = ["tom", "tomatoe", "tomcat"] 49 | assert sorted(trie.autocomplete("tom")) == expected 50 | 51 | data = ["tom", "d"] 52 | trie = Trie(data) 53 | assert list(trie.autocomplete("t")) == ["tom"] 54 | 55 | data = ["dog", "tomco", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 56 | trie = Trie(data) 57 | assert sorted(trie.autocomplete("tomc")) == ["tomcat", "tomco"] 58 | trie.print() 59 | print(trie.get_subtrie("tom")) 60 | assert list(trie.autocomplete("x")) == [] 61 | 62 | 63 | @pytest.mark.parametrize("Trie", nodebased_tries) 64 | def test_get_subtrie_direct_hit(Trie): 65 | trie = Trie(["foobar"]) 66 | prefix, subtrie = trie.get_subtrie("foobar") 67 | assert [prefix + word for word in subtrie] == ["foobar"] 68 | 69 | 70 | @pytest.mark.parametrize("Trie", nodebased_tries) 71 | def test_get_subtrie_empty(Trie): 72 | trie = Trie() 73 | prefix, subtrie = trie.get_subtrie("foobar") 74 | assert prefix == "" 75 | assert not subtrie.is_word 76 | assert subtrie.count == 0 77 | 78 | 79 | @pytest.mark.parametrize("Trie", nodebased_tries) 80 | def test_trie_creation_prefix_search(Trie): 81 | data = ["dog", "cat", "cattle", "tom", "d", "tomcat", "tomatoe"] 82 | trie = Trie(data) 83 | expected = {"tom", "tomcat", "tomatoe"} 84 | prefix, subtrie = trie.get_subtrie("tom") 85 | assert {prefix + element for element in subtrie} == expected 86 | 87 | 88 | @pytest.mark.parametrize("TrieNode", [CharTrieNode, StringTrieNode]) 89 | def test_frozen_node_push(TrieNode): 90 | node = TrieNode("a", freeze=True) 91 | with pytest.raises(RuntimeError): 92 | node.push("b") 93 | 94 | 95 | @pytest.mark.parametrize("Trie", nodebased_tries) 96 | def test_push_empty(Trie): 97 | trie = Trie() 98 | trie.push("") 99 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![PyPI version](https://badge.fury.io/py/mpu.svg)](https://badge.fury.io/py/mpu) 2 | [![Python Support](https://img.shields.io/pypi/pyversions/mpu.svg)](https://pypi.org/project/mpu/) 3 | [![Documentation Status](https://readthedocs.org/projects/mpu/badge/?version=latest)](http://mpu.readthedocs.io/en/latest/?badge=latest) 4 | [![Build Status](https://travis-ci.org/MartinThoma/mpu.svg?branch=master)](https://travis-ci.org/MartinThoma/mpu) 5 | [![MartinThoma](https://circleci.com/gh/MartinThoma/mpu.svg?style=shield)](https://app.circleci.com/pipelines/github/MartinThoma/mpu) 6 | [![Build 
Status](https://dev.azure.com/martinthoma/mpu/_apis/build/status/MartinThoma.mpu?branchName=master)](https://dev.azure.com/martinthoma/mpu/_build/latest?definitionId=1&branchName=master)
7 | [![Coverage Status](https://coveralls.io/repos/github/MartinThoma/mpu/badge.svg?branch=master)](https://coveralls.io/github/MartinThoma/mpu?branch=master)
8 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
9 | ![GitHub last commit](https://img.shields.io/github/last-commit/MartinThoma/mpu)
10 | ![GitHub commits since latest release (by SemVer)](https://img.shields.io/github/commits-since/MartinThoma/mpu/0.23.1)
11 | [![CodeFactor](https://www.codefactor.io/repository/github/martinthoma/mpu/badge/master)](https://www.codefactor.io/repository/github/martinthoma/mpu/overview/master)
12 | [![mutmut](https://img.shields.io/badge/mutmut-1417%2F1813-lightgrey)](https://pypi.org/project/mutmut/)
13 | 
14 | # mpu
15 | Martins Python Utilities (mpu) is a collection of utility functions and classes
16 | with no dependencies beyond the standard library.
17 | 
18 | The total size of the package will never exceed 10 MB; currently it is 120 kB
19 | in zipped form. This makes it a good candidate for inclusion in AWS Lambda
20 | projects.
21 | 
22 | 
23 | ## Installation
24 | 
25 | ```bash
26 | $ pip install git+https://github.com/MartinThoma/mpu.git
27 | ```
28 | 
29 | It can, of course, also be installed via PyPI.
30 | 
31 | 
32 | ## Usage
33 | 
34 | ### Datastructures
35 | 
36 | ```python-repl
37 | >>> from mpu.datastructures import EList
38 | 
39 | >>> l = EList([2, 1, 0])
40 | >>> l[2]
41 | 0
42 | 
43 | >>> l[[2, 0]]
44 | [0, 2]
45 | 
46 | >>> l[l]
47 | [0, 1, 2]
48 | ```
49 | 
50 | ### Shell
51 | 
52 | To enhance your terminal's output, you might want to do something like:
53 | 
54 | ```python
55 | from mpu.shell import Codes
56 | 
57 | print("{c.GREEN}{c.UNDERLINED}Works{c.RESET_ALL}".format(c=Codes))
58 | ```
59 | 
60 | 
61 | ### Quick Examples
62 | 
63 | Creating small example datastructures is a task I encounter once in a while
64 | for StackExchange answers.
65 | 
66 | ```python
67 | from mpu.pd import example_df
68 | 
69 | df = example_df()
70 | print(df)
71 | ```
72 | 
73 | gives
74 | 
75 | ```
76 |      country   population population_time    EUR
77 | 0    Germany   82521653.0      2016-12-01   True
78 | 1     France   66991000.0      2017-01-01   True
79 | 2  Indonesia  255461700.0      2017-01-01  False
80 | 3    Ireland    4761865.0             NaT   True
81 | 4      Spain   46549045.0      2017-06-01   True
82 | 5    Vatican          NaN             NaT   True
83 | ```
84 | 
85 | 
86 | ### Money
87 | 
88 | ```python
89 | import mpu
90 | from fractions import Fraction
91 | 
92 | gross_income = mpu.units.Money("2345.10", "EUR")
93 | net_income = gross_income * Fraction("0.80")
94 | apartment = mpu.units.Money("501.23", "EUR")
95 | savings = net_income - apartment
96 | print(savings)
97 | ```
98 | 
99 | prints `1375.31 Euro`
100 | 
101 | 
102 | ### IO
103 | 
104 | * Download files with [`mpu.io.download(source, sink)`](https://mpu.readthedocs.io/en/latest/io.html#mpu.io.download).
105 | * Read CSV, JSON and pickle with [`mpu.io.read(filepath)`](https://mpu.readthedocs.io/en/latest/io.html#mpu.io.read).
106 | * Write CSV, JSON and pickle with [`mpu.io.write(filepath, data)`](https://mpu.readthedocs.io/en/latest/io.html#mpu.io.write).
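Put together, a minimal sketch of the three calls (the URL and file names are
placeholders):

```python
from mpu.io import download, read, write

download("https://example.com/example.csv", "example.csv")  # fetch to a local file
data = read("example.csv")  # the reader is picked from the file suffix
write("example-copy.csv", data)  # writing dispatches on the suffix as well
```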
107 | 
--------------------------------------------------------------------------------
/requirements/ci.txt:
--------------------------------------------------------------------------------
1 | #
2 | # This file is autogenerated by pip-compile with python 3.7
3 | # To update, run:
4 | #
5 | #    pip-compile requirements/ci.in
6 | #
7 | attrs==21.4.0
8 |     # via
9 |     #   hypothesis
10 |     #   pytest
11 | bleach==4.1.0
12 |     # via readme-renderer
13 | boto3==1.20.47
14 |     # via
15 |     #   -r requirements/ci.in
16 |     #   moto
17 | botocore==1.23.47
18 |     # via
19 |     #   boto3
20 |     #   moto
21 |     #   s3transfer
22 | certifi==2021.10.8
23 |     # via requests
24 | cffi==1.15.0
25 |     # via cryptography
26 | charset-normalizer==2.0.11
27 |     # via requests
28 | click==8.0.3
29 |     # via pip-tools
30 | colorama==0.4.4
31 |     # via twine
32 | coverage[toml]==6.3.1
33 |     # via pytest-cov
34 | cryptography==36.0.1
35 |     # via
36 |     #   moto
37 |     #   secretstorage
38 | docutils==0.18.1
39 |     # via readme-renderer
40 | hypothesis==6.36.1
41 |     # via -r requirements/ci.in
42 | idna==3.3
43 |     # via requests
44 | importlib-metadata==4.10.1
45 |     # via
46 |     #   click
47 |     #   keyring
48 |     #   moto
49 |     #   pep517
50 |     #   pluggy
51 |     #   pytest
52 |     #   twine
53 | iniconfig==1.1.1
54 |     # via pytest
55 | jeepney==0.7.1
56 |     # via
57 |     #   keyring
58 |     #   secretstorage
59 | jinja2==3.0.3
60 |     # via moto
61 | jmespath==0.10.0
62 |     # via
63 |     #   boto3
64 |     #   botocore
65 | keyring==23.5.0
66 |     # via twine
67 | markupsafe==2.0.1
68 |     # via
69 |     #   jinja2
70 |     #   moto
71 | moto==3.0.2
72 |     # via -r requirements/ci.in
73 | numpy==1.21.5
74 |     # via pandas
75 | packaging==21.3
76 |     # via
77 |     #   bleach
78 |     #   pytest
79 | pandas==1.3.5
80 |     # via -r requirements/ci.in
81 | pep517==0.12.0
82 |     # via pip-tools
83 | pip-tools==6.4.0
84 |     # via -r requirements/ci.in
85 | pkginfo==1.8.2
86 |     # via twine
87 | pluggy==1.0.0
88 |     # via pytest
89 | py==1.11.0
90 |     # via pytest
91 | py-cpuinfo==8.0.0
92 |     # via pytest-benchmark
93 | pycparser==2.21
94 |     # via cffi
95 | pygments==2.11.2
96 |     # via readme-renderer
97 | pyparsing==3.0.7
98 |     # via packaging
99 | pytest==6.2.5
100 |     # via
101 |     #   -r requirements/ci.in
102 |     #   pytest-benchmark
103 |     #   pytest-cov
104 |     #   pytest-timeout
105 | pytest-benchmark==3.4.1
106 |     # via -r requirements/ci.in
107 | pytest-cov==3.0.0
108 |     # via -r requirements/ci.in
109 | pytest-timeout==2.1.0
110 |     # via -r requirements/ci.in
111 | python-dateutil==2.8.2
112 |     # via
113 |     #   botocore
114 |     #   moto
115 |     #   pandas
116 | pytz==2021.3
117 |     # via
118 |     #   moto
119 |     #   pandas
120 | readme-renderer==32.0
121 |     # via twine
122 | requests==2.27.1
123 |     # via
124 |     #   moto
125 |     #   requests-toolbelt
126 |     #   responses
127 |     #   twine
128 | requests-toolbelt==0.9.1
129 |     # via twine
130 | responses==0.18.0
131 |     # via moto
132 | rfc3986==2.0.0
133 |     # via twine
134 | s3transfer==0.5.1
135 |     # via boto3
136 | secretstorage==3.3.1
137 |     # via keyring
138 | simplejson==3.17.6
139 |     # via -r requirements/ci.in
140 | six==1.16.0
141 |     # via
142 |     #   bleach
143 |     #   python-dateutil
144 | sortedcontainers==2.4.0
145 |     # via hypothesis
146 | toml==0.10.2
147 |     # via pytest
148 | tomli==2.0.0
149 |     # via
150 |     #   coverage
151 |     #   pep517
152 | tqdm==4.62.3
153 |     # via twine
154 | twine==3.8.0
155 |     # via -r requirements/ci.in
156 | typing-extensions==4.0.1
157 |     # via importlib-metadata
158 | urllib3==1.26.8
159 |     # via
160 |     #   botocore
161 |     #   requests
162 |     #   
responses 163 | # twine 164 | webencodings==0.5.1 165 | # via bleach 166 | werkzeug==2.0.2 167 | # via moto 168 | wheel==0.37.1 169 | # via 170 | # -r requirements/ci.in 171 | # pip-tools 172 | xmltodict==0.12.0 173 | # via moto 174 | zipp==3.7.0 175 | # via 176 | # importlib-metadata 177 | # pep517 178 | 179 | # The following packages are considered to be unsafe in a requirements file: 180 | # pip 181 | # setuptools 182 | -------------------------------------------------------------------------------- /tests/test_pd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Core Library 4 | import datetime 5 | 6 | # Third party 7 | import pandas as pd 8 | 9 | # First party 10 | import mpu.pd 11 | 12 | 13 | def test_example_df(): 14 | df = mpu.pd.example_df() 15 | assert list(df.columns) == ["country", "population", "population_time", "EUR"] 16 | assert list(df["country"]) == [ 17 | "Germany", 18 | "France", 19 | "Indonesia", 20 | "Ireland", 21 | "Spain", 22 | "Vatican", 23 | ] 24 | assert list(df["population"])[:5] == [ 25 | 82521653.0, 26 | 66991000.0, 27 | 255461700.0, 28 | 4761865.0, 29 | 46549045.0, 30 | ] 31 | assert df["population_time"].equals( 32 | pd.Series( 33 | [ 34 | datetime.datetime(2016, 12, 1), 35 | datetime.datetime(2017, 1, 1), 36 | datetime.datetime(2017, 1, 1), 37 | None, # Ireland 38 | datetime.datetime(2017, 6, 1), # Spain 39 | None, 40 | ] 41 | ) 42 | ) 43 | assert list(df["EUR"]) == [True, True, False, True, True, True] 44 | 45 | 46 | def test_describe(capsys): 47 | mpu.pd.describe(mpu.pd.example_df()) 48 | captured = capsys.readouterr() 49 | assert ( 50 | captured.out 51 | == """Number of datapoints: 6 52 | 53 | ## Float Columns 54 | Column name Non-nan mean std min 25% 50% 75% max 55 | population 5 91257052.60 96317882.77 4761865.00 46549045.00 66991000.00 82521653.00 255461700.00 56 | 57 | ## Category Columns 58 | Column name Non-nan unique top el top (count) rest 59 | EUR 6 2 False 5 [True] 60 | 61 | ## Time Columns 62 | Column name Non-nan unique top el top (count) min max 63 | population_time 4 4 2016-12-01 00:00:00 2 2016-12-01 00:00:00 2017-06-01 00:00:00 64 | 65 | ## Other Columns 66 | Column name Non-nan unique top (count) rest 67 | country 6 6 France 1 ['Germany', 'Indonesia', 'Ireland', 'Spa 68 | """ 69 | ) 70 | 71 | 72 | def test_describe_int(capsys): 73 | column_info = {"int": ["numbers"]} 74 | df = pd.DataFrame({"numbers": [1, 2, 3, 100, 500]}) 75 | mpu.pd._describe_int(df, column_info) 76 | mpu.pd.describe(df, column_info) 77 | captured = capsys.readouterr() 78 | assert ( 79 | captured.out 80 | == """ 81 | ## Integer Columns 82 | Column name Non-nan mean std min 25% 50% 75% max 83 | numbers 5 121.2 215.96689561134133 1 2.0 3.0 100.0 500 84 | Number of datapoints: 5 85 | 86 | ## Integer Columns 87 | Column name Non-nan mean std min 25% 50% 75% max 88 | numbers 5 121.2 215.96689561134133 1 2.0 3.0 100.0 500 89 | """ 90 | ) 91 | 92 | 93 | def test_get_column_info_suspicious_categorical(): 94 | df = pd.DataFrame({"numbers": [1, 2, 3, 100, 500]}) 95 | mpu.pd._get_column_info(df, []) 96 | 97 | 98 | def test_get_column_info_nonsuspicious_categorical(): 99 | df = pd.DataFrame({"numbers": [i for i in range(200)]}) 100 | mpu.pd._get_column_info(df, []) 101 | 102 | 103 | def test_get_column_info_no_values(): 104 | df = pd.DataFrame({"numbers": []}) 105 | mpu.pd._get_column_info(df, []) 106 | 107 | 108 | def test_get_column_info_mixed_column(): 109 | df = pd.DataFrame({"numbers": [1, 2.3, None, 
"Foobar", (5, 10)]}) 110 | info = mpu.pd._get_column_info(df, []) 111 | 112 | assert set(info[1]["numbers"]["value_list"]) == {(5, 10), 2.3, "Foobar", 1} 113 | info[1]["numbers"]["value_list"] = None 114 | 115 | expected_column_info = { 116 | "category": [], 117 | "float": [], 118 | "int": [], 119 | "other": ["numbers"], 120 | "time": [], 121 | } 122 | expected_column_meta = { 123 | "numbers": {"top_count_val": 1, "value_list": None, "value_count": 4} 124 | } 125 | expected = (expected_column_info, expected_column_meta) 126 | assert info == expected 127 | 128 | 129 | def test_get_column_info_column_unknown_dtype(): 130 | df = pd.DataFrame({"numbers": [datetime.timedelta(days=3)]}) 131 | info = mpu.pd._get_column_info(df, []) 132 | 133 | assert set(info[1]["numbers"]["value_list"]) == {datetime.timedelta(days=3)} 134 | info[1]["numbers"]["value_list"] = None 135 | 136 | expected_column_info = { 137 | "category": [], 138 | "float": [], 139 | "int": [], 140 | "other": [], 141 | "time": [], 142 | } 143 | expected_column_meta = { 144 | "numbers": {"top_count_val": 1, "value_list": None, "value_count": 1} 145 | } 146 | expected = (expected_column_info, expected_column_meta) 147 | assert info == expected 148 | 149 | 150 | def test_countries_global(): 151 | assert len(mpu.pd.countries) == 248 152 | -------------------------------------------------------------------------------- /mpu/datastructures/trie/char_trie.py: -------------------------------------------------------------------------------- 1 | # Core Library 2 | import logging 3 | from typing import Dict, List, Tuple 4 | 5 | # First party 6 | from mpu.datastructures.trie.base import AbstractTrie 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class TrieNode: 12 | def __init__(self, value, is_word=False, count=0, children=None, freeze=False): 13 | if children is None: 14 | children = {} 15 | self._value = value 16 | self.children: Dict[str, TrieNode] = children 17 | self.is_word = is_word 18 | self.count = count 19 | self.is_frozen = freeze 20 | 21 | def get_subtrie(self, search_prefix: str, current_trie_node_prefix: str = ""): 22 | """ 23 | Get the TrieNodes which represents the given prefix. 24 | 25 | If the search_prefix is not in the trie, return ("", EMPTY_NODE). 26 | The found_prefix is a prefix of search_prefix or equal to it. 27 | 28 | Parameters 29 | ---------- 30 | search_prefix : str 31 | current_trie_node_prefix : str, optional (default: "") 32 | 33 | Returns 34 | ------- 35 | found_prefix, subtrie: Tuple[str, TrieNode] 36 | """ 37 | if len(search_prefix) == 0: 38 | return (current_trie_node_prefix, self) 39 | elif search_prefix[0] in self.children: 40 | child = self.children[search_prefix[0]] 41 | remainder = search_prefix[1:] 42 | new_prefix = current_trie_node_prefix + self._value 43 | return child.get_subtrie(remainder, current_trie_node_prefix=new_prefix) 44 | else: 45 | return ("", EMPTY_NODE) 46 | 47 | def push(self, value: str): 48 | if self.is_frozen: 49 | raise RuntimeError("The node is frozen. 
You may not edit it.") 50 | if value == self._value and len(value) == 0: 51 | # This is the root node 52 | self.is_word = True 53 | self.count += 1 54 | return 55 | if len(value) == 0: 56 | raise ValueError("The pushed value should not be empty") 57 | elif len(value) == 1: 58 | char = value[0] 59 | if char not in self.children: 60 | self.children[char] = TrieNode(value=char, is_word=True, count=1) 61 | else: 62 | self.children[char].is_word = True 63 | self.children[char].count += 1 64 | else: 65 | char = value[0] 66 | if char not in self.children: 67 | self.children[char] = TrieNode(value=char, is_word=False, count=0) 68 | self.children[char].push(value[1:]) 69 | 70 | def __iter__(self): 71 | self._iteration_queue: List[Tuple[TrieNode, str]] = [(self, "")] 72 | while self._iteration_queue: 73 | trie_node, prefix = self._iteration_queue.pop() 74 | children = sorted(trie_node.children.items(), key=lambda n: n[0]) 75 | for _, child in children: 76 | self._iteration_queue.append((child, prefix + trie_node._value)) 77 | if trie_node.is_word: 78 | for _ in range(trie_node.count): 79 | yield prefix + trie_node._value 80 | 81 | def print(self, _indent: int = 0): 82 | string = "" 83 | string += " " * _indent + self._value + "\n" 84 | children = sorted(self.children.values(), key=lambda child: child._value) 85 | for child in children: 86 | string += child.print(_indent=_indent + 1) 87 | return string 88 | 89 | def __str__(self): 90 | return f"TrieNode(value='{self._value}', nb_children='{len(self.children)}')" 91 | 92 | __repr__ = __str__ 93 | 94 | 95 | EMPTY_NODE = TrieNode(value="", is_word=False, count=0, freeze=True) 96 | 97 | 98 | class Trie(AbstractTrie): 99 | def __init__(self, container=None): 100 | if container is None: 101 | container = [] 102 | self._root = TrieNode(value="", count=0, is_word=0) 103 | self._length = 0 104 | for element in container: 105 | self.push(element) 106 | 107 | def __len__(self) -> int: 108 | return self._length 109 | 110 | def __contains__(self, element) -> bool: 111 | found_prefix, subtrie = self.get_subtrie(element) 112 | return subtrie.is_word and found_prefix + subtrie._value == element 113 | 114 | def autocomplete(self, prefix): 115 | found_prefix, subtrie = self.get_subtrie(prefix) 116 | for word in subtrie: 117 | yield found_prefix + word 118 | 119 | def get_subtrie(self, prefix) -> Tuple[str, TrieNode]: 120 | return self._root.get_subtrie(prefix) 121 | 122 | def __iter__(self): 123 | self._iteration_index = -1 124 | self._child_values = [element for element in self._root] 125 | return self 126 | 127 | def __next__(self): 128 | """Return the next value from the Trie.""" 129 | self._iteration_index += 1 130 | if self._iteration_index < self._length: 131 | return self._child_values[self._iteration_index] 132 | raise StopIteration 133 | 134 | def push(self, element: str): 135 | self._root.push(element) 136 | self._length += 1 137 | 138 | def print(self, print_stdout=True) -> str: 139 | string = "Trie\n" 140 | string += self._root.print() 141 | string = string.strip() 142 | if print_stdout: 143 | print(string) 144 | return string 145 | 146 | def __str__(self): 147 | return f"Trie(len={self._length}, {self._root})" 148 | 149 | __repr__ = __str__ 150 | -------------------------------------------------------------------------------- /mpu/aws.py: -------------------------------------------------------------------------------- 1 | """Convenience functions for AWS interactions.""" 2 | 3 | # Core Library 4 | import enum 5 | import os 6 | from collections import 
namedtuple 7 | from tempfile import mkstemp 8 | from typing import List, Optional 9 | 10 | # Third party 11 | import boto3.session 12 | 13 | 14 | def list_files( 15 | bucket: str, prefix: str = "", profile_name: Optional[str] = None 16 | ) -> List[str]: 17 | """ 18 | List up to 1000 files in a bucket. 19 | 20 | Parameters 21 | ---------- 22 | bucket : str 23 | prefix : str 24 | profile_name : str, optional 25 | AWS profile 26 | 27 | Returns 28 | ------- 29 | s3_paths : List[str] 30 | """ 31 | session = boto3.session.Session(profile_name=profile_name) 32 | conn = session.client("s3") 33 | keys = [] 34 | ret = conn.list_objects_v2(Bucket=bucket, Prefix=prefix) 35 | if "Contents" not in ret: 36 | return [] 37 | # Make this a generator in future and use the marker: 38 | # https://boto3.readthedocs.io/en/latest/reference/services/ 39 | # s3.html#S3.Client.list_objects 40 | for key in conn.list_objects_v2(Bucket=bucket, Prefix=prefix)["Contents"]: 41 | keys.append("s3://" + bucket + "/" + key["Key"]) 42 | return keys 43 | 44 | 45 | def s3_read(source: str, profile_name: Optional[str] = None) -> bytes: 46 | """ 47 | Read a file from an S3 source. 48 | 49 | Parameters 50 | ---------- 51 | source : str 52 | Path starting with s3://, e.g. 's3://bucket-name/key/foo.bar' 53 | profile_name : str, optional 54 | AWS profile 55 | 56 | Returns 57 | ------- 58 | content : bytes 59 | 60 | Raises 61 | ------ 62 | botocore.exceptions.NoCredentialsError 63 | Botocore is not able to find your credentials. Either specify 64 | profile_name or add the environment variables AWS_ACCESS_KEY_ID, 65 | AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN. 66 | See https://boto3.readthedocs.io/en/latest/guide/configuration.html 67 | """ 68 | session = boto3.session.Session(profile_name=profile_name) 69 | s3 = session.client("s3") 70 | bucket_name, key = _s3_path_split(source) 71 | s3_object = s3.get_object(Bucket=bucket_name, Key=key) 72 | body = s3_object["Body"] 73 | return body.read() 74 | 75 | 76 | class ExistsStrategy(enum.Enum): 77 | """Strategies what to do when a file already exists.""" 78 | 79 | RAISE = "raise" 80 | REPLACE = "replace" 81 | ABORT = "abort" 82 | 83 | 84 | def s3_download( 85 | source: str, 86 | destination: Optional[str] = None, 87 | exists_strategy: ExistsStrategy = ExistsStrategy.RAISE, 88 | profile_name: Optional[str] = None, 89 | ) -> Optional[str]: 90 | """ 91 | Copy a file from an S3 source to a local destination. 92 | 93 | Parameters 94 | ---------- 95 | source : str 96 | Path starting with s3://, e.g. 's3://bucket-name/key/foo.bar' 97 | destination : str, optional 98 | If none is given, a temporary file is created 99 | exists_strategy : {'raise', 'replace', 'abort'} 100 | What is done when the destination already exists? 101 | * `ExistsStrategy.RAISE` means a RuntimeError is raised, 102 | * `ExistsStrategy.REPLACE` means the local file is replaced, 103 | * `ExistsStrategy.ABORT` means the download is not done. 104 | profile_name : str, optional 105 | AWS profile 106 | 107 | Returns 108 | ------- 109 | download_path : Optional[str] 110 | Path of the downloaded file, if any was downloaded. 111 | 112 | Raises 113 | ------ 114 | botocore.exceptions.NoCredentialsError 115 | Botocore is not able to find your credentials. Either specify 116 | profile_name or add the environment variables AWS_ACCESS_KEY_ID, 117 | AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN. 
118 | See https://boto3.readthedocs.io/en/latest/guide/configuration.html 119 | """ 120 | if not isinstance(exists_strategy, ExistsStrategy): 121 | raise ValueError( 122 | f"exists_strategy '{exists_strategy}' is not in {ExistsStrategy}" 123 | ) 124 | session = boto3.session.Session(profile_name=profile_name) 125 | s3 = session.resource("s3") 126 | bucket_name, key = _s3_path_split(source) 127 | if destination is None: 128 | _, filename = os.path.split(source) 129 | prefix, suffix = os.path.splitext(filename) 130 | _, destination = mkstemp(prefix=prefix, suffix=suffix) 131 | elif os.path.isfile(destination): 132 | if exists_strategy is ExistsStrategy.RAISE: 133 | raise RuntimeError(f"File '{destination}' already exists.") 134 | elif exists_strategy is ExistsStrategy.ABORT: 135 | return None 136 | s3.Bucket(bucket_name).download_file(key, destination) 137 | return destination 138 | 139 | 140 | def s3_upload( 141 | source: str, destination: str, profile_name: Optional[str] = None 142 | ) -> None: 143 | """ 144 | Copy a file from a local source to an S3 destination. 145 | 146 | Parameters 147 | ---------- 148 | source : str 149 | destination : str 150 | Path starting with s3://, e.g. 's3://bucket-name/key/foo.bar' 151 | profile_name : str, optional 152 | AWS profile 153 | """ 154 | session = boto3.session.Session(profile_name=profile_name) 155 | s3 = session.resource("s3") 156 | bucket_name, key = _s3_path_split(destination) 157 | with open(source, "rb") as data: 158 | s3.Bucket(bucket_name).put_object(Key=key, Body=data) 159 | 160 | 161 | S3Path = namedtuple("S3Path", ["bucket_name", "key"]) 162 | 163 | 164 | def _s3_path_split(s3_path: str) -> S3Path: 165 | """ 166 | Split an S3 path into bucket and key. 167 | 168 | Parameters 169 | ---------- 170 | s3_path : str 171 | 172 | Returns 173 | ------- 174 | splitted : S3Path 175 | 176 | Examples 177 | -------- 178 | >>> _s3_path_split('s3://my-bucket/foo/bar.jpg') 179 | S3Path(bucket_name='my-bucket', key='foo/bar.jpg') 180 | """ 181 | if not s3_path.startswith("s3://"): 182 | raise ValueError( 183 | f"s3_path is expected to start with 's3://', but was {s3_path}" 184 | ) 185 | bucket_key = s3_path[len("s3://") :] 186 | bucket_name, key = bucket_key.split("/", 1) 187 | return S3Path(bucket_name, key) 188 | -------------------------------------------------------------------------------- /requirements/dev.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.7 3 | # To update, run: 4 | # 5 | # pip-compile requirements/dev.in 6 | # 7 | attrs==21.4.0 8 | # via 9 | # -r requirements/ci.txt 10 | # hypothesis 11 | # pytest 12 | bleach==4.1.0 13 | # via 14 | # -r requirements/ci.txt 15 | # readme-renderer 16 | boto3==1.20.47 17 | # via 18 | # -r requirements/ci.txt 19 | # moto 20 | botocore==1.23.47 21 | # via 22 | # -r requirements/ci.txt 23 | # boto3 24 | # moto 25 | # s3transfer 26 | certifi==2021.10.8 27 | # via 28 | # -r requirements/ci.txt 29 | # requests 30 | cffi==1.15.0 31 | # via 32 | # -r requirements/ci.txt 33 | # cryptography 34 | cfgv==3.3.1 35 | # via pre-commit 36 | charset-normalizer==2.0.11 37 | # via 38 | # -r requirements/ci.txt 39 | # requests 40 | click==8.0.3 41 | # via 42 | # -r requirements/ci.txt 43 | # pip-tools 44 | colorama==0.4.4 45 | # via 46 | # -r requirements/ci.txt 47 | # twine 48 | coverage[toml]==6.3.1 49 | # via 50 | # -r requirements/ci.txt 51 | # pytest-cov 52 | cryptography==36.0.1 53 | # via 54 | # -r 
requirements/ci.txt 55 | # moto 56 | # secretstorage 57 | distlib==0.3.4 58 | # via virtualenv 59 | docutils==0.18.1 60 | # via 61 | # -r requirements/ci.txt 62 | # readme-renderer 63 | filelock==3.4.2 64 | # via virtualenv 65 | hypothesis==6.36.1 66 | # via -r requirements/ci.txt 67 | identify==2.4.7 68 | # via pre-commit 69 | idna==3.3 70 | # via 71 | # -r requirements/ci.txt 72 | # requests 73 | importlib-metadata==4.10.1 74 | # via 75 | # -r requirements/ci.txt 76 | # click 77 | # keyring 78 | # moto 79 | # pep517 80 | # pluggy 81 | # pre-commit 82 | # pytest 83 | # twine 84 | # virtualenv 85 | iniconfig==1.1.1 86 | # via 87 | # -r requirements/ci.txt 88 | # pytest 89 | jeepney==0.7.1 90 | # via 91 | # -r requirements/ci.txt 92 | # keyring 93 | # secretstorage 94 | jinja2==3.0.3 95 | # via 96 | # -r requirements/ci.txt 97 | # moto 98 | jmespath==0.10.0 99 | # via 100 | # -r requirements/ci.txt 101 | # boto3 102 | # botocore 103 | keyring==23.5.0 104 | # via 105 | # -r requirements/ci.txt 106 | # twine 107 | markupsafe==2.0.1 108 | # via 109 | # -r requirements/ci.txt 110 | # jinja2 111 | # moto 112 | moto==3.0.2 113 | # via -r requirements/ci.txt 114 | nodeenv==1.6.0 115 | # via pre-commit 116 | numpy==1.21.5 117 | # via 118 | # -r requirements/ci.txt 119 | # pandas 120 | packaging==21.3 121 | # via 122 | # -r requirements/ci.txt 123 | # bleach 124 | # pytest 125 | pandas==1.3.5 126 | # via -r requirements/ci.txt 127 | pep517==0.12.0 128 | # via 129 | # -r requirements/ci.txt 130 | # pip-tools 131 | pip-tools==6.4.0 132 | # via 133 | # -r requirements/ci.txt 134 | # -r requirements/dev.in 135 | pkginfo==1.8.2 136 | # via 137 | # -r requirements/ci.txt 138 | # twine 139 | platformdirs==2.4.1 140 | # via virtualenv 141 | pluggy==1.0.0 142 | # via 143 | # -r requirements/ci.txt 144 | # pytest 145 | pre-commit==2.17.0 146 | # via -r requirements/dev.in 147 | py==1.11.0 148 | # via 149 | # -r requirements/ci.txt 150 | # pytest 151 | py-cpuinfo==8.0.0 152 | # via 153 | # -r requirements/ci.txt 154 | # pytest-benchmark 155 | pycparser==2.21 156 | # via 157 | # -r requirements/ci.txt 158 | # cffi 159 | pygments==2.11.2 160 | # via 161 | # -r requirements/ci.txt 162 | # readme-renderer 163 | pyparsing==3.0.7 164 | # via 165 | # -r requirements/ci.txt 166 | # packaging 167 | pytest==6.2.5 168 | # via 169 | # -r requirements/ci.txt 170 | # pytest-benchmark 171 | # pytest-cov 172 | # pytest-timeout 173 | pytest-benchmark==3.4.1 174 | # via -r requirements/ci.txt 175 | pytest-cov==3.0.0 176 | # via -r requirements/ci.txt 177 | pytest-timeout==2.1.0 178 | # via -r requirements/ci.txt 179 | python-dateutil==2.8.2 180 | # via 181 | # -r requirements/ci.txt 182 | # botocore 183 | # moto 184 | # pandas 185 | pytz==2021.3 186 | # via 187 | # -r requirements/ci.txt 188 | # moto 189 | # pandas 190 | pyyaml==6.0 191 | # via pre-commit 192 | readme-renderer==32.0 193 | # via 194 | # -r requirements/ci.txt 195 | # twine 196 | requests==2.27.1 197 | # via 198 | # -r requirements/ci.txt 199 | # moto 200 | # requests-toolbelt 201 | # responses 202 | # twine 203 | requests-toolbelt==0.9.1 204 | # via 205 | # -r requirements/ci.txt 206 | # twine 207 | responses==0.18.0 208 | # via 209 | # -r requirements/ci.txt 210 | # moto 211 | rfc3986==2.0.0 212 | # via 213 | # -r requirements/ci.txt 214 | # twine 215 | s3transfer==0.5.1 216 | # via 217 | # -r requirements/ci.txt 218 | # boto3 219 | secretstorage==3.3.1 220 | # via 221 | # -r requirements/ci.txt 222 | # keyring 223 | simplejson==3.17.6 224 | # via -r 
requirements/ci.txt 225 | six==1.16.0 226 | # via 227 | # -r requirements/ci.txt 228 | # bleach 229 | # python-dateutil 230 | # virtualenv 231 | sortedcontainers==2.4.0 232 | # via 233 | # -r requirements/ci.txt 234 | # hypothesis 235 | toml==0.10.2 236 | # via 237 | # -r requirements/ci.txt 238 | # pre-commit 239 | # pytest 240 | tomli==2.0.0 241 | # via 242 | # -r requirements/ci.txt 243 | # coverage 244 | # pep517 245 | tqdm==4.62.3 246 | # via 247 | # -r requirements/ci.txt 248 | # twine 249 | twine==3.8.0 250 | # via -r requirements/ci.txt 251 | typing-extensions==4.0.1 252 | # via 253 | # -r requirements/ci.txt 254 | # importlib-metadata 255 | urllib3==1.26.8 256 | # via 257 | # -r requirements/ci.txt 258 | # botocore 259 | # requests 260 | # responses 261 | # twine 262 | virtualenv==20.13.0 263 | # via pre-commit 264 | webencodings==0.5.1 265 | # via 266 | # -r requirements/ci.txt 267 | # bleach 268 | werkzeug==2.0.2 269 | # via 270 | # -r requirements/ci.txt 271 | # moto 272 | wheel==0.37.1 273 | # via 274 | # -r requirements/ci.txt 275 | # -r requirements/dev.in 276 | # pip-tools 277 | xmltodict==0.12.0 278 | # via 279 | # -r requirements/ci.txt 280 | # moto 281 | zipp==3.7.0 282 | # via 283 | # -r requirements/ci.txt 284 | # importlib-metadata 285 | # pep517 286 | 287 | # The following packages are considered to be unsafe in a requirements file: 288 | # pip 289 | # setuptools 290 | -------------------------------------------------------------------------------- /mpu/math.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mathematical functions which are not adequately covered by standard libraries. 3 | 4 | Standard libraries are: 5 | 6 | * `math `_ 7 | * `scipy `_ 8 | * `sympy `_ 9 | 10 | """ 11 | 12 | # Core Library 13 | import math as math_stl 14 | import operator 15 | from functools import reduce 16 | from typing import Dict, Iterable, Iterator, List, Optional 17 | 18 | 19 | def generate_primes() -> Iterator[int]: 20 | """ 21 | Generate an infinite sequence of prime numbers. 22 | 23 | The algorithm was originally written by David Eppstein, UC Irvine. See: 24 | http://code.activestate.com/recipes/117119/ 25 | 26 | Examples 27 | -------- 28 | >>> g = generate_primes() 29 | >>> next(g) 30 | 2 31 | >>> next(g) 32 | 3 33 | >>> next(g) 34 | 5 35 | """ 36 | divisors: Dict[int, List[int]] = {} # map number to at least one divisor 37 | 38 | candidate = 2 # next potential prime 39 | 40 | while True: 41 | if candidate in divisors: 42 | # candidate is composite. divisors[candidate] is the list of primes 43 | # that divide it. Since we've reached candidate, we no longer need 44 | # it in the map, but we'll mark the next multiples of its witnesses 45 | # to prepare for larger numbers 46 | for p in divisors[candidate]: 47 | divisors.setdefault(p + candidate, []).append(p) 48 | del divisors[candidate] 49 | else: 50 | # candidate is a new prime 51 | yield candidate 52 | 53 | # mark its first multiple that isn't 54 | # already marked in previous iterations 55 | divisors[candidate * candidate] = [candidate] 56 | 57 | candidate += 1 58 | 59 | 60 | def factorize(number: int) -> List[int]: 61 | """ 62 | Get the prime factors of an integer except for 1. 
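The implementation uses trial division: factors of 2 are stripped first, then
odd candidates up to sqrt(number) are tried, recursing on the remaining
cofactor. Negative inputs yield a leading -1 factor.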
63 | 64 | Parameters 65 | ---------- 66 | number : int 67 | 68 | Returns 69 | ------- 70 | primes : List[int] 71 | 72 | Examples 73 | -------- 74 | >>> factorize(-17) 75 | [-1, 17] 76 | >>> factorize(8) 77 | [2, 2, 2] 78 | >>> factorize(3**25) 79 | [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] 80 | >>> factorize(1) 81 | [1] 82 | """ 83 | if not isinstance(number, int): 84 | raise ValueError(f"integer expected, but type(number)={type(number)}") 85 | if number < 0: 86 | return [-1] + factorize(number * (-1)) 87 | elif number == 0: 88 | raise ValueError("All primes are prime factors of 0.") 89 | else: 90 | factors = [] 91 | factor = 2 92 | while number % factor == 0: 93 | factors.append(factor) 94 | number = number // factor 95 | if number == 1: 96 | if len(factors) > 0: 97 | return factors 98 | else: 99 | return [1] 100 | for factor in range(3, int(math_stl.ceil(number**0.5)) + 1, 2): 101 | if number % factor == 0: 102 | return factors + [factor] + factorize(number // factor) 103 | return factors + [number] 104 | 105 | 106 | def is_prime(number: int) -> bool: 107 | """ 108 | Check if a number is prime. 109 | 110 | Parameters 111 | ---------- 112 | number : int 113 | 114 | Returns 115 | ------- 116 | is_prime_number : bool 117 | 118 | Examples 119 | -------- 120 | >>> is_prime(-17) 121 | False 122 | >>> is_prime(17) 123 | True 124 | >>> is_prime(47055833459) 125 | True 126 | """ 127 | return len(factorize(number)) == 1 128 | 129 | 130 | def product(iterable: Iterable, start: int = 1) -> int: 131 | """ 132 | Calculate the product of the iterables. 133 | 134 | Parameters 135 | ---------- 136 | iterable : iterable 137 | List, tuple or similar which contains numbers 138 | start : number, optional (default: 1) 139 | 140 | Returns 141 | ------- 142 | product : number 143 | 144 | Examples 145 | -------- 146 | >>> product([1, 2, 3, 4, 5]) 147 | 120 148 | >>> product([]) 149 | 1 150 | """ 151 | return reduce(operator.mul, iterable, start) 152 | 153 | 154 | def argmax(iterable: Iterable) -> Optional[int]: 155 | """ 156 | Find the first index of the biggest value in the iterable. 157 | 158 | Parameters 159 | ---------- 160 | iterable : Iterable 161 | 162 | Returns 163 | ------- 164 | argmax : Optional[int] 165 | 166 | Examples 167 | -------- 168 | >>> argmax([0, 0, 0]) 169 | 0 170 | >>> argmax([1, 0, 0]) 171 | 0 172 | >>> argmax([0, 1, 0]) 173 | 1 174 | >>> argmax([]) 175 | """ 176 | max_value = None 177 | max_index = None 178 | for index, value in enumerate(iterable): 179 | if (max_value is None) or max_value < value: 180 | max_value = value 181 | max_index = index 182 | return max_index 183 | 184 | 185 | def round_up(x: float, decimal_places: int) -> float: 186 | """ 187 | Round a float up to decimal_places. 188 | 189 | Parameters 190 | ---------- 191 | x : float 192 | decimal_places : int 193 | 194 | Returns 195 | ------- 196 | rounded_float : float 197 | 198 | Examples 199 | -------- 200 | >>> round_up(1.2344, 3) 201 | 1.235 202 | >>> round_up(1.234, 3) 203 | 1.234 204 | >>> round_up(1.23456, 3) 205 | 1.235 206 | >>> round_up(1.23456, 2) 207 | 1.24 208 | """ 209 | return round(x + 5 * 10 ** (-1 * (decimal_places + 1)), decimal_places) 210 | 211 | 212 | def round_down(x: float, decimal_places: int) -> float: 213 | """ 214 | Round a float down to decimal_places. 
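Equivalent to floor(x * 10**decimal_places) / 10**decimal_places.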
215 | 216 | Parameters 217 | ---------- 218 | x : float 219 | decimal_places : int 220 | 221 | Returns 222 | ------- 223 | rounded_float : float 224 | 225 | Examples 226 | -------- 227 | >>> round_down(1.23456, 3) 228 | 1.234 229 | >>> round_down(1.23456, 2) 230 | 1.23 231 | """ 232 | d = int("1" + ("0" * decimal_places)) 233 | return math_stl.floor(x * d) / d 234 | 235 | 236 | def gcd(a: int, b: int) -> int: 237 | """ 238 | Calculate the greatest common divisor. 239 | 240 | Currently, this uses the Euclidean algorithm. 241 | 242 | Parameters 243 | ---------- 244 | a : int 245 | Non-zero 246 | b : int 247 | Non-zero 248 | 249 | Returns 250 | ------- 251 | greatest_common_divisor : int 252 | 253 | Examples 254 | -------- 255 | >>> gcd(1, 7) 256 | 1 257 | >>> gcd(-1, -1) 258 | 1 259 | >>> gcd(1337, 42) 260 | 7 261 | >>> gcd(-1337, -42) 262 | 7 263 | >>> gcd(120, 364) 264 | 4 265 | >>> gcd(273, 1870) 266 | 1 267 | """ 268 | if a == 0 or b == 0: 269 | raise ValueError(f"gcd(a={a}, b={b}) is undefined") 270 | while b != 0: 271 | a, b = b, a % b 272 | return abs(a) 273 | -------------------------------------------------------------------------------- /tests/test_units.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Test the mpu.units module.""" 4 | 5 | # Third party 6 | import pytest 7 | import simplejson # has for_json 8 | 9 | # First party 10 | from mpu.units import Currency, Money, get_currency 11 | 12 | 13 | def test_get_currency(): 14 | a = Money("0.1", "EUR") 15 | assert str(a) == "0.10 Euro" 16 | b = Money("0.1", "USD") 17 | assert str(b) == "0.10 US Dollar" 18 | with pytest.raises(ValueError): 19 | Money("0.1", "foobar") 20 | c = Money((1, 100), "EUR") 21 | d = Money(5, "ESP") 22 | assert str(c) == "0.01 Euro" 23 | assert repr(c) == "0.01 Euro" 24 | assert str(d) == "5.00 Spanish Peseta" 25 | with pytest.raises(ValueError): 26 | Money((5, 100, 42), "EUR") 27 | with pytest.raises(ValueError): 28 | Money(0.1, "EUR") 29 | non_currency = Money("0.1", None) 30 | assert str(non_currency) == "0.10" 31 | with pytest.raises(ValueError): 32 | Money(1, a) 33 | 34 | 35 | def test_currency_for_json(): 36 | usd = get_currency("USD") 37 | dump = simplejson.dumps(usd, for_json=True) 38 | dict_ = simplejson.loads(dump) 39 | undump = Currency.from_json(dict_) 40 | assert usd == undump 41 | 42 | 43 | def test_money_json_magic(): 44 | usd = Money("0.1", "USD") 45 | usd_dict = usd.__json__() 46 | dump = simplejson.dumps(usd_dict) 47 | dict_ = simplejson.loads(dump) 48 | undump = Money.from_json(dict_) 49 | assert usd == undump 50 | 51 | 52 | def test_money_json_magic_none(): 53 | usd = Money("0.1", None) 54 | usd_dict = usd.__json__() 55 | dump = simplejson.dumps(usd_dict) 56 | dict_ = simplejson.loads(dump) 57 | undump = Money.from_json(dict_) 58 | assert usd == undump 59 | 60 | 61 | def test_money_conversion_float(): 62 | """Test if one can convert Money instances to float.""" 63 | a = Money("1337.00", None) 64 | assert float(a) == 1337.0 65 | b = Money("42.00", "USD") 66 | assert float(b) == 42.0 67 | 68 | 69 | def test_money_floatingpoint_issue1(): 70 | """The test is the reason why one should not use float for money.""" 71 | a = Money("10.00", None) 72 | b = Money("1.2", None) 73 | assert str(a + b - a) == str(b) 74 | 75 | 76 | def test_money_floatingpoint_issue2(): 77 | """The test is the reason why one should not use float for money.""" 78 | a = Money("10.00", None) 79 | b = Money("1.2", None) 80 | assert str((a + b - a) * 
10**14 - b * 10**14) == "0.00" 81 | 82 | 83 | def test_currency_operations(): 84 | a = Money("0.5", "EUR") 85 | aneg = Money("-0.5", "EUR") 86 | b = Money("0.1", "EUR") 87 | c = Money("0.1", "USD") 88 | d = Money("0.5", "EUR") 89 | assert (a == b) is False 90 | with pytest.raises(ValueError): 91 | a == 0.5 92 | assert a == d 93 | with pytest.raises(ValueError): 94 | a == c 95 | assert a != b 96 | assert (a != d) is False 97 | with pytest.raises(ValueError): 98 | a != c 99 | assert str(a - b) == "0.40 Euro" 100 | assert -a == aneg 101 | assert +a == a 102 | with pytest.raises(ValueError): 103 | a - c 104 | with pytest.raises(ValueError): 105 | a - 2 106 | with pytest.raises(ValueError): 107 | a - 2.0 108 | assert str(a + b) == "0.60 Euro" 109 | with pytest.raises(ValueError): 110 | a + c 111 | with pytest.raises(ValueError): 112 | a + 2 113 | with pytest.raises(ValueError): 114 | a + 2.0 115 | assert str(2 * a) == "1.00 Euro" 116 | assert str(a / b) == "5" 117 | with pytest.raises(ValueError): 118 | a / c 119 | with pytest.raises(ValueError): 120 | a * 3.141 121 | with pytest.raises(ValueError): 122 | 3.141 * a 123 | with pytest.raises(ValueError): 124 | a / "0.1" 125 | assert str(a / 2) == "0.25 Euro" 126 | 127 | 128 | def test_currency_comperators(): 129 | a = Money("0.5", "EUR") 130 | b = Money("0.1", "EUR") 131 | c = Money("0.5", "EUR") 132 | d = Money("0.5", "USD") 133 | assert a > b 134 | assert (a < b) is False 135 | assert a >= b 136 | assert (a <= b) is False 137 | assert (a > c) is False 138 | assert (a < c) is False 139 | assert a >= c 140 | assert a <= c 141 | 142 | with pytest.raises(ValueError): 143 | is_smaller = c < d 144 | with pytest.raises(ValueError): 145 | is_smaller = c < d 146 | with pytest.raises(ValueError): 147 | is_equal = c == d 148 | assert (c < 1) is False 149 | assert (c > 1) is False 150 | 151 | 152 | def test_currency(): 153 | eur = Currency( 154 | name="Euro", 155 | code="EUR", 156 | numeric_code=123, 157 | symbol="€", 158 | exponent=2, 159 | entities=["Germany"], 160 | withdrawal_date=None, 161 | subunits=2, 162 | ) 163 | usd = Currency( 164 | name="US Dollar", 165 | code="USD", 166 | numeric_code=456, 167 | symbol="$", 168 | exponent=2, 169 | entities=["United States of America"], 170 | withdrawal_date=None, 171 | subunits=2, 172 | ) 173 | repr(eur) 174 | assert (eur == usd) is False 175 | assert (eur == 2) is False 176 | assert eur != usd 177 | with pytest.raises(ValueError): 178 | Currency( 179 | name=2, 180 | code="EUR", 181 | numeric_code=123, 182 | symbol="€", 183 | exponent=2, 184 | entities=["Germany"], 185 | withdrawal_date=None, 186 | subunits=2, 187 | ) 188 | with pytest.raises(ValueError): 189 | Currency( 190 | name="Euro", 191 | code=2, 192 | numeric_code=123, 193 | symbol="€", 194 | exponent=2, 195 | entities=["Germany"], 196 | withdrawal_date=None, 197 | subunits=2, 198 | ) 199 | with pytest.raises(ValueError): 200 | Currency( 201 | name="Euro", 202 | code="EUR", 203 | numeric_code=123, 204 | symbol="€", 205 | exponent="2", 206 | entities=["Germany"], 207 | withdrawal_date=None, 208 | subunits=2, 209 | ) 210 | 211 | 212 | def test_formatting(): 213 | non_currency = Money("12.2", None) 214 | assert f"{non_currency}" == "12.20" 215 | assert f"{non_currency:0.2f,symbol}" == "12.20" 216 | assert f"{non_currency:0.2f,postsymbol}" == "12.20" 217 | assert f"{non_currency:0.2f,shortcode}" == "12.20" 218 | assert f"{non_currency:0.2f,postshortcode}" == "12.20" 219 | 220 | a = Money("12.20", "USD") 221 | assert f"{a}" == "12.20 USD" 222 | assert 
f"{a:0.2f,symbol}" == "$12.20" 223 | assert f"{a:0.2f,postsymbol}" == "12.20$" 224 | assert f"{a:0.2f,shortcode}" == "USD 12.20" 225 | assert f"{a:0.2f,postshortcode}" == "12.20 USD" 226 | 227 | 228 | def test_gt_other_currency_fail(): 229 | a = Money("12.45", "USD") 230 | b = Money("67.89", "EUR") 231 | with pytest.raises(ValueError) as exinfo: 232 | a > b 233 | error_msg = ( 234 | "Left has currency=US Dollar, right has currency=Euro. " 235 | "You need to convert to the same currency first." 236 | ) 237 | assert str(exinfo.value) == error_msg 238 | -------------------------------------------------------------------------------- /mpu/__init__.py: -------------------------------------------------------------------------------- 1 | """mpu: Martins Python Utilities.""" 2 | 3 | 4 | # Core Library 5 | import logging 6 | import math as math_stl 7 | import multiprocessing.pool 8 | import random 9 | import traceback 10 | from contextlib import closing 11 | from types import TracebackType 12 | from typing import Any, Callable, List, Optional, Tuple, TypeVar, Union 13 | 14 | # First party 15 | from mpu import io, shell, string, units # noqa 16 | from mpu._version import __version__ # noqa 17 | from mpu.type import Comparable 18 | 19 | T = TypeVar("T") 20 | 21 | 22 | def parallel_for( 23 | loop_function: Callable[[Any], T], 24 | parameters: List[Tuple[Any, ...]], 25 | nb_threads: int = 100, 26 | ) -> List[T]: 27 | """ 28 | Execute the loop body in parallel. 29 | 30 | .. note:: Race-Conditions 31 | Executing code in parallel can cause an error class called 32 | "race-condition". 33 | 34 | Parameters 35 | ---------- 36 | loop_function : Callable 37 | Python function which takes a tuple as input 38 | parameters : List[Tuple] 39 | Each element here should be executed in parallel. 40 | nb_threads : int (default: 100) 41 | The number of threads to use. 42 | 43 | Returns 44 | ------- 45 | return_values : list of return values 46 | """ 47 | with closing(multiprocessing.pool.ThreadPool(nb_threads)) as pool: 48 | return pool.map(loop_function, parameters) 49 | 50 | 51 | def clip( 52 | number: Union[int, float], 53 | lowest: Union[None, int, float] = None, 54 | highest: Union[None, int, float] = None, 55 | ) -> Union[int, float]: 56 | """ 57 | Clip a number to a given lowest / highest value. 58 | 59 | Parameters 60 | ---------- 61 | number : number 62 | lowest : number, optional 63 | highest : number, optional 64 | 65 | Returns 66 | ------- 67 | clipped_number : number 68 | 69 | Examples 70 | -------- 71 | >>> clip(42, lowest=0, highest=10) 72 | 10 73 | """ 74 | if lowest is not None: 75 | number = max(number, lowest) 76 | if highest is not None: 77 | number = min(number, highest) 78 | return number 79 | 80 | 81 | def consistent_shuffle(*lists: List[List[Any]]) -> Tuple[List[Any], ...]: 82 | """ 83 | Shuffle lists consistently. 
84 | 85 | Parameters 86 | ---------- 87 | *lists 88 | Variable length number of lists 89 | 90 | Returns 91 | ------- 92 | shuffled_lists : tuple of lists 93 | All of the lists are shuffled consistently 94 | 95 | Examples 96 | -------- 97 | >>> import mpu, random; random.seed(8) 98 | >>> mpu.consistent_shuffle([1,2,3], ['a', 'b', 'c'], ['A', 'B', 'C']) 99 | ([3, 2, 1], ['c', 'b', 'a'], ['C', 'B', 'A']) 100 | """ 101 | LEN = len(lists[0]) 102 | if any(len(l) != LEN for l in lists): 103 | raise ValueError("All lists need to have the same length") 104 | perm = list(range(LEN)) 105 | random.shuffle(perm) 106 | lists = tuple([sublist[index] for index in perm] for sublist in lists) 107 | return lists 108 | 109 | 110 | class Location: 111 | """ 112 | Define a single point. 113 | 114 | Parameters 115 | ---------- 116 | latitude : float 117 | in [-90, 90] - from North to South 118 | longitude : float 119 | in [-180, 180] - from West to East 120 | """ 121 | 122 | MIN_LATITUDE = -90 123 | MAX_LATITUDE = 90 124 | MIN_LONGITUDE = -180 125 | MAX_LONGITUDE = 180 126 | 127 | def __init__(self, latitude: float, longitude: float): 128 | self.latitude = latitude 129 | self.longitude = longitude 130 | 131 | @property 132 | def latitude(self) -> float: 133 | """Getter for latitude.""" 134 | return self._latitude 135 | 136 | @latitude.setter 137 | def latitude(self, latitude: float) -> None: 138 | """Setter for latitude.""" 139 | if not (Location.MIN_LATITUDE <= latitude <= Location.MAX_LATITUDE): 140 | raise ValueError(f"latitude was {latitude}, but has to be in [-90, 90]") 141 | self._latitude = latitude 142 | 143 | @property 144 | def longitude(self) -> float: 145 | """Getter for longitude.""" 146 | return self._longitude 147 | 148 | @longitude.setter 149 | def longitude(self, longitude: float) -> None: 150 | """Setter for longitude.""" 151 | if not (Location.MIN_LONGITUDE <= longitude <= Location.MAX_LONGITUDE): 152 | raise ValueError(f"longitude was {longitude}, but has to be in [-180, 180]") 153 | self._longitude = longitude 154 | 155 | def get_google_maps_link(self) -> str: 156 | """Get a Google Maps link to this location.""" 157 | return f"https://www.google.com/maps/place/{self.latitude},{self.longitude}" 158 | 159 | def distance(self, there: "Location") -> float: 160 | """ 161 | Calculate the distance from this location to there. 162 | 163 | Parameters 164 | ---------- 165 | there : Location 166 | 167 | Returns 168 | ------- 169 | distance_in_m : float 170 | """ 171 | return haversine_distance( 172 | (self.latitude, self.longitude), (there.latitude, there.longitude) 173 | ) 174 | 175 | def __repr__(self) -> str: 176 | """Get an unambiguous representation.""" 177 | return f"Location({self.latitude}, {self.longitude})" 178 | 179 | __str__ = __repr__ 180 | 181 | 182 | def haversine_distance( 183 | origin: Tuple[float, float], destination: Tuple[float, float] 184 | ) -> float: 185 | """ 186 | Calculate the Haversine distance. 
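The great-circle distance is computed on a sphere with mean Earth radius 6371 km; compared to ellipsoidal models such as WGS84 the result can deviate by a few tenths of a percent.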
187 | 188 | Parameters 189 | ---------- 190 | origin : Tuple[float, float] 191 | (lat, long) 192 | destination : Tuple[float, float] 193 | (lat, long) 194 | 195 | Returns 196 | ------- 197 | distance_in_km : float 198 | 199 | Examples 200 | -------- 201 | >>> munich = (48.1372, 11.5756) 202 | >>> berlin = (52.5186, 13.4083) 203 | >>> round(haversine_distance(munich, berlin), 1) 204 | 504.2 205 | 206 | >>> new_york_city = (40.712777777778, -74.005833333333) # NYC 207 | >>> round(haversine_distance(berlin, new_york_city), 1) 208 | 6385.3 209 | """ 210 | lat1, lon1 = origin 211 | lat2, lon2 = destination 212 | if not (Location.MIN_LATITUDE <= lat1 <= Location.MAX_LATITUDE): 213 | raise ValueError(f"lat1={lat1:2.2f}, but must be in [-90,+90]") 214 | if not (Location.MIN_LATITUDE <= lat2 <= Location.MAX_LATITUDE): 215 | raise ValueError(f"lat2={lat2:2.2f}, but must be in [-90,+90]") 216 | if not (Location.MIN_LONGITUDE <= lon1 <= Location.MAX_LONGITUDE): 217 | raise ValueError(f"lon1={lon1:2.2f}, but must be in [-180,+180]") 218 | if not (Location.MIN_LONGITUDE <= lon2 <= Location.MAX_LONGITUDE): 219 | raise ValueError(f"lon2={lon2:2.2f}, but must be in [-180,+180]") 220 | radius = 6371 # km 221 | 222 | dlat = math_stl.radians(lat2 - lat1) 223 | dlon = math_stl.radians(lon2 - lon1) 224 | a = math_stl.sin(dlat / 2) * math_stl.sin(dlat / 2) + math_stl.cos( 225 | math_stl.radians(lat1) 226 | ) * math_stl.cos(math_stl.radians(lat2)) * math_stl.sin(dlon / 2) * math_stl.sin( 227 | dlon / 2 228 | ) 229 | c = 2 * math_stl.atan2(math_stl.sqrt(a), math_stl.sqrt(1 - a)) 230 | d = radius * c 231 | 232 | return d 233 | 234 | 235 | def is_in_interval( 236 | value: Comparable, 237 | min_value: Comparable, 238 | max_value: Comparable, 239 | name: str = "variable", 240 | ) -> None: 241 | """ 242 | Raise an exception if value is not in an interval. 243 | 244 | Parameters 245 | ---------- 246 | value : Comparable 247 | min_value : Comparable 248 | max_value : Comparable 249 | name : str 250 | Name of the variable to print in exception. 251 | """ 252 | if not (min_value <= value <= max_value): 253 | raise ValueError(f"{name}={value} is not in [{min_value}, {max_value}]") 254 | 255 | 256 | def exception_logging(exctype: Any, value: Any, tb: Optional[TracebackType]) -> None: 257 | """ 258 | Log exception by using the root logger. 259 | 260 | Use it as `sys.excepthook = exception_logging`. 261 | 262 | Parameters 263 | ---------- 264 | exctype : type 265 | value : NameError 266 | tb : traceback 267 | """ 268 | write_val = { 269 | "exception_type": str(exctype), 270 | "message": str(traceback.format_tb(tb, 10)), 271 | } 272 | logging.exception(str(write_val)) 273 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH.
If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " epub3 to make an epub3" 36 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 37 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 38 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 39 | @echo " text to make text files" 40 | @echo " man to make manual pages" 41 | @echo " texinfo to make Texinfo files" 42 | @echo " info to make Texinfo files and run them through makeinfo" 43 | @echo " gettext to make PO message catalogs" 44 | @echo " changes to make an overview of all changed/added/deprecated items" 45 | @echo " xml to make Docutils-native XML files" 46 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 47 | @echo " linkcheck to check all external links for integrity" 48 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 49 | @echo " coverage to run coverage check of the documentation (if enabled)" 50 | @echo " dummy to check syntax errors of document sources" 51 | 52 | .PHONY: clean 53 | clean: 54 | rm -rf $(BUILDDIR)/* 55 | 56 | .PHONY: html 57 | html: 58 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 61 | 62 | .PHONY: dirhtml 63 | dirhtml: 64 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 65 | @echo 66 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 67 | 68 | .PHONY: singlehtml 69 | singlehtml: 70 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 71 | @echo 72 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 73 | 74 | .PHONY: pickle 75 | pickle: 76 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 77 | @echo 78 | @echo "Build finished; now you can process the pickle files." 79 | 80 | .PHONY: json 81 | json: 82 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 83 | @echo 84 | @echo "Build finished; now you can process the JSON files." 85 | 86 | .PHONY: htmlhelp 87 | htmlhelp: 88 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 89 | @echo 90 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 91 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
92 | 93 | .PHONY: qthelp 94 | qthelp: 95 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 96 | @echo 97 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 98 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 99 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/mpu.qhcp" 100 | @echo "To view the help file:" 101 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/mpu.qhc" 102 | 103 | .PHONY: applehelp 104 | applehelp: 105 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 106 | @echo 107 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 108 | @echo "N.B. You won't be able to view it unless you put it in" \ 109 | "~/Library/Documentation/Help or install it in your application" \ 110 | "bundle." 111 | 112 | .PHONY: devhelp 113 | devhelp: 114 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 115 | @echo 116 | @echo "Build finished." 117 | @echo "To view the help file:" 118 | @echo "# mkdir -p $$HOME/.local/share/devhelp/mpu" 119 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/mpu" 120 | @echo "# devhelp" 121 | 122 | .PHONY: epub 123 | epub: 124 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 125 | @echo 126 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 127 | 128 | .PHONY: epub3 129 | epub3: 130 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 131 | @echo 132 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 133 | 134 | .PHONY: latex 135 | latex: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo 138 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 139 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 140 | "(use \`make latexpdf' here to do that automatically)." 141 | 142 | .PHONY: latexpdf 143 | latexpdf: 144 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 145 | @echo "Running LaTeX files through pdflatex..." 146 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 147 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 148 | 149 | .PHONY: latexpdfja 150 | latexpdfja: 151 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 152 | @echo "Running LaTeX files through platex and dvipdfmx..." 153 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 154 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 155 | 156 | .PHONY: text 157 | text: 158 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 159 | @echo 160 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 161 | 162 | .PHONY: man 163 | man: 164 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 165 | @echo 166 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 167 | 168 | .PHONY: texinfo 169 | texinfo: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo 172 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 173 | @echo "Run \`make' in that directory to run these through makeinfo" \ 174 | "(use \`make info' here to do that automatically)." 175 | 176 | .PHONY: info 177 | info: 178 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 179 | @echo "Running Texinfo files through makeinfo..." 180 | make -C $(BUILDDIR)/texinfo info 181 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 182 | 183 | .PHONY: gettext 184 | gettext: 185 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 186 | @echo 187 | @echo "Build finished. 
The message catalogs are in $(BUILDDIR)/locale." 188 | 189 | .PHONY: changes 190 | changes: 191 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 192 | @echo 193 | @echo "The overview file is in $(BUILDDIR)/changes." 194 | 195 | .PHONY: linkcheck 196 | linkcheck: 197 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 198 | @echo 199 | @echo "Link check complete; look for any errors in the above output " \ 200 | "or in $(BUILDDIR)/linkcheck/output.txt." 201 | 202 | .PHONY: doctest 203 | doctest: 204 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 205 | @echo "Testing of doctests in the sources finished, look at the " \ 206 | "results in $(BUILDDIR)/doctest/output.txt." 207 | 208 | .PHONY: coverage 209 | coverage: 210 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 211 | @echo "Testing of coverage in the sources finished, look at the " \ 212 | "results in $(BUILDDIR)/coverage/python.txt." 213 | 214 | .PHONY: xml 215 | xml: 216 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 217 | @echo 218 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 219 | 220 | .PHONY: pseudoxml 221 | pseudoxml: 222 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 223 | @echo 224 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 225 | 226 | .PHONY: dummy 227 | dummy: 228 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 229 | @echo 230 | @echo "Build finished. Dummy builder generates no files." 231 | 232 | apidoc: 233 | sphinx-apidoc -o source/ ../mpu 234 | -------------------------------------------------------------------------------- /mpu/datastructures/trie/string_trie.py: -------------------------------------------------------------------------------- 1 | """Implementation of a trie which has multi-character strings as node elements.""" 2 | 3 | # Core Library 4 | import logging 5 | from typing import List, Set, Tuple 6 | 7 | # First party 8 | from mpu.datastructures.trie.base import AbstractTrie 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | class TrieNode: 14 | def __init__( 15 | self, 16 | value, 17 | is_word: bool = False, 18 | count: int = 0, 19 | children=None, 20 | freeze: bool = False, 21 | ): 22 | if children is None: 23 | children = set() 24 | self._value = value 25 | self.children: Set[TrieNode] = children 26 | self.is_word = is_word 27 | self.count = count 28 | self.is_frozen = freeze 29 | 30 | def get_subtrie( 31 | self, search_prefix: str, current_trie_node_prefix: str = "" 32 | ) -> Tuple[str, "TrieNode"]: 33 | """ 34 | Get the TrieNodes which represents the given prefix. 35 | 36 | If the search_prefix is not in the trie, return ("", EMPTY_NODE). 37 | The found_prefix is a prefix of search_prefix or equal to it. 
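If search_prefix fits within this node's value, the current node is returned together with the prefix accumulated so far. If instead this node's value is a proper prefix of search_prefix, the remainder is matched against the children and the search recurses into the child that continues it.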
38 | 39 | Parameters 40 | ---------- 41 | search_prefix : str 42 | current_trie_node_prefix : str, optional (default: "") 43 | 44 | Returns 45 | ------- 46 | found_prefix, subtrie: Tuple[str, TrieNode] 47 | """ 48 | if search_prefix == self._value[: len(search_prefix)]: 49 | # search_prefix is a prefix of the current node (or equal to it) 50 | return (current_trie_node_prefix, self) 51 | elif self._value == search_prefix[: len(self._value)]: 52 | # The current node is a prefix of the search_prefix 53 | remainder = search_prefix[len(self._value) :] 54 | children = sorted(self.children, key=lambda node: node._value) 55 | for child in children: 56 | if child._value == remainder[: len(child._value)]: 57 | new_prefix = current_trie_node_prefix + self._value 58 | return child.get_subtrie( 59 | remainder, current_trie_node_prefix=new_prefix 60 | ) 61 | elif remainder == child._value[: len(remainder)]: 62 | # The remainder is a prefix of the child 63 | return (current_trie_node_prefix, child) 64 | return ("", EMPTY_NODE) 65 | 66 | def push(self, value: str): 67 | if self.is_frozen: 68 | raise RuntimeError("The node is frozen. You may not edit it.") 69 | if value == self._value: 70 | logger.debug("The inserted value is the value of the current node") 71 | self.count += 1 72 | self.is_word = True 73 | return 74 | shared_prefix = get_shared_prefix(self._value, value) 75 | 76 | if len(value) == len(shared_prefix): 77 | logger.debug("The new value is a prefix of the current node") 78 | new_child = TrieNode( 79 | self._value[len(shared_prefix) :], 80 | is_word=self.is_word, 81 | count=self.count, 82 | children=self.children, 83 | ) 84 | self._value = shared_prefix 85 | self.count = 1 86 | self.is_word = True 87 | self.children = {new_child} 88 | elif len(shared_prefix) == len(self._value): 89 | logger.debug( 90 | f"The current node={self._value} is a prefix " 91 | f"of the new value={value}" 92 | ) 93 | # Do I have a child which also is a prefix of this? 
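# Sibling nodes never share a first character, so at most one child
# can continue the remainder: recurse into it if it exists, otherwise
# attach the remainder as a new leaf below this node.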
94 | remainder = value[len(shared_prefix) :] 95 | for child_trie in self.children: 96 | if len(get_shared_prefix(child_trie._value, remainder)) > 0: 97 | child_trie.push(remainder) 98 | return 99 | trie_node = TrieNode(value[len(shared_prefix) :], is_word=True, count=1) 100 | self.children.add(trie_node) 101 | else: 102 | logger.debug(f"No shared prefix for {self._value} and {value}") 103 | # Current node will become its child 104 | old_data = TrieNode( 105 | self._value[len(shared_prefix) :], 106 | is_word=self.is_word, 107 | count=self.count, 108 | children=self.children, 109 | ) 110 | 111 | # New data 112 | new_data = TrieNode(value[len(shared_prefix) :], is_word=True, count=1) 113 | 114 | # Clean up current node 115 | self.is_word = False 116 | self.count = 0 117 | self._value = shared_prefix 118 | self.children = {old_data, new_data} 119 | 120 | def __iter__(self): 121 | self._iteration_queue: List[Tuple[TrieNode, str]] = [(self, "")] 122 | while self._iteration_queue: 123 | trie_node, prefix = self._iteration_queue.pop() 124 | for child in trie_node.children: 125 | self._iteration_queue.append((child, prefix + trie_node._value)) 126 | if trie_node.is_word: 127 | for _ in range(trie_node.count): 128 | yield prefix + trie_node._value 129 | 130 | def print(self, _indent: int = 0): 131 | string = "" 132 | if self.is_word: 133 | string += " " * _indent + self._value + "\n" 134 | children = sorted(self.children, key=lambda child: child._value) 135 | for i, child in enumerate(children): 136 | if i < len(self.children) - 1: 137 | string += child.print(_indent=_indent + 1) 138 | else: 139 | string += child.print(_indent=_indent + 1) 140 | return string 141 | 142 | def __str__(self): 143 | return f"TrieNode(value='{self._value}', nb_children='{len(self.children)}')" 144 | 145 | __repr__ = __str__ 146 | 147 | 148 | EMPTY_NODE = TrieNode(value="", is_word=False, count=0, freeze=True) 149 | 150 | 151 | class Trie(AbstractTrie): 152 | def __init__(self, container=None): 153 | if container is None: 154 | container = [] 155 | self._root = None 156 | self._length = 0 157 | for element in container: 158 | self.push(element) 159 | 160 | def __len__(self): 161 | return self._length 162 | 163 | def __contains__(self, element) -> bool: 164 | found_prefix, subtrie = self.get_subtrie(element) 165 | return subtrie.is_word and found_prefix + subtrie._value == element 166 | 167 | def autocomplete(self, prefix): 168 | found_prefix, subtrie = self.get_subtrie(prefix) 169 | for word in subtrie: 170 | yield found_prefix + word 171 | 172 | def get_subtrie( 173 | self, prefix 174 | ) -> Tuple[str, TrieNode]: # TODO: Should this be private? 
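# An empty trie has no root node, so report a miss the same way
# TrieNode.get_subtrie does.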
175 | if self._root is None: 176 | return ("", EMPTY_NODE) 177 | return self._root.get_subtrie(prefix) 178 | 179 | def __iter__(self): 180 | self._iteration_index = -1 181 | self._child_values = [] 182 | if self._root is not None: 183 | self._child_values = [element for element in self._root] 184 | return self 185 | 186 | def __next__(self): 187 | """Return the next value from the Trie.""" 188 | self._iteration_index += 1 189 | if self._iteration_index < self._length: 190 | return self._child_values[self._iteration_index] 191 | raise StopIteration 192 | 193 | def push(self, element: str): 194 | if self._root is None: 195 | self._root = TrieNode(value=element, is_word=True, count=1) 196 | else: 197 | self._root.push(element) 198 | self._length += 1 199 | 200 | def print(self, print_stdout=True) -> str: 201 | string = "Trie\n" 202 | if self._root is not None: 203 | string += self._root.print() 204 | string = string.strip() 205 | if print_stdout: 206 | print(string) 207 | return string 208 | 209 | def __str__(self): 210 | return f"Trie(len={self._length}, {self._root})" 211 | 212 | __repr__ = __str__ 213 | 214 | 215 | def get_shared_prefix(word1: str, word2: str) -> str: 216 | """ 217 | Get the substring in the beginning of word1 and word2 which both share. 218 | 219 | Parameters 220 | ---------- 221 | word1 : str 222 | word2 : str 223 | 224 | Returns 225 | ------- 226 | shared_prefix : str 227 | 228 | Examples 229 | -------- 230 | >>> get_shared_prefix("foo", "bar") 231 | '' 232 | >>> get_shared_prefix("foobar", "bar") 233 | '' 234 | >>> get_shared_prefix("foobar", "foo") 235 | 'foo' 236 | >>> get_shared_prefix("steamship", "steampowered") 237 | 'steam' 238 | """ 239 | shared_prefix = "" 240 | for char1, char2 in zip(word1, word2): 241 | if char1 == char2: 242 | shared_prefix += char1 243 | else: 244 | break 245 | return shared_prefix 246 | -------------------------------------------------------------------------------- /mpu/pd.py: -------------------------------------------------------------------------------- 1 | """Pandas utility functions.""" 2 | 3 | # Core Library 4 | import datetime as dt 5 | import logging 6 | from typing import Any, Dict, List, Optional, Tuple 7 | 8 | # Third party 9 | import pandas as pd 10 | import pkg_resources 11 | 12 | # First party 13 | import mpu.shell 14 | 15 | countries_file = pkg_resources.resource_filename("mpu", "data/countries.csv") 16 | countries = pd.read_csv(countries_file) 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | def example_df() -> pd.DataFrame: 21 | """Create an example dataframe.""" 22 | country_names = ["Germany", "France", "Indonesia", "Ireland", "Spain", "Vatican"] 23 | population = [82521653, 66991000, 255461700, 4761865, 46549045, None] 24 | population_time = [ 25 | dt.datetime(2016, 12, 1), 26 | dt.datetime(2017, 1, 1), 27 | dt.datetime(2017, 1, 1), 28 | None, # Ireland 29 | dt.datetime(2017, 6, 1), # Spain 30 | None, 31 | ] 32 | euro = [True, True, False, True, True, True] 33 | df = pd.DataFrame( 34 | { 35 | "country": country_names, 36 | "population": population, 37 | "population_time": population_time, 38 | "EUR": euro, 39 | } 40 | ) 41 | df = df[["country", "population", "population_time", "EUR"]] 42 | return df 43 | 44 | 45 | def describe(df: pd.DataFrame, dtype: Optional[Dict] = None) -> Dict: 46 | """ 47 | Print a description of a Pandas dataframe. 
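One summary table is printed per type bucket; the return value maps each column name to the detected type ('int', 'float', 'category', 'time' or 'str').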
48 | 49 | Parameters 50 | ---------- 51 | df : pd.DataFrame 52 | dtype : Optional[Dict] 53 | Maps column names to types 54 | """ 55 | if dtype is None: 56 | dtype = {} 57 | print(f"Number of datapoints: {len(df)}") 58 | column_info, column_info_meta = _get_column_info(df, dtype) 59 | 60 | if len(column_info["int"]) > 0: 61 | _describe_int(df, column_info) 62 | 63 | if len(column_info["float"]) > 0: 64 | _describe_float(df, column_info) 65 | 66 | if len(column_info["category"]) > 0: 67 | _describe_category(df, column_info, column_info_meta) 68 | 69 | if len(column_info["time"]) > 0: 70 | _describe_time(df, column_info, column_info_meta) 71 | 72 | if len(column_info["other"]) > 0: 73 | _describe_other(df, column_info, column_info_meta) 74 | 75 | column_types = {} 76 | for column_type, columns in column_info.items(): 77 | for column_name in columns: 78 | if column_type == "other": 79 | column_type = "str" 80 | column_types[column_name] = column_type 81 | return column_types 82 | 83 | 84 | def _get_column_info(df: pd.DataFrame, dtype: Dict[str, str]) -> Tuple[Dict, Dict]: 85 | column_info: Dict[str, List[str]] = { 86 | "int": [], 87 | "float": [], 88 | "category": [], 89 | "other": [], 90 | "time": [], 91 | } 92 | float_types = ["float64"] 93 | integer_types = ["int64", "uint8"] 94 | time_types = ["datetime64[ns]"] 95 | other_types = ["object", "category"] 96 | column_info_meta: Dict[str, Dict[str, Any]] = {} 97 | for column_name in df: 98 | column_info_meta[column_name] = {} 99 | counter_obj = df[column_name].value_counts() 100 | value_list = [ 101 | key 102 | for key, value in sorted( 103 | counter_obj.items(), key=lambda n: (str(n[1]), str(n[0])) 104 | ) 105 | ] 106 | value_count = len(value_list) 107 | is_suspicious_cat = ( 108 | value_count <= 50 109 | and str(df[column_name].dtype) != "category" 110 | and column_name not in dtype 111 | ) 112 | if is_suspicious_cat: 113 | logger.warning( 114 | f"Column '{column_name}' has only {value_count} different " 115 | f"values ({value_list}). 
" 116 | "You might want to make it a 'category'" 117 | ) 118 | if len(value_list) > 0: 119 | top_count_val = counter_obj.tolist()[0] 120 | else: 121 | top_count_val = None 122 | column_info_meta[column_name]["top_count_val"] = top_count_val 123 | column_info_meta[column_name]["value_list"] = value_list 124 | column_info_meta[column_name]["value_count"] = value_count 125 | is_int_type = ( 126 | df[column_name].dtype in integer_types 127 | or column_name in dtype 128 | and dtype[column_name] in integer_types 129 | ) 130 | is_float_type = ( 131 | df[column_name].dtype in float_types 132 | or column_name in dtype 133 | and dtype[column_name] in float_types 134 | ) 135 | is_cat_type = ( 136 | str(df[column_name].dtype) in ["category", "bool"] 137 | or column_name in dtype 138 | and dtype[column_name] in ["category", "bool"] 139 | ) 140 | is_time_type = str(df[column_name].dtype) in time_types 141 | is_other_type = ( 142 | str(df[column_name].dtype) in other_types 143 | or column_name in dtype 144 | and dtype[column_name] in other_types 145 | ) 146 | if is_int_type: 147 | column_info["int"].append(column_name) 148 | elif is_float_type: 149 | column_info["float"].append(column_name) 150 | elif is_cat_type: 151 | column_info["category"].append(column_name) 152 | elif is_other_type: 153 | column_info["other"].append(column_name) 154 | elif is_time_type: 155 | column_info["time"].append(column_name) 156 | else: 157 | logger.warning( 158 | f"mpu.pd.describe does not know type '{df[column_name].dtype}'" 159 | ) 160 | return column_info, column_info_meta 161 | 162 | 163 | def _describe_int(df: pd.DataFrame, column_info: Dict) -> None: 164 | print("\n## Integer Columns") 165 | table = [ 166 | ["Column name", "Non-nan", "mean", "std", "min", "25%", "50%", "75%", "max"] 167 | ] 168 | for column_name in column_info["int"]: 169 | row = [] 170 | row.append(column_name) 171 | row.append(sum(df[column_name].notnull())) 172 | row.append(df[column_name].mean()) 173 | row.append(df[column_name].std()) 174 | row.append(df[column_name].min()) 175 | row.append(df[column_name].quantile(0.25)) 176 | row.append(df[column_name].quantile(0.50)) 177 | row.append(df[column_name].quantile(0.75)) 178 | row.append(max(df[column_name])) 179 | table.append(row) 180 | mpu.shell.print_table(table) 181 | 182 | 183 | def _describe_float(df: pd.DataFrame, column_info: Dict) -> None: 184 | print("\n## Float Columns") 185 | table = [ 186 | ["Column name", "Non-nan", "mean", "std", "min", "25%", "50%", "75%", "max"] 187 | ] 188 | for column_name in column_info["float"]: 189 | row = [] 190 | row.append(column_name) 191 | row.append(sum(df[column_name].notnull())) 192 | row.append(f"{df[column_name].mean():0.2f}") 193 | row.append(f"{df[column_name].std():0.2f}") 194 | row.append(f"{df[column_name].min():0.2f}") 195 | row.append(f"{df[column_name].quantile(0.25):0.2f}") 196 | row.append(f"{df[column_name].quantile(0.50):0.2f}") 197 | row.append(f"{df[column_name].quantile(0.75):0.2f}") 198 | row.append(f"{max(df[column_name]):0.2f}") 199 | table.append(row) 200 | mpu.shell.print_table(table) 201 | 202 | 203 | def _describe_category( 204 | df: pd.DataFrame, column_info: Dict, column_info_meta: Dict 205 | ) -> None: 206 | print("\n## Category Columns") 207 | table = [["Column name", "Non-nan", "unique", "top el", "top (count)", "rest"]] 208 | for column_name in column_info["category"]: 209 | row = [] 210 | row.append(column_name) 211 | row.append(sum(df[column_name].notnull())) 212 | row.append(len(df[column_name].unique())) 213 | 
row.append(column_info_meta[column_name]["value_list"][0]) 214 | row.append(column_info_meta[column_name]["top_count_val"]) 215 | rest = str(column_info_meta[column_name]["value_list"][1:])[:40] 216 | row.append(rest) 217 | table.append(row) 218 | mpu.shell.print_table(table) 219 | 220 | 221 | def _describe_time(df: pd.DataFrame, column_info: Dict, column_info_meta: Dict) -> None: 222 | print("\n## Time Columns") 223 | table = [ 224 | ["Column name", "Non-nan", "unique", "top el", "top (count)", "min", "max"] 225 | ] 226 | for column_name in column_info["time"]: 227 | row = [] 228 | row.append(column_name) 229 | row.append(sum(df[column_name].notnull())) 230 | row.append(len(df[column_name].unique())) 231 | row.append(column_info_meta[column_name]["value_list"][0]) 232 | row.append(column_info_meta[column_name]["top_count_val"]) 233 | row.append(df[column_name].min()) 234 | row.append(df[column_name].max()) 235 | table.append(row) 236 | mpu.shell.print_table(table) 237 | 238 | 239 | def _describe_other( 240 | df: pd.DataFrame, column_info: Dict, column_info_meta: Dict 241 | ) -> None: 242 | print("\n## Other Columns") 243 | table = [["Column name", "Non-nan", "unique", "top", "(count)", "rest"]] 244 | for column_name in column_info["other"]: 245 | row = [] 246 | row.append(column_name) 247 | row.append(sum(df[column_name].notnull())) 248 | row.append(len(df[column_name].unique())) 249 | row.append(column_info_meta[column_name]["value_list"][0]) 250 | row.append(column_info_meta[column_name]["top_count_val"]) 251 | rest = str(column_info_meta[column_name]["value_list"][1:])[:40] 252 | row.append(rest) 253 | table.append(row) 254 | mpu.shell.print_table(table) 255 | -------------------------------------------------------------------------------- /tests/test_io.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """Test the mpu.io module.""" 4 | 5 | # Core Library 6 | import datetime 7 | import os 8 | import sys 9 | from unittest import mock 10 | 11 | # Third party 12 | import pkg_resources 13 | import pytest 14 | 15 | # First party 16 | import mpu.io 17 | from mpu.io import ( 18 | _write_jsonl, 19 | download, 20 | get_file_meta, 21 | gzip_file, 22 | read, 23 | urlread, 24 | write, 25 | ) 26 | 27 | 28 | def test_download_with_path(jpg_tempfile): 29 | source = ( 30 | "https://upload.wikimedia.org/wikipedia/commons/e/e9/" 31 | "Aurelia-aurita-3-1-style.jpg" 32 | ) 33 | download(source, jpg_tempfile) 34 | assert os.path.getsize(jpg_tempfile) == 116087 35 | 36 | 37 | def test_get_file_meta(): 38 | path = "files/example.json" 39 | source = pkg_resources.resource_filename(__name__, path) 40 | with mock.patch.dict(sys.modules, {"magic": None}): 41 | meta = get_file_meta(source) 42 | meta["filepath"] = None 43 | meta["last_access_datetime"] = None 44 | meta["modification_datetime"] = None 45 | 46 | # Exists on Windows, but not on Linux 47 | meta["creation_datetime"] = None 48 | 49 | expected = { 50 | "filepath": None, 51 | "creation_datetime": None, 52 | "last_access_datetime": None, 53 | "modification_datetime": None, 54 | } 55 | assert meta == expected 56 | 57 | 58 | def test_urlread(): 59 | url = "http://example.com" 60 | sample = urlread(url) 61 | assert sample.startswith("") 62 | 63 | 64 | def test_download_without_path(): 65 | source = ( 66 | "https://upload.wikimedia.org/wikipedia/commons/e/e9/" 67 | "Aurelia-aurita-3-1-style.jpg" 68 | ) 69 | sink = download(source) 70 | download(source, sink) 71 | assert os.path.getsize(sink) 
== 116087 72 | os.remove(sink) # cleanup of mkstemp 73 | 74 | 75 | def test_read_csv(): 76 | path = "files/example.csv" 77 | source = pkg_resources.resource_filename(__name__, path) 78 | data_real = read(source) 79 | data_exp = [ 80 | ["a", "b", "c"], # 0 81 | ["1", "A towel,", "1.0"], # 1 82 | ["42", " it says, ", "2.0"], # 2 83 | ["1337", "is about the most ", "-1"], # 3 84 | ["0", "massively useful thing ", "123"], # 4 85 | ["-2", "an interstellar hitchhiker can have.\n", "3"], # 5 86 | ["3.141", "Special char test: €üößł", "2.7"], # 6 87 | ] 88 | assert len(data_real) == len(data_exp) 89 | assert data_real[0] == data_exp[0] 90 | assert data_real[1] == data_exp[1] 91 | assert data_real[2] == data_exp[2] 92 | assert data_real[3] == data_exp[3] 93 | assert data_real[4] == data_exp[4] 94 | assert data_real[5] == data_exp[5] 95 | assert data_real[6] == data_exp[6] 96 | assert data_real == data_exp 97 | data_real = read(source, skiprows=1) 98 | assert data_real == data_exp[1:] 99 | data_real = read(source, skiprows=1, delimiter=",", quotechar='"') 100 | assert data_real == data_exp[1:] 101 | 102 | 103 | def test_read_csv_dicts(): 104 | path = "files/example.csv" 105 | source = pkg_resources.resource_filename(__name__, path) 106 | data_real = read(source, format="dicts") 107 | data_exp = [ 108 | {"a": "1", "b": "A towel,", "c": "1.0"}, 109 | {"a": "42", "b": " it says, ", "c": "2.0"}, 110 | {"a": "1337", "b": "is about the most ", "c": "-1"}, 111 | {"a": "0", "b": "massively useful thing ", "c": "123"}, 112 | {"a": "-2", "b": "an interstellar hitchhiker can have.\n", "c": "3"}, 113 | {"a": "3.141", "b": "Special char test: €üößł", "c": "2.7"}, 114 | ] 115 | assert len(data_real) == len(data_exp) 116 | assert data_real[0] == data_exp[0] 117 | assert data_real == data_exp 118 | 119 | 120 | def test_write_csv(csv_tempfile): 121 | newline = "\n" 122 | data = [ 123 | ["1", "A towel,", "1.0"], 124 | ["42", " it says, ", "2.0"], 125 | ["1337", "is about the most ", "-1"], 126 | ["0", "massively useful thing ", "123"], 127 | ["-2", "an interstellar hitchhiker can have.\n", "3"], 128 | ] 129 | write(csv_tempfile, data, newline=newline) 130 | data_read = read(csv_tempfile, newline=newline) 131 | assert data == data_read 132 | 133 | 134 | def test_write_h5(hdf5_tempfile): 135 | data = [ 136 | ["1", "A towel,", "1.0"], 137 | ["42", " it says, ", "2.0"], 138 | ["1337", "is about the most ", "-1"], 139 | ["0", "massively useful thing ", "123"], 140 | ["-2", "an interstellar hitchhiker can have.\n", "3"], 141 | ] 142 | with pytest.raises(NotImplementedError): 143 | write(hdf5_tempfile, data) 144 | 145 | 146 | def test_write_csv_params(csv_tempfile): 147 | data = [ 148 | ["1", "A towel,", "1.0"], 149 | ["42", " it says, ", "2.0"], 150 | ["1337", "is about the most ", "-1"], 151 | ["0", "massively useful thing ", "123"], 152 | ["-2", "an interstellar hitchhiker can have.\n", "3"], 153 | ] 154 | newline = "\n" 155 | write(csv_tempfile, data, delimiter=",", quotechar='"', newline=newline) 156 | data_read = read(csv_tempfile, delimiter=",", quotechar='"', newline=newline) 157 | assert data == data_read 158 | 159 | 160 | def test_read_hdf5(): 161 | path = "files/example.hdf5" 162 | source = pkg_resources.resource_filename(__name__, path) 163 | with pytest.raises(NotImplementedError): 164 | read(source) 165 | 166 | 167 | def test_read_json(): 168 | path = "files/example.json" 169 | source = pkg_resources.resource_filename(__name__, path) 170 | data_real = read(source) 171 | 172 | data_exp = { 173 | "a list": [1, 
42, 3.141, 1337, "help", "€"], 174 | "a string": "bla", 175 | "another dict": {"foo": "bar", "key": "value", "the answer": 42}, 176 | } 177 | assert data_real == data_exp 178 | 179 | 180 | def test_read_jsonl(): 181 | path = "files/example.jsonl" 182 | source = pkg_resources.resource_filename(__name__, path) 183 | data_real = read(source) 184 | data_exp = [ 185 | {"some": "thing"}, 186 | {"foo": 17, "bar": False, "quux": True}, 187 | {"may": {"include": "nested", "objects": ["and", "arrays"]}}, 188 | ] 189 | assert len(data_real) == len(data_exp) 190 | for real, exp_ in zip(data_real, data_exp): 191 | assert real == exp_ 192 | 193 | 194 | def test_read_pickle(): 195 | path = "files/example.pickle" 196 | source = pkg_resources.resource_filename(__name__, path) 197 | data_real = read(source) 198 | 199 | data_exp = { 200 | "a list": [1, 42, 3.141, 1337, "help", "€"], 201 | "a string": "bla", 202 | "another dict": {"foo": "bar", "key": "value", "the answer": 42}, 203 | } 204 | assert data_real == data_exp 205 | 206 | 207 | def test_write_json(json_tempfile): 208 | data = { 209 | "a list": [1, 42, 3.141, 1337, "help", "€"], 210 | "a string": "bla", 211 | "another dict": {"foo": "bar", "key": "value", "the answer": 42}, 212 | } 213 | write(json_tempfile, data) 214 | data_read = read(json_tempfile) 215 | assert data == data_read 216 | 217 | 218 | def test_write_jsonl(jsonl_tempfile): 219 | data = [ 220 | {"some": "thing"}, 221 | {"foo": 17, "bar": False, "quux": True}, 222 | {"may": {"include": "nested", "objects": ["and", "arrays"]}}, 223 | ] 224 | write(jsonl_tempfile, data) 225 | data_read = read(jsonl_tempfile) 226 | assert data == data_read 227 | 228 | 229 | def test_write_jsonl_all_params(jsonl_tempfile): 230 | data = [ 231 | {"some": "thing"}, 232 | {"foo": 17, "bar": False, "quux": True}, 233 | {"may": {"include": "nested", "objects": ["and", "arrays"]}}, 234 | ] 235 | _write_jsonl( 236 | jsonl_tempfile, 237 | data, 238 | kwargs={"sort_keys": True, "separators": (",", ": "), "ensure_ascii": True}, 239 | ) 240 | data_read = read(jsonl_tempfile) 241 | assert data == data_read 242 | 243 | 244 | def test_write_json_params(json_tempfile): 245 | data = { 246 | "a list": [1, 42, 3.141, 1337, "help", "€"], 247 | "a string": "bla", 248 | "another dict": {"foo": "bar", "key": "value", "the answer": 42}, 249 | } 250 | write( 251 | json_tempfile, 252 | data, 253 | indent=4, 254 | sort_keys=True, 255 | separators=(",", ":"), 256 | ensure_ascii=False, 257 | ) 258 | data_read = read(json_tempfile) 259 | assert data == data_read 260 | 261 | 262 | def test_write_pickle(pickle_tempfile): 263 | data = { 264 | "a list": [1, 42, 3.141, 1337, "help", "€"], 265 | "a string": "bla", 266 | "another dict": {"foo": "bar", "key": "value", "the answer": 42}, 267 | } 268 | write(pickle_tempfile, data) 269 | data_read = read(pickle_tempfile) 270 | assert data == data_read 271 | 272 | 273 | def test_write_pickle_protocol(pickle_tempfile): 274 | data = { 275 | "a list": [1, 42, 3.141, 1337, "help", "€"], 276 | "a string": "bla", 277 | "another dict": {"foo": "bar", "key": "value", "the answer": 42}, 278 | } 279 | write(pickle_tempfile, data, protocol=0) 280 | data_read = read(pickle_tempfile) 281 | assert data == data_read 282 | 283 | 284 | def test_read_h5(): 285 | source = pkg_resources.resource_filename("mpu", "io.py") 286 | with pytest.raises(NotImplementedError): 287 | read(source) 288 | 289 | 290 | def test_gzip(pickle_tempfile): 291 | path = "files/example.csv" 292 | source = 
pkg_resources.resource_filename(__name__, path) 293 | gzip_file(source, pickle_tempfile) 294 | 295 | 296 | def test_hash(): 297 | path = "files/example.pickle" 298 | source = pkg_resources.resource_filename(__name__, path) 299 | assert mpu.io.hash(source) == "e845794fde22e7a33dd389ed0f5381ae042154c1" 300 | expected_hash_md5 = "c59db499d09531a5937c2ae2342cb18b" 301 | assert mpu.io.hash(source, method="md5") == expected_hash_md5 302 | 303 | 304 | def test_get_creation_datetime(): 305 | ret_val = mpu.io.get_creation_datetime(__file__) 306 | assert isinstance(ret_val, datetime.datetime) or ret_val is None 307 | 308 | 309 | def test_get_creation_datetime_windows(): 310 | with mock.patch("platform.system", mock.MagicMock(return_value="Windows")): 311 | ret_val = mpu.io.get_creation_datetime(__file__) 312 | assert isinstance(ret_val, datetime.datetime) or ret_val is None 313 | 314 | 315 | def test_get_modification_datetime(): 316 | ret_val = mpu.io.get_modification_datetime(__file__) 317 | assert isinstance(ret_val, datetime.datetime) 318 | 319 | 320 | def test_get_access_datetime(): 321 | ret_val = mpu.io.get_access_datetime(__file__) 322 | assert isinstance(ret_val, datetime.datetime) 323 | -------------------------------------------------------------------------------- /mpu/geometry.py: -------------------------------------------------------------------------------- 1 | """ 2 | Create and manipulate two-dimensional geometrical entities such as lines. 3 | 4 | For more advanced use cases, see: 5 | 6 | * `sympy.geometry <https://docs.sympy.org/latest/modules/geometry/index.html>`_ 7 | * `Shapely <https://shapely.readthedocs.io/>`_ 8 | """ 9 | 10 | from __future__ import annotations 11 | 12 | # Core Library 13 | import math 14 | from typing import Any, cast 15 | 16 | # First party 17 | from mpu.datastructures import Interval 18 | 19 | EPSILON = 0.000001 20 | FULL_ROTATION = 360 21 | 22 | 23 | class Point: 24 | """ 25 | A point in a 2-dimensional Euclidean space. 26 | 27 | Parameters 28 | ---------- 29 | x : float 30 | y : float 31 | """ 32 | 33 | def __init__(self, x: float, y: float): 34 | self.x = x 35 | self.y = y 36 | 37 | def __str__(self) -> str: 38 | return f"({self.x}|{self.y})" 39 | 40 | __repr__ = __str__ 41 | 42 | def __eq__(self, other: Any) -> bool: 43 | if not isinstance(other, Point): 44 | return False 45 | return self.x == other.x and self.y == other.y 46 | 47 | def __hash__(self) -> int: 48 | return hash((self.x, self.y)) 49 | 50 | def simplify(self) -> Point: 51 | return self 52 | 53 | 54 | class LineSegment: 55 | """ 56 | A line segment in a 2-dimensional Euclidean space. 57 | 58 | Parameters 59 | ---------- 60 | p1 : Point 61 | p2 : Point 62 | """ 63 | 64 | def __init__(self, p1: Point, p2: Point, name: str = "LineSegment"): 65 | self.p1 = p1 66 | self.p2 = p2 67 | self.name = name 68 | 69 | def length(self) -> float: 70 | """Get the length of this line segment.""" 71 | return ((self.p1.x - self.p2.x) ** 2 + (self.p1.y - self.p2.y) ** 2) ** 0.5 72 | 73 | def is_point(self) -> bool: 74 | """Check if this LineSegment is a point.""" 75 | return self.p1 == self.p2 76 | 77 | def angle(self) -> float: 78 | """Get the angle of this line.""" 79 | dx = self.p2.x - self.p1.x 80 | dy = self.p2.y - self.p1.y 81 | theta = math.atan2(dy, dx) 82 | angle = math.degrees(theta) # angle is in (-180, 180] 83 | if angle < 0: 84 | angle = FULL_ROTATION + angle 85 | return angle 86 | 87 | def _get_equation_parameters(self) -> tuple[float, float]: 88 | """ 89 | Get the slope and the intercept of a line.
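Vertical segments have no slope-intercept form, so a ValueError is raised when both points share the same x-coordinate.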
90 | 91 | y1 = m*x1 + t 92 | y2 = m*x2 + t 93 | => y1 = m*x1 + (y2-m*x2) 94 | <=> m = (y1 - y2) /(x1-x2) 95 | t = y1 - m*x1 96 | """ 97 | if self.p1.x == self.p2.x: 98 | raise ValueError("The given points have the same x-coordinate") 99 | 100 | y1 = self.p1.y 101 | y2 = self.p2.y 102 | x1 = self.p1.x 103 | x2 = self.p2.x 104 | m = (y1 - y2) / (x1 - x2) 105 | t = y1 - m * x1 106 | return m, t 107 | 108 | def simplify(self) -> Point | LineSegment: 109 | """Simplify this line segment to a point, if possible.""" 110 | if self.is_point(): 111 | return self.p1 112 | if self.p1.x > self.p2.x: 113 | return LineSegment(p1=self.p2, p2=self.p1) 114 | else: 115 | return self 116 | 117 | def intersect(self, other: LineSegment) -> None | LineSegment | Point: 118 | """ 119 | Get the intersection between this LineSegment and another LineSegment. 120 | 121 | Parameters 122 | ---------- 123 | other : LineSegment 124 | 125 | Returns 126 | ------- 127 | intersection : None | LineSegment | Point 128 | """ 129 | if not do_lines_intersect(self, other): 130 | return None 131 | if self.is_point(): 132 | p1 = self.simplify() 133 | return p1 # we know they intersect 134 | elif other.is_point(): 135 | return other.intersect(self) 136 | elif self.angle() == other.angle(): 137 | # The overlap is a line segment or a point! 138 | if self.angle() in [90, 270]: 139 | # The line segment is not a function 140 | x = self.p1.x 141 | return _get_straight_line_intersection( 142 | x, other.p1.y, other.p2.y, self.p1.y, self.p2.y 143 | ) 144 | else: 145 | # The LineSegment is a function 146 | x_start = max(min(self.p1.x, self.p2.x), min(other.p1.x, other.p2.x)) 147 | x_end = min(max(self.p1.x, self.p2.x), max(other.p1.x, other.p2.x)) 148 | m, t = self._get_equation_parameters() 149 | p1 = Point(x_start, m * x_start + t) 150 | p2 = Point(x_end, m * x_end + t) 151 | return LineSegment(p1, p2) 152 | else: 153 | # We know that we have two real line segments, that they intersect 154 | # and that their angle is different. Hence the return value 155 | # must be a point 156 | if self.angle() in [90, 270]: 157 | x = self.p1.x 158 | 159 | if other.angle() in [90, 270]: 160 | return _get_straight_line_intersection( 161 | x, other.p1.y, other.p2.y, self.p1.y, self.p2.y 162 | ) 163 | else: 164 | m, t = other._get_equation_parameters() 165 | y = m * x + t 166 | return Point(x, y) 167 | elif other.angle() in [90, 270]: 168 | x = other.p1.x 169 | m, t = self._get_equation_parameters() 170 | y = m * x + t 171 | return Point(x, y) 172 | else: 173 | # The overlap is a point 174 | m1, t1 = self._get_equation_parameters() 175 | m2, t2 = other._get_equation_parameters() 176 | # m1 * x + t1 = m2 * x + t2 177 | # <=> (m1 - m2) * x = t2 - t1 178 | # <=> x = (t2 - t1) / (m1 - m2) 179 | x = (t2 - t1) / (m1 - m2) 180 | y = m1 * x + t1 181 | return Point(x, y) 182 | 183 | def bounding_box(self) -> tuple[Point, Point]: 184 | """ 185 | Get the bounding box of this line represented by two points. 186 | 187 | The p1 point is in the lower left corner, the p2 one at the 188 | upper right corner.
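
Examples
--------
>>> LineSegment(Point(2, 3), Point(0, 1)).bounding_box()
((0|1), (2|3))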
189 | """ 190 | result = ( 191 | Point(min(self.p1.x, self.p2.x), min(self.p1.y, self.p2.y)), 192 | Point(max(self.p1.x, self.p2.x), max(self.p1.y, self.p2.y)), 193 | ) 194 | return result 195 | 196 | def __str__(self) -> str: 197 | if self.name == "LineSegment": 198 | return f"LineSegment [{self.p1} to {self.p2}]" 199 | else: 200 | return self.name 201 | 202 | __repr__ = __str__ 203 | 204 | def __hash__(self) -> int: 205 | return hash((self.p1, self.p2, self.name)) 206 | 207 | def __eq__(self, other: Any) -> bool: 208 | if not isinstance(other, LineSegment): 209 | return False 210 | return self.name == other.name and ( 211 | (self.p1 == other.p1 and self.p2 == other.p2) 212 | or (self.p1 == other.p2 and self.p2 == other.p1) 213 | ) 214 | 215 | 216 | def _get_straight_line_intersection( 217 | x: float, other_y1: float, other_y2: float, self_y1: float, self_y2: float 218 | ) -> Point | LineSegment: 219 | """Get the intersection point of two straight vertical lines.""" 220 | self_y = Interval(left=min(self_y1, self_y2), right=max(self_y1, self_y2)) 221 | other_y = Interval(left=min(other_y1, other_y2), right=max(other_y1, other_y2)) 222 | 223 | intersection = self_y.intersection(other_y) 224 | if intersection.left == intersection.right: 225 | return Point(x, cast(float, intersection.left)) 226 | else: 227 | return LineSegment( 228 | Point(x, cast(float, intersection.left)), 229 | Point(x, cast(float, intersection.right)), 230 | ) 231 | 232 | 233 | def do_bounding_boxes_intersect(a: tuple[Point, Point], b: tuple[Point, Point]) -> bool: 234 | """ 235 | Check if bounding boxes do intersect. 236 | 237 | If one bounding box touches the other, they do intersect. 238 | """ 239 | return ( 240 | a[0].x <= b[1].x and a[1].x >= b[0].x and a[0].y <= b[1].y and a[1].y >= b[0].y 241 | ) 242 | 243 | 244 | def crossproduct(a: Point, b: Point) -> float: 245 | """Get the cross product of two points.""" 246 | return a.x * b.y - b.x * a.y 247 | 248 | 249 | def is_point_on_line(a: LineSegment, b: Point) -> bool: 250 | """Check if point b is on LineSegment a.""" 251 | # Move the image, so that a.p1 is on (0|0) 252 | p2 = Point(a.p2.x - a.p1.x, a.p2.y - a.p1.y) 253 | a_tmp = LineSegment(Point(0, 0), p2) 254 | b_tmp = Point(b.x - a.p1.x, b.y - a.p1.y) 255 | r = crossproduct(a_tmp.p2, b_tmp) 256 | return abs(r) < EPSILON 257 | 258 | 259 | def is_point_right_of_line(a: LineSegment, b: Point) -> bool: 260 | """Check if point b is right of line a.""" 261 | # Move the image, so that a.p1 is on (0|0) 262 | a_tmp = LineSegment(Point(0, 0), Point(a.p2.x - a.p1.x, a.p2.y - a.p1.y)) 263 | b_tmp = Point(b.x - a.p1.x, b.y - a.p1.y) 264 | return crossproduct(a_tmp.p2, b_tmp) < 0 265 | 266 | 267 | def line_segment_touches_or_crosses_line(a: LineSegment, b: LineSegment) -> bool: 268 | """Check if line segment a touches or crosses line segment b.""" 269 | return ( 270 | is_point_on_line(a, b.p1) 271 | or is_point_on_line(a, b.p2) 272 | or (is_point_right_of_line(a, b.p1) ^ is_point_right_of_line(a, b.p2)) 273 | ) 274 | 275 | 276 | def do_lines_intersect(a: LineSegment, b: LineSegment) -> bool: 277 | """Check if LineSegments a and b intersect.""" 278 | box1 = a.bounding_box() 279 | box2 = b.bounding_box() 280 | return ( 281 | do_bounding_boxes_intersect(box1, box2) 282 | and line_segment_touches_or_crosses_line(a, b) 283 | and line_segment_touches_or_crosses_line(b, a) 284 | ) 285 | 286 | 287 | def get_all_intersecting_lines_by_brute_force( 288 | lines: list[LineSegment], 289 | ) -> set[frozenset[LineSegment]]: 290 | """ 291 | Get 
292 | 
293 |     Parameters
294 |     ----------
295 |     lines : all lines you want to check, in no order
296 | 
297 |     Returns
298 |     -------
299 |     intersections : a set of frozensets, each containing a pair of intersecting lines
300 |     """
301 |     intersections: set[frozenset[LineSegment]] = set()
302 | 
303 |     for i in range(len(lines)):
304 |         for j in range(i + 1, len(lines)):
305 |             if do_lines_intersect(lines[i], lines[j]):
306 |                 tmp = frozenset({lines[i], lines[j]})
307 |                 intersections.add(tmp)
308 |     return intersections
309 | 
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # mpu documentation build configuration file, created by
4 | # sphinx-quickstart on Wed May 2 22:11:51 2018.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 | 
15 | # Core Library
16 | import os
17 | import sys
18 | from typing import Any, Dict, List
19 | 
20 | # If extensions (or modules to document with autodoc) are in another directory,
21 | # add these directories to sys.path here. If the directory is relative to the
22 | # documentation root, use os.path.abspath to make it absolute, like shown here.
23 | sys.path.insert(0, os.path.abspath("."))
24 | sys.path.insert(0, os.path.abspath("../"))
25 | sys.path.insert(0, os.path.abspath("../../"))
26 | 
27 | # The version info for the project you're documenting, acts as replacement for
28 | # |version| and |release|, also used in various other places throughout the
29 | # built documents.
30 | import mpu  # isort:skip # noqa
31 | 
32 | # -- General configuration ------------------------------------------------
33 | 
34 | # If your documentation needs a minimal Sphinx version, state it here.
35 | #
36 | # needs_sphinx = '1.0'
37 | 
38 | # Add any Sphinx extension module names here, as strings. They can be
39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
40 | # ones.
41 | extensions = [
42 |     "sphinx.ext.autodoc",
43 |     "sphinx.ext.autosummary",
44 |     "sphinx.ext.coverage",
45 |     "sphinx.ext.mathjax",
46 |     "sphinx.ext.viewcode",
47 |     "sphinx.ext.napoleon",
48 | ]
49 | 
50 | # Add any paths that contain templates here, relative to this directory.
51 | templates_path = ["_templates"]
52 | 
53 | # The suffix(es) of source filenames.
54 | # You can specify multiple suffixes as a list of strings:
55 | #
56 | # source_suffix = ['.rst', '.md']
57 | source_suffix = ".rst"
58 | 
59 | # The encoding of source files.
60 | # source_encoding = 'utf-8-sig'
61 | 
62 | # The master toctree document.
63 | master_doc = "index"
64 | 
65 | # General information about the project.
66 | project = "mpu"
67 | copyright = "2018, Martin Thoma"
68 | author = "Martin Thoma"
69 | 
70 | # The short X.Y version.
71 | version = ".".join(mpu.__version__.split(".", 2)[:2])
72 | # The full version, including alpha/beta/rc tags.
73 | release = mpu.__version__
74 | 
75 | # The language for content autogenerated by Sphinx. Refer to documentation
76 | # for a list of supported languages.
77 | #
78 | # This is also used if you do content translation via gettext catalogs.
79 | # Usually you set "language" from the command line for these cases.
80 | language = None
81 | 
82 | # There are two options for replacing |today|: either, you set today to some
83 | # non-false value, then it is used:
84 | # today = ''
85 | # Else, today_fmt is used as the format for a strftime call.
86 | # today_fmt = '%B %d, %Y'
87 | 
88 | # List of patterns, relative to source directory, that match files and
89 | # directories to ignore when looking for source files.
90 | # These patterns also affect html_static_path and html_extra_path
91 | exclude_patterns: List[Any] = []
92 | 
93 | # The reST default role (used for this markup: `text`) to use for all
94 | # documents.
95 | # default_role = None
96 | 
97 | # If true, '()' will be appended to :func: etc. cross-reference text.
98 | # add_function_parentheses = True
99 | 
100 | # If true, the current module name will be prepended to all description
101 | # unit titles (such as .. function::).
102 | # add_module_names = True
103 | 
104 | # If true, sectionauthor and moduleauthor directives will be shown in the
105 | # output. They are ignored by default.
106 | # show_authors = False
107 | 
108 | # The name of the Pygments (syntax highlighting) style to use.
109 | pygments_style = "sphinx"
110 | 
111 | # A list of ignored prefixes for module index sorting.
112 | # modindex_common_prefix = []
113 | 
114 | # If true, keep warnings as "system message" paragraphs in the built documents.
115 | # keep_warnings = False
116 | 
117 | # If true, `todo` and `todoList` produce output, else they produce nothing.
118 | todo_include_todos = False
119 | 
120 | 
121 | # -- Options for HTML output ----------------------------------------------
122 | 
123 | # The theme to use for HTML and HTML Help pages. See the documentation for
124 | # a list of builtin themes.
125 | html_theme = "sphinx_rtd_theme"
126 | 
127 | # Theme options are theme-specific and customize the look and feel of a theme
128 | # further. For a list of options available for each theme, see the
129 | # documentation.
130 | html_theme_options = {
131 |     "canonical_url": "",
132 |     "analytics_id": "",
133 |     "logo_only": False,
134 |     "display_version": True,
135 |     "prev_next_buttons_location": "bottom",
136 |     "style_external_links": False,
137 |     # Toc options
138 |     "collapse_navigation": True,
139 |     "sticky_navigation": True,
140 |     "navigation_depth": 4,
141 |     "includehidden": True,
142 |     "titles_only": False,
143 | }
144 | 
145 | # Add any paths that contain custom themes here, relative to this directory.
146 | # html_theme_path = []
147 | 
148 | # The name for this set of Sphinx documents.
149 | # "<project> v<release> documentation" by default.
150 | # html_title = u'mpu v0.1.0'
151 | 
152 | # A shorter title for the navigation bar. Default is the same as html_title.
153 | # html_short_title = None
154 | 
155 | # The name of an image file (relative to this directory) to place at the top
156 | # of the sidebar.
157 | # html_logo = None
158 | 
159 | # The name of an image file (relative to this directory) to use as a favicon of
160 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
161 | # pixels large.
162 | # html_favicon = None
163 | 
164 | # Add any paths that contain custom static files (such as style sheets) here,
165 | # relative to this directory. They are copied after the builtin static files,
166 | # so a file named "default.css" will overwrite the builtin "default.css".
167 | html_static_path = ["_static"]
168 | 
169 | # Add any extra paths that contain custom files (such as robots.txt or
170 | # .htaccess) here, relative to this directory. These files are copied
171 | # directly to the root of the documentation.
172 | # html_extra_path = []
173 | 
174 | # If not None, a 'Last updated on:' timestamp is inserted at every page
175 | # bottom, using the given strftime format.
176 | # The empty string is equivalent to '%b %d, %Y'.
177 | # html_last_updated_fmt = None
178 | 
179 | # If true, SmartyPants will be used to convert quotes and dashes to
180 | # typographically correct entities.
181 | # html_use_smartypants = True
182 | 
183 | # Custom sidebar templates, maps document names to template names.
184 | # html_sidebars = {}
185 | 
186 | # Additional templates that should be rendered to pages, maps page names to
187 | # template names.
188 | # html_additional_pages = {}
189 | 
190 | # If false, no module index is generated.
191 | html_domain_indices = True
192 | 
193 | # If false, no index is generated.
194 | # html_use_index = True
195 | 
196 | # If true, the index is split into individual pages for each letter.
197 | # html_split_index = False
198 | 
199 | # If true, links to the reST sources are added to the pages.
200 | # html_show_sourcelink = True
201 | 
202 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
203 | # html_show_sphinx = True
204 | 
205 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
206 | # html_show_copyright = True
207 | 
208 | # If true, an OpenSearch description file will be output, and all pages will
209 | # contain a <link> tag referring to it. The value of this option must be the
210 | # base URL from which the finished HTML is served.
211 | # html_use_opensearch = ''
212 | 
213 | # This is the file name suffix for HTML files (e.g. ".xhtml").
214 | # html_file_suffix = None
215 | 
216 | # Language to be used for generating the HTML full-text search index.
217 | # Sphinx supports the following languages:
218 | #   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
219 | #   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
220 | # html_search_language = 'en'
221 | 
222 | # A dictionary with options for the search language support, empty by default.
223 | # 'ja' uses this config value.
224 | # 'zh' users can customize the `jieba` dictionary path.
225 | # html_search_options = {'type': 'default'}
226 | 
227 | # The name of a javascript file (relative to the configuration directory) that
228 | # implements a search results scorer. If empty, the default will be used.
229 | # html_search_scorer = 'scorer.js'
230 | 
231 | # Output file base name for HTML help builder.
232 | htmlhelp_basename = "mpudoc"
233 | 
234 | # -- Options for LaTeX output ---------------------------------------------
235 | 
236 | latex_elements: Dict[str, Any] = {
237 |     # The paper size ('letterpaper' or 'a4paper').
238 |     # 'papersize': 'letterpaper',
239 |     # The font size ('10pt', '11pt' or '12pt').
240 |     # 'pointsize': '10pt',
241 |     # Additional stuff for the LaTeX preamble.
242 |     # 'preamble': '',
243 |     # Latex figure (float) alignment
244 |     # 'figure_align': 'htbp',
245 | }
246 | 
247 | # Grouping the document tree into LaTeX files. List of tuples
248 | # (source start file, target name, title,
249 | #  author, documentclass [howto, manual, or own class]).
250 | latex_documents = [
251 |     (master_doc, "mpu.tex", "mpu Documentation", "Martin Thoma", "manual")
252 | ]
253 | 
254 | # The name of an image file (relative to this directory) to place at the top of
255 | # the title page.
256 | # latex_logo = None
257 | 
258 | # For "manual" documents, if this is true, then toplevel headings are parts,
259 | # not chapters.
260 | # latex_use_parts = False
261 | 
262 | # If true, show page references after internal links.
263 | # latex_show_pagerefs = False
264 | 
265 | # If true, show URL addresses after external links.
266 | # latex_show_urls = False
267 | 
268 | # Documents to append as an appendix to all manuals.
269 | # latex_appendices = []
270 | 
271 | # If false, no module index is generated.
272 | # latex_domain_indices = True
273 | 
274 | 
275 | # -- Options for manual page output ---------------------------------------
276 | 
277 | # One entry per manual page. List of tuples
278 | # (source start file, name, description, authors, manual section).
279 | man_pages = [(master_doc, "mpu", "mpu Documentation", [author], 1)]
280 | 
281 | # If true, show URL addresses after external links.
282 | # man_show_urls = False
283 | 
284 | 
285 | # -- Options for Texinfo output -------------------------------------------
286 | 
287 | # Grouping the document tree into Texinfo files. List of tuples
288 | # (source start file, target name, title, author,
289 | #  dir menu entry, description, category)
290 | texinfo_documents = [
291 |     (
292 |         master_doc,
293 |         "mpu",
294 |         "mpu Documentation",
295 |         author,
296 |         "mpu",
297 |         "One line description of project.",
298 |         "Miscellaneous",
299 |     )
300 | ]
301 | 
302 | # Documents to append as an appendix to all manuals.
303 | # texinfo_appendices = []
304 | 
305 | # If false, no module index is generated.
306 | # texinfo_domain_indices = True
307 | 
308 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
309 | # texinfo_show_urls = 'footnote'
310 | 
311 | # If true, do not generate a @detailmenu in the "Top" node's menu.
312 | # texinfo_no_detailmenu = False
313 | 
--------------------------------------------------------------------------------
/mpu/string.py:
--------------------------------------------------------------------------------
1 | """
2 | String manipulation, verification and formatting.
3 | 
4 | For more complex checks, you might want to use the
5 | [validators](http://validators.readthedocs.io) package.
6 | """
7 | 
8 | # Core Library
9 | import socket
10 | from email.utils import parseaddr
11 | from typing import List, Optional, Union
12 | 
13 | # Third party
14 | import pkg_resources
15 | from typing_extensions import Literal  # necessary until 3.8
16 | 
17 | # First party
18 | import mpu.io
19 | 
20 | email_regex = r"[^@]*[^@\.]+@[^@]+\.[^@]+"
21 | 
22 | 
23 | def is_email(potential_email_address: str) -> bool:
24 |     """
25 |     Check if potential_email_address is a valid e-mail address.
26 | 
27 |     Please note that this function has no false-negatives but many
28 |     false-positives. So if it returns that the input is not a valid
29 |     e-mail address, it certainly isn't. If it returns True, it might still be
30 |     invalid. For example, the domain might not be registered.
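
    Internally, email.utils.parseaddr is used: the input must parse
    without a display name, and the part after the first '@' must
    contain a dot.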
31 | 
32 |     Parameters
33 |     ----------
34 |     potential_email_address : str
35 | 
36 |     Returns
37 |     -------
38 |     is_email : bool
39 | 
40 |     Examples
41 |     --------
42 |     >>> is_email('')
43 |     False
44 |     >>> is_email('info@martin-thoma.de')
45 |     True
46 |     >>> is_email('info@math.martin-thoma.de')
47 |     True
48 |     >>> is_email('Martin Thoma <info@martin-thoma.de>')
49 |     False
50 |     >>> is_email('info@martin-thoma')
51 |     False
52 |     >>> is_email('Martin <>')
53 |     False
54 |     """
55 |     context, mail = parseaddr(potential_email_address)
56 |     first_condition = len(context) == 0
57 |     dot_after_at = (
58 |         "@" in potential_email_address and "." in potential_email_address.split("@")[1]
59 |     )
60 |     return first_condition and dot_after_at
61 | 
62 | 
63 | def is_int(potential_int: str) -> bool:
64 |     """
65 |     Check if potential_int is a valid integer.
66 | 
67 |     Parameters
68 |     ----------
69 |     potential_int : str
70 | 
71 |     Returns
72 |     -------
73 |     is_int : bool
74 | 
75 |     Examples
76 |     --------
77 |     >>> is_int('123')
78 |     True
79 |     >>> is_int('1234567890123456789')
80 |     True
81 |     >>> is_int('0')
82 |     True
83 |     >>> is_int('-123')
84 |     True
85 |     >>> is_int('123.45')
86 |     False
87 |     >>> is_int('a')
88 |     False
89 |     >>> is_int('0x8')
90 |     False
91 |     """
92 |     try:
93 |         int(potential_int)
94 |         return True
95 |     except ValueError:
96 |         return False
97 | 
98 | 
99 | def is_float(potential_float: str) -> bool:
100 |     """
101 |     Check if potential_float is a valid float.
102 | 
103 |     Returns
104 |     -------
105 |     is_float : bool
106 | 
107 |     Examples
108 |     --------
109 |     >>> is_float('123')
110 |     True
111 |     >>> is_float('1234567890123456789')
112 |     True
113 |     >>> is_float('0')
114 |     True
115 |     >>> is_float('-123')
116 |     True
117 |     >>> is_float('123.45')
118 |     True
119 |     >>> is_float('a')
120 |     False
121 |     >>> is_float('0x8')
122 |     False
123 |     """
124 |     try:
125 |         float(potential_float)
126 |         return True
127 |     except ValueError:
128 |         return False
129 | 
130 | 
131 | def str2bool(string_: str, default: Union[str, bool] = "raise") -> bool:
132 |     """
133 |     Convert a string to a bool.
134 | 
135 |     Parameters
136 |     ----------
137 |     string_ : str
138 |     default : {'raise', False}
139 |         Default behaviour if none of the "true" strings is detected.
140 | 
141 |     Returns
142 |     -------
143 |     boolean : bool
144 | 
145 |     Examples
146 |     --------
147 |     >>> str2bool('True')
148 |     True
149 |     >>> str2bool('1')
150 |     True
151 |     >>> str2bool('0')
152 |     False
153 |     """
154 |     if default not in ["raise", False]:
155 |         raise ValueError(f"default was '{default}', but should be 'raise' or False")
156 |     true = ["true", "t", "1", "y", "yes", "enabled", "enable", "on"]
157 |     false = ["false", "f", "0", "n", "no", "disabled", "disable", "off"]
158 |     if string_.lower() in true:
159 |         return True
160 |     elif string_.lower() in false or (not default):
161 |         return False
162 |     else:
163 |         raise ValueError(f"The value '{string_}' cannot be mapped to boolean.")
164 | 
165 | 
166 | def str2str_or_none(string_: str) -> Optional[str]:
167 |     """
168 |     Convert a string to a str or to None.
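
    Strings that is_none considers equivalent to None ('none',
    'undefined', 'unknown', 'null' and the empty string,
    case-insensitive) are converted to None; all other strings are
    returned unchanged.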
169 | 
170 |     Parameters
171 |     ----------
172 |     string_ : str
173 | 
174 |     Returns
175 |     -------
176 |     str_or_none : str or None
177 | 
178 |     Examples
179 |     --------
180 |     >>> str2str_or_none('True')
181 |     'True'
182 |     >>> str2str_or_none('1')
183 |     '1'
184 |     >>> str2str_or_none('0')
185 |     '0'
186 |     >>> str2str_or_none('undefined')
187 |     """
188 |     if is_none(string_, default=False):
189 |         return None
190 |     else:
191 |         return string_
192 | 
193 | 
194 | def str2bool_or_none(
195 |     string_: str, default: Literal["raise", False] = "raise"
196 | ) -> Optional[bool]:
197 |     """
198 |     Convert a string to a bool or to None.
199 | 
200 |     Parameters
201 |     ----------
202 |     string_ : str
203 |     default : {'raise', False}
204 |         Default behaviour if none of the "true" or "none" strings is detected.
205 | 
206 |     Returns
207 |     -------
208 |     bool_or_none : bool or None
209 | 
210 |     Examples
211 |     --------
212 |     >>> str2bool_or_none('True')
213 |     True
214 |     >>> str2bool_or_none('1')
215 |     True
216 |     >>> str2bool_or_none('0')
217 |     False
218 |     >>> str2bool_or_none('undefined')
219 |     """
220 |     if default not in ["raise", False]:
221 |         raise ValueError(f"default was '{default}', but should be 'raise' or False")
222 |     if is_none(string_, default=False):
223 |         return None
224 |     else:
225 |         return str2bool(string_, default)
226 | 
227 | 
228 | def str2float_or_none(string_: str) -> Optional[float]:
229 |     """
230 |     Convert a string to a float or to None.
231 | 
232 |     Parameters
233 |     ----------
234 |     string_ : str
235 | 
236 |     Returns
237 |     -------
238 |     float_or_none : float or None
239 | 
240 |     Examples
241 |     --------
242 |     >>> str2float_or_none('1')
243 |     1.0
244 |     >>> str2float_or_none('1.2')
245 |     1.2
246 |     >>> str2float_or_none('undefined')
247 |     """
248 |     if is_none(string_, default=False):
249 |         return None
250 |     else:
251 |         return float(string_)
252 | 
253 | 
254 | def str2int_or_none(string_: str) -> Optional[int]:
255 |     """
256 |     Convert a string to an int or to None.
257 | 
258 |     Parameters
259 |     ----------
260 |     string_ : str
261 | 
262 |     Returns
263 |     -------
264 |     int_or_none : int or None
265 | 
266 |     Examples
267 |     --------
268 |     >>> str2int_or_none('2')
269 |     2
270 |     >>> str2int_or_none('undefined')
271 |     """
272 |     if is_none(string_, default=False):
273 |         return None
274 |     else:
275 |         return int(string_)
276 | 
277 | 
278 | def is_none(string_: str, default: Literal["raise", False] = "raise") -> bool:
279 |     """
280 |     Check if a string is equivalent to None.
281 | 
282 |     Parameters
283 |     ----------
284 |     string_ : str
285 |     default : {'raise', False}
286 |         Default behaviour if none of the "None" strings is detected.
287 | 
288 |     Returns
289 |     -------
290 |     is_none : bool
291 | 
292 |     Examples
293 |     --------
294 |     >>> is_none('2', default=False)
295 |     False
296 |     >>> is_none('undefined', default=False)
297 |     True
298 |     """
299 |     if default not in ["raise", False]:
300 |         raise ValueError(f"default was '{default}', but should be 'raise' or False")
301 |     none = ["none", "undefined", "unknown", "null", ""]
302 |     if string_.lower() in none:
303 |         return True
304 |     elif not default:
305 |         return False
306 |     else:
307 |         raise ValueError(f"The value '{string_}' cannot be mapped to none.")
308 | 
309 | 
310 | def is_iban(potential_iban: str) -> bool:
311 |     """
312 |     Check if a string is a valid IBAN.
313 | 
314 |     IBAN is described in ISO 13616-1:2007 Part 1.
315 | 
316 |     Spaces are ignored.
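
    The check verifies that the first two characters match a known
    country code, that the length matches that country's IBAN length
    (from data/iban.csv), and, for German IBANs only, that the check
    digits are correct.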
317 | 
318 |     # CODE
319 |     0 = always zero
320 |     b = BIC or National Bank code
321 |     c = Account number
322 |     i = holder's kennitala (national identification number)
323 |     k = IBAN check digits
324 |     n = Branch number
325 |     t = Account type
326 |     x = National check digit or character
327 | 
328 |     Examples
329 |     --------
330 |     >>> is_iban('DE89 3704 0044 0532 0130 00')
331 |     True
332 |     >>> is_iban('DE89 3704 0044 0532 0130 01')
333 |     False
334 |     """
335 |     path = "data/iban.csv"  # always use slash in Python packages
336 |     filepath = pkg_resources.resource_filename("mpu", path)
337 |     data = mpu.io.read(filepath, delimiter=";", format="dicts")
338 |     potential_iban = potential_iban.replace(" ", "")  # Remove spaces
339 |     if len(potential_iban) < min(int(el["length"]) for el in data):
340 |         return False
341 |     country = None
342 |     for element in data:
343 |         if element["iban_fields"][:2] == potential_iban[:2]:
344 |             country = element
345 |             break
346 |     if country is None:
347 |         return False
348 |     if len(potential_iban) != int(country["length"]):
349 |         return False
350 |     if country["country_en"] == "Germany":
351 |         checksum_vals = [
352 |             value
353 |             for field_type, value in zip(country["iban_fields"], potential_iban)
354 |             if field_type == "k"
355 |         ]
356 |         checksum_val = "".join(checksum_vals)
357 |         checksum_exp = _calculate_german_iban_checksum(
358 |             potential_iban, country["iban_fields"]
359 |         )
360 |         return checksum_val == checksum_exp
361 |     return True
362 | 
363 | 
364 | def is_ipv4(
365 |     potential_ipv4: str,
366 |     allow_leading_zeros: bool = False,
367 |     allow_shortened_addresses: bool = False,
368 | ) -> bool:
369 |     """
370 |     Check if a string is a valid IPv4 address.
371 | 
372 |     Parameters
373 |     ----------
374 |     potential_ipv4 : str
375 |     allow_leading_zeros : bool (default: False)
376 |     allow_shortened_addresses : bool (default: False)
377 | 
378 |     Returns
379 |     -------
380 |     is_valid : bool
381 | 
382 |     Examples
383 |     --------
384 |     >>> is_ipv4("192.168.0.4")
385 |     True
386 |     >>> is_ipv4("192.168..4")
387 |     False
388 |     >>> is_ipv4("192.168.01.4", allow_leading_zeros=True)
389 |     True
390 |     >>> is_ipv4("192.168.01.4", allow_leading_zeros=False)
391 |     False
392 |     >>> is_ipv4("256.168.01.4")
393 |     False
394 |     >>> is_ipv4("4", allow_shortened_addresses=True)
395 |     True
396 |     >>> is_ipv4("4", allow_shortened_addresses=False)
397 |     False
398 |     """
399 |     if not allow_shortened_addresses and potential_ipv4.count(".") != 3:
400 |         return False
401 |     try:
402 |         socket.inet_aton(potential_ipv4)
403 |     except OSError:
404 |         return False
405 |     if allow_leading_zeros:
406 |         return True
407 |     else:
408 |         return all(
409 |             len(block) == 1 or block[0] != "0" for block in potential_ipv4.split(".")
410 |         )
411 | 
412 | 
413 | def _calculate_german_iban_checksum(
414 |     iban: str, iban_fields: str = "DEkkbbbbbbbbcccccccccc"
415 | ) -> str:
416 |     """
417 |     Calculate the checksum of the German IBAN format.
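
    The BBAN digits (fields 'b' and 'c') are followed by the country
    code converted to numbers (D -> 13, E -> 14) and '00'; the check
    digits are 98 minus that number modulo 97 (ISO 7064 MOD-97-10).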
418 | 
419 |     Examples
420 |     --------
421 |     >>> iban = 'DE41500105170123456789'
422 |     >>> _calculate_german_iban_checksum(iban)
423 |     '41'
424 |     """
425 |     numbers: List[str] = [
426 |         value
427 |         for field_type, value in zip(iban_fields, iban)
428 |         if field_type in ["b", "c"]
429 |     ]
430 |     translate = {
431 |         "0": "0",
432 |         "1": "1",
433 |         "2": "2",
434 |         "3": "3",
435 |         "4": "4",
436 |         "5": "5",
437 |         "6": "6",
438 |         "7": "7",
439 |         "8": "8",
440 |         "9": "9",
441 |     }
442 |     for i in range(ord("A"), ord("Z") + 1):
443 |         translate[chr(i)] = str(i - ord("A") + 10)
444 |     for val in "DE00":
445 |         translated = translate[val]
446 |         for char in translated:
447 |             numbers.append(char)
448 |     number = sum(int(value) * 10**i for i, value in enumerate(numbers[::-1]))
449 |     checksum = 98 - (number % 97)
450 |     return str(checksum)
451 | 
452 | 
453 | def human_readable_bytes(nb_bytes: Union[int, float], suffix: str = "B") -> str:
454 |     """
455 |     Convert a byte number into a human readable format.
456 | 
457 |     Parameters
458 |     ----------
459 |     nb_bytes : Union[int, float]
460 |     suffix : str, optional (default: "B")
461 | 
462 |     Returns
463 |     -------
464 |     size_str : str
465 | 
466 |     Examples
467 |     --------
468 |     >>> human_readable_bytes(123)
469 |     '123.0 B'
470 | 
471 |     >>> human_readable_bytes(1025)
472 |     '1.0 KiB'
473 | 
474 |     >>> human_readable_bytes(9671406556917033397649423)
475 |     '8.0 YiB'
476 |     """
477 |     for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
478 |         if abs(nb_bytes) < 1024.0:
479 |             return f"{nb_bytes:3.1f} {unit}{suffix}"
480 |         nb_bytes /= 1024.0
481 |     return f"{nb_bytes:.1f} Yi{suffix}"
482 | 
--------------------------------------------------------------------------------
/mpu/io.py:
--------------------------------------------------------------------------------
1 | """Reading and writing common file formats."""
2 | 
3 | # Core Library
4 | import csv
5 | import hashlib
6 | import json
7 | import os
8 | import pickle
9 | import platform
10 | from datetime import datetime
11 | from typing import Any, Dict, List, Optional, Union
12 | 
13 | # Third party
14 | from typing_extensions import Literal
15 | 
16 | # First party
17 | from mpu.datastructures import EList
18 | 
19 | 
20 | def read(filepath: str, **kwargs: Any) -> Any:
21 |     """
22 |     Read a file.
23 | 
24 |     Supported formats:
25 | 
26 |     * CSV
27 |     * JSON, JSONL
28 |     * pickle
29 | 
30 |     Parameters
31 |     ----------
32 |     filepath : str
33 |         Path to the file that should be read. This method's action depends
34 |         mainly on the file extension.
35 |     kwargs : Dict
36 |         Any keywords for the specific file format. For CSV, this is
37 |         'delimiter', 'quotechar', 'skiprows', 'format'
38 | 
39 |     Returns
40 |     -------
41 |     data : Union[str, bytes] or other (e.g. format=dicts)
42 |     """
43 |     supported_formats = [".csv", ".json", ".jsonl", ".pickle"]
44 |     if filepath.lower().endswith(".csv"):
45 |         return _read_csv(filepath, kwargs)
46 |     elif filepath.lower().endswith(".json"):
47 |         with open(filepath, encoding="utf8") as data_file:
48 |             data: Any = json.load(data_file, **kwargs)
49 |         return data
50 |     elif filepath.lower().endswith(".jsonl"):
51 |         return _read_jsonl(filepath, kwargs)
52 |     elif filepath.lower().endswith(".pickle"):
53 |         with open(filepath, "rb") as handle:
54 |             data_pkl = pickle.load(handle)
55 |         return data_pkl
56 |     elif filepath.lower().endswith(".yml") or filepath.lower().endswith(".yaml"):
57 |         raise NotImplementedError(
58 |             "YAML is not supported because you need "
59 |             "PyYAML in Python 3. "
" 60 | "See " 61 | "https://stackoverflow.com/a/42054860/562769" 62 | " as a guide how to use it." 63 | ) 64 | elif filepath.lower().endswith(".h5") or filepath.lower().endswith(".hdf5"): 65 | raise NotImplementedError( 66 | "HDF5 is not supported. See " 67 | "https://stackoverflow.com/a/41586571/562769" 68 | " as a guide how to use it." 69 | ) 70 | else: 71 | raise NotImplementedError( 72 | f"File '{filepath}' does not end with one " 73 | f"of the supported file name extensions. " 74 | f"Supported are: {supported_formats}" 75 | ) 76 | 77 | 78 | def _read_csv(filepath: str, kwargs: Dict) -> Union[List, Dict]: 79 | """See documentation of mpu.io.read.""" 80 | if "delimiter" not in kwargs: 81 | kwargs["delimiter"] = "," 82 | if "quotechar" not in kwargs: 83 | kwargs["quotechar"] = '"' 84 | if "skiprows" not in kwargs: 85 | kwargs["skiprows"] = [] 86 | if isinstance(kwargs["skiprows"], int): 87 | kwargs["skiprows"] = list(range(kwargs["skiprows"])) 88 | if "format" in kwargs: 89 | format_ = kwargs["format"] 90 | kwargs.pop("format", None) 91 | else: 92 | format_ = "default" 93 | skiprows = kwargs["skiprows"] 94 | kwargs.pop("skiprows", None) 95 | 96 | newline = None 97 | if "newline" in kwargs: 98 | newline = kwargs["newline"] 99 | del kwargs["newline"] 100 | 101 | with open(filepath, encoding="utf8", newline=newline) as fp: 102 | if format_ == "default": 103 | reader = csv.reader(fp, **kwargs) 104 | data_tmp = EList(list(reader)) 105 | data: Union[List, Dict] = data_tmp.remove_indices(skiprows) 106 | elif format_ == "dicts": 107 | reader_list = csv.DictReader(fp, **kwargs) 108 | data = list(reader_list) 109 | else: 110 | raise NotImplementedError(f"Format '{format_}' unknown") 111 | return data 112 | 113 | 114 | def _read_jsonl(filepath: str, kwargs: Dict) -> List: 115 | """See documentation of mpu.io.read.""" 116 | with open(filepath, encoding="utf8") as data_file: 117 | data = [json.loads(line, **kwargs) for line in data_file if len(line) > 0] 118 | return data 119 | 120 | 121 | def write(filepath: str, data: Union[Dict, List], **kwargs: Any) -> Any: 122 | """ 123 | Write a file. 124 | 125 | Supported formats: 126 | 127 | * CSV 128 | * JSON, JSONL 129 | * pickle 130 | 131 | Parameters 132 | ---------- 133 | filepath : str 134 | Path to the file that should be read. This methods action depends 135 | mainly on the file extension. Make sure that it ends in .csv, .json, 136 | .jsonl, or .pickle. 137 | data : Union[Dict, List] 138 | Content that should be written 139 | kwargs : Dict 140 | Any keywords for the specific file format. 141 | 142 | Returns 143 | ------- 144 | data : str or bytes 145 | """ 146 | supported_formats = [".csv", ".json", ".jsonl", ".pickle"] 147 | if filepath.lower().endswith(".csv"): 148 | return _write_csv(filepath, data, kwargs) 149 | elif filepath.lower().endswith(".json"): 150 | return _write_json(filepath, data, kwargs) 151 | elif filepath.lower().endswith(".jsonl"): 152 | return _write_jsonl(filepath, data, kwargs) 153 | elif filepath.lower().endswith(".pickle"): 154 | return _write_pickle(filepath, data, kwargs) 155 | elif filepath.lower().endswith(".yml") or filepath.lower().endswith(".yaml"): 156 | raise NotImplementedError( 157 | "YAML is not supported, because you need " 158 | "PyYAML in Python3. " 159 | "See " 160 | "https://stackoverflow.com/a/42054860/562769" 161 | " as a guide how to use it." 162 | ) 163 | elif filepath.lower().endswith(".h5") or filepath.lower().endswith(".hdf5"): 164 | raise NotImplementedError( 165 | "HDF5 is not supported. 
See " 166 | "https://stackoverflow.com/a/41586571/562769" 167 | " as a guide how to use it." 168 | ) 169 | else: 170 | raise NotImplementedError( 171 | f"File '{filepath}' does not end in one of the " 172 | f"supported formats. Supported are: {supported_formats}" 173 | ) 174 | 175 | 176 | def _write_csv(filepath: str, data: Any, kwargs: Dict) -> Any: 177 | """See documentation of mpu.io.write.""" 178 | newline = None 179 | if "newline" in kwargs: 180 | newline = kwargs["newline"] 181 | del kwargs["newline"] 182 | with open(filepath, "w", encoding="utf8", newline=newline) as fp: 183 | if "delimiter" not in kwargs: 184 | kwargs["delimiter"] = "," 185 | if "quotechar" not in kwargs: 186 | kwargs["quotechar"] = '"' 187 | writer = csv.writer(fp, **kwargs) 188 | writer.writerows(data) 189 | return data 190 | 191 | 192 | def _write_json(filepath: str, data: Any, kwargs: Dict) -> Any: 193 | """See documentation of mpu.io.write.""" 194 | with open(filepath, "w", encoding="utf8") as outfile: 195 | if "indent" not in kwargs: 196 | kwargs["indent"] = 4 197 | if "sort_keys" not in kwargs: 198 | kwargs["sort_keys"] = True 199 | if "separators" not in kwargs: 200 | kwargs["separators"] = (",", ": ") 201 | if "ensure_ascii" not in kwargs: 202 | kwargs["ensure_ascii"] = False 203 | str_ = json.dumps(data, **kwargs) 204 | outfile.write(str_) 205 | return data 206 | 207 | 208 | def _write_jsonl(filepath: str, data: Any, kwargs: Dict) -> Any: 209 | """See documentation of mpu.io.write.""" 210 | with open(filepath, "w", encoding="utf8") as outfile: 211 | kwargs["indent"] = None # JSON has to be on one line! 212 | if "sort_keys" not in kwargs: 213 | kwargs["sort_keys"] = True 214 | if "separators" not in kwargs: 215 | kwargs["separators"] = (",", ": ") 216 | if "ensure_ascii" not in kwargs: 217 | kwargs["ensure_ascii"] = False 218 | for line in data: 219 | str_ = json.dumps(line, **kwargs) 220 | outfile.write(str_) 221 | outfile.write("\n") 222 | return data 223 | 224 | 225 | def _write_pickle(filepath: str, data: Any, kwargs: Dict) -> Any: 226 | """See documentation of mpu.io.write.""" 227 | if "protocol" not in kwargs: 228 | kwargs["protocol"] = pickle.HIGHEST_PROTOCOL 229 | with open(filepath, "wb") as handle: 230 | pickle.dump(data, handle, **kwargs) 231 | return data 232 | 233 | 234 | def urlread(url: str, encoding: str = "utf8") -> str: 235 | """ 236 | Read the content of an URL. 237 | 238 | Parameters 239 | ---------- 240 | url : str 241 | encoding : str (default: "utf8") 242 | 243 | Returns 244 | ------- 245 | content : str 246 | """ 247 | # Core Library 248 | from urllib.request import urlopen 249 | 250 | response = urlopen(url) 251 | content = response.read() 252 | content = content.decode(encoding) 253 | return content 254 | 255 | 256 | def download(source: str, sink: Optional[str] = None) -> str: 257 | """ 258 | Download a file. 259 | 260 | Parameters 261 | ---------- 262 | source : str 263 | Where the file comes from. Some URL. 264 | sink : str, optional (default: same filename in current directory) 265 | Where the file gets stored. Some filepath in the local file system. 266 | """ 267 | # Core Library 268 | from urllib.request import urlretrieve 269 | 270 | if sink is None: 271 | sink = os.path.abspath(os.path.split(source)[1]) 272 | urlretrieve(source, sink) 273 | return sink 274 | 275 | 276 | def hash( 277 | filepath: str, method: Literal["sha1", "md5"] = "sha1", buffer_size: int = 65536 278 | ) -> str: 279 | """ 280 | Calculate a hash of a local file. 
281 | 
282 |     Parameters
283 |     ----------
284 |     filepath : str
285 |     method : {'sha1', 'md5'}
286 |     buffer_size : int, optional (default: 65536 bytes = 64 KiB)
287 |         in bytes
288 | 
289 |     Returns
290 |     -------
291 |     hash : str
292 |     """
293 |     if method == "sha1":
294 |         hash_function = hashlib.sha1()
295 |     elif method == "md5":
296 |         hash_function = hashlib.md5()
297 |     else:
298 |         raise NotImplementedError(
299 |             f"Only md5 and sha1 hashes are known, but '{method}' was specified."
300 |         )
301 | 
302 |     with open(filepath, "rb") as fp:
303 |         while True:
304 |             data = fp.read(buffer_size)
305 |             if not data:
306 |                 break
307 |             hash_function.update(data)
308 |     return hash_function.hexdigest()
309 | 
310 | 
311 | def get_creation_datetime(filepath: str) -> Optional[datetime]:
312 |     """
313 |     Get the date that a file was created.
314 | 
315 |     Parameters
316 |     ----------
317 |     filepath : str
318 | 
319 |     Returns
320 |     -------
321 |     creation_datetime : Optional[datetime]
322 |     """
323 |     if platform.system() == "Windows":
324 |         return datetime.fromtimestamp(os.path.getctime(filepath))
325 |     else:
326 |         stat = os.stat(filepath)
327 |         try:
328 |             return datetime.fromtimestamp(stat.st_birthtime)
329 |         except AttributeError:
330 |             # We're probably on Linux. There is no easy way to get
331 |             # creation dates here, so we return None instead.
332 |             return None
333 | 
334 | 
335 | def get_modification_datetime(filepath: str) -> datetime:
336 |     """
337 |     Get the datetime that a file was last modified.
338 | 
339 |     Parameters
340 |     ----------
341 |     filepath : str
342 | 
343 |     Returns
344 |     -------
345 |     modification_datetime : datetime
346 | 
347 |     """
348 |     # Third party
349 |     import tzlocal
350 | 
351 |     timezone = tzlocal.get_localzone()
352 |     mtime = datetime.fromtimestamp(os.path.getmtime(filepath))
353 |     return mtime.replace(tzinfo=timezone)
354 | 
355 | 
356 | def get_access_datetime(filepath: str) -> datetime:
357 |     """
358 |     Get the last time filepath was accessed.
359 | 
360 |     Parameters
361 |     ----------
362 |     filepath : str
363 | 
364 |     Returns
365 |     -------
366 |     access_datetime : datetime
367 |     """
368 |     # Third party
369 |     import tzlocal
370 | 
371 |     tz = tzlocal.get_localzone()
372 |     mtime = datetime.fromtimestamp(os.path.getatime(filepath))
373 |     return mtime.replace(tzinfo=tz)
374 | 
375 | 
376 | def get_file_meta(filepath: str) -> Dict[str, Any]:
377 |     """
378 |     Get meta-information about a file.
379 | 
380 |     Parameters
381 |     ----------
382 |     filepath : str
383 | 
384 |     Returns
385 |     -------
386 |     meta : dict
387 |     """
388 |     meta: Dict[str, Any] = {
389 |         "filepath": os.path.abspath(filepath),
390 |         "creation_datetime": get_creation_datetime(filepath),
391 |         "last_access_datetime": get_access_datetime(filepath),
392 |         "modification_datetime": get_modification_datetime(filepath),
393 |     }
394 |     try:
395 |         # Third party
396 |         import magic
397 | 
398 |         f_mime = magic.Magic(mime=True, uncompress=True)
399 |         f_other = magic.Magic(mime=False, uncompress=True)
400 |         meta["mime"] = f_mime.from_file(meta["filepath"])
401 |         meta["magic-type"] = f_other.from_file(meta["filepath"])
402 |     except ImportError:
403 |         pass
404 |     return meta
405 | 
406 | 
407 | def gzip_file(source: str, sink: str) -> None:
408 |     """
409 |     Create a GZIP file from a source file.
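
    The source file is read in binary mode and its content is written
    gzip-compressed to sink; the source file itself is left unchanged.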
410 | 
411 |     Parameters
412 |     ----------
413 |     source : str
414 |         Filepath
415 |     sink : str
416 |         Filepath
417 |     """
418 |     # Core Library
419 |     import gzip
420 | 
421 |     with open(source, "rb") as f_in, gzip.open(sink, "wb") as f_out:
422 |         f_out.writelines(f_in)
423 | 
--------------------------------------------------------------------------------