├── toolz ├── tests │ ├── __init__.py │ ├── test_utils.py │ ├── test_compatibility.py │ ├── test_curried_doctests.py │ ├── test_package.py │ ├── test_recipes.py │ ├── test_tlz.py │ ├── test_signatures.py │ ├── test_curried.py │ ├── test_serialization.py │ ├── test_dicttoolz.py │ ├── test_inspect_args.py │ └── test_itertoolz.py ├── sandbox │ ├── tests │ │ ├── __init__.py │ │ ├── test_parallel.py │ │ └── test_core.py │ ├── __init__.py │ ├── parallel.py │ └── core.py ├── utils.py ├── curried │ ├── exceptions.py │ ├── operator.py │ └── __init__.py ├── __init__.py ├── compatibility.py ├── recipes.py └── dicttoolz.py ├── doc ├── requirements.txt ├── source │ ├── install.rst │ ├── references.rst │ ├── index.rst │ ├── api.rst │ ├── heritage.rst │ ├── purity.rst │ ├── tips-and-tricks.rst │ ├── laziness.rst │ ├── curry.rst │ ├── parallelism.rst │ ├── composition.rst │ ├── control.rst │ ├── conf.py │ └── streaming-analytics.rst ├── make.bat └── Makefile ├── .gitignore ├── .github ├── dependabot.yml └── workflows │ ├── pre-commit.yml │ ├── test.yml │ └── publish_pypi.yml ├── bench ├── test_groupby.py ├── test_sliding_window.py ├── test_get.py ├── test_get_list.py ├── test_memoize.py ├── test_curry.py ├── test_memoize_kwargs.py ├── test_curry_baseline.py ├── test_first.py ├── test_pluck.py ├── test_frequencies.py ├── test_first_iter.py ├── test_wordcount.py └── test_join.py ├── MANIFEST.in ├── tox.ini ├── tlz ├── __init__.py └── _build_tlz.py ├── examples ├── wordcount.py ├── graph.py └── fib.py ├── .readthedocs.yaml ├── LICENSE.txt ├── AUTHORS.md ├── release-notes ├── pyproject.toml ├── .pre-commit-config.yaml └── README.rst /toolz/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /toolz/sandbox/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | furo 3 | -------------------------------------------------------------------------------- /toolz/sandbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import EqualityHashKey, unzip 2 | from .parallel import fold 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | build/ 3 | dist/ 4 | *.egg-info/ 5 | bench/shakespeare.txt 6 | .coverage 7 | *.sw? 8 | .DS_STORE 9 | \.tox/ 10 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: 'github-actions' 4 | directory: '/' 5 | schedule: 6 | interval: 'monthly' 7 | -------------------------------------------------------------------------------- /bench/test_groupby.py: -------------------------------------------------------------------------------- 1 | from toolz import groupby, identity 2 | 3 | 4 | data = list(range(1000)) * 1000 5 | 6 | 7 | def test_groupby(): 8 | groupby(identity, data) 9 | -------------------------------------------------------------------------------- /bench/test_sliding_window.py: -------------------------------------------------------------------------------- 1 | from toolz import sliding_window 2 | 3 | seq = range(1000000) 4 | 5 | 6 | def test_sliding_window(): 7 | list(sliding_window(3, seq)) 8 | -------------------------------------------------------------------------------- /bench/test_get.py: -------------------------------------------------------------------------------- 1 | from toolz import get 2 | 3 | tuples = [(1, 2, 3) for i in range(100000)] 4 | 5 | 6 | def test_get(): 7 | for tup in tuples: 8 | get(1, tup) 9 | -------------------------------------------------------------------------------- /bench/test_get_list.py: -------------------------------------------------------------------------------- 1 | from toolz import get 2 | 3 | tuples = [(1, 2, 3) for i in range(100000)] 4 | 5 | 6 | def test_get(): 7 | for tup in tuples: 8 | get([1, 2], tup) 9 | -------------------------------------------------------------------------------- /toolz/utils.py: -------------------------------------------------------------------------------- 1 | def raises(err, lamda): 2 | try: 3 | lamda() 4 | return False 5 | except err: 6 | return True 7 | 8 | 9 | no_default = '__no__default__' 10 | -------------------------------------------------------------------------------- /toolz/tests/test_utils.py: -------------------------------------------------------------------------------- 1 | from toolz.utils import raises 2 | 3 | 4 | def test_raises(): 5 | assert raises(ZeroDivisionError, lambda: 1 / 0) 6 | assert not raises(ZeroDivisionError, lambda: 1) 7 | -------------------------------------------------------------------------------- /bench/test_memoize.py: -------------------------------------------------------------------------------- 1 | from toolz import memoize 2 | 3 | 4 | def test_memoize_no_kwargs(): 5 | @memoize 6 | def f(x): 7 | return x 8 | 9 | for i in range(100000): 10 | f(3) 11 | -------------------------------------------------------------------------------- /bench/test_curry.py: -------------------------------------------------------------------------------- 1 | from toolz.curried import get 2 | 3 | 4 | pairs = [(1, 2) for i in range(100000)] 5 | 6 | 7 | def test_get_curried(): 8 | first = get(0) 9 | for p in pairs: 10 | first(p) 11 | -------------------------------------------------------------------------------- /bench/test_memoize_kwargs.py: -------------------------------------------------------------------------------- 1 | from toolz import memoize 2 | 3 | 4 | def test_memoize_kwargs(): 5 | @memoize 6 | def f(x, y=3): 7 | return x 8 | 9 | for i in range(100000): 10 | f(3) 11 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include toolz * 2 | recursive-include tlz * 3 | include AUTHORS.md 4 | include LICENSE.txt 5 | include MANIFEST.in 6 | include README.rst 7 | include pyproject.toml 8 | global-exclude *.pyc *~ *.bak *.swp *.swo *.pyo *.so 9 | -------------------------------------------------------------------------------- /bench/test_curry_baseline.py: -------------------------------------------------------------------------------- 1 | from toolz import get 2 | from functools import partial 3 | 4 | 5 | pairs = [(1, 2) for i in range(100000)] 6 | 7 | 8 | def test_get(): 9 | first = partial(get, 0) 10 | for p in pairs: 11 | first(p) 12 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = 3 | py39 4 | py310 5 | py311 6 | py312 7 | py313 8 | py314 9 | pypy3 10 | 11 | skip_missing_interpreters = true 12 | 13 | 14 | [testenv] 15 | deps = pytest 16 | commands = py.test {posargs} 17 | -------------------------------------------------------------------------------- /bench/test_first.py: -------------------------------------------------------------------------------- 1 | from toolz import first, second 2 | 3 | pairs = [(1, 2) for i in range(1000000)] 4 | 5 | 6 | def test_first(): 7 | for p in pairs: 8 | first(p) 9 | 10 | 11 | def test_second(): 12 | for p in pairs: 13 | second(p) 14 | -------------------------------------------------------------------------------- /bench/test_pluck.py: -------------------------------------------------------------------------------- 1 | from toolz import pluck 2 | 3 | tuples = [(1, 2, 3) for i in range(100000)] 4 | less_tuples = [(1, 2, 3) for i in range(100)] 5 | 6 | 7 | def test_pluck(): 8 | for i in pluck(2, tuples): 9 | pass 10 | 11 | for i in range(1000): 12 | tuple(pluck(2, less_tuples)) 13 | -------------------------------------------------------------------------------- /toolz/tests/test_compatibility.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import importlib 3 | 4 | def test_compat_warn(): 5 | with pytest.warns(DeprecationWarning): 6 | # something else is importing this, 7 | import toolz.compatibility 8 | # reload to be sure we warn 9 | importlib.reload(toolz.compatibility) 10 | -------------------------------------------------------------------------------- /bench/test_frequencies.py: -------------------------------------------------------------------------------- 1 | from toolz import frequencies, identity 2 | 3 | 4 | big_data = list(range(1000)) * 1000 5 | small_data = list(range(100)) 6 | 7 | 8 | def test_frequencies(): 9 | frequencies(big_data) 10 | 11 | 12 | def test_frequencies_small(): 13 | for i in range(1000): 14 | frequencies(small_data) 15 | -------------------------------------------------------------------------------- /toolz/tests/test_curried_doctests.py: -------------------------------------------------------------------------------- 1 | import doctest 2 | import toolz 3 | 4 | 5 | def test_doctests(): 6 | toolz.__test__ = {} 7 | for name, func in vars(toolz).items(): 8 | if isinstance(func, toolz.curry): 9 | toolz.__test__[name] = func.func 10 | assert doctest.testmod(toolz).failed == 0 11 | del toolz.__test__ 12 | -------------------------------------------------------------------------------- /toolz/tests/test_package.py: -------------------------------------------------------------------------------- 1 | import toolz 2 | 3 | 4 | def test_has_version(): 5 | # If this test fails, then toolz probably isn't installed properly. 6 | # For local development, try `pip install -e .` from the project directory. 7 | version = toolz.__version__ 8 | assert isinstance(version, str) 9 | assert version.startswith("1.") 10 | -------------------------------------------------------------------------------- /bench/test_first_iter.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from toolz import first, second 3 | 4 | 5 | def test_first_iter(): 6 | iters = map(iter, [(1, 2) for i in range(1000000)]) 7 | for p in iters: 8 | first(p) 9 | 10 | 11 | def test_second_iter(): 12 | iters = map(iter, [(1, 2) for i in range(1000000)]) 13 | for p in iters: 14 | second(p) 15 | -------------------------------------------------------------------------------- /tlz/__init__.py: -------------------------------------------------------------------------------- 1 | """``tlz`` mirrors the ``toolz`` API and uses ``cytoolz`` if possible. 2 | 3 | The ``tlz`` package is installed when ``toolz`` is installed. It provides 4 | a convenient way to use functions from ``cytoolz``--a faster Cython 5 | implementation of ``toolz``--if it is installed, otherwise it uses 6 | functions from ``toolz``. 7 | """ 8 | 9 | from . import _build_tlz 10 | -------------------------------------------------------------------------------- /examples/wordcount.py: -------------------------------------------------------------------------------- 1 | from toolz import * 2 | 3 | 4 | def stem(word): 5 | """ Stem word to primitive form """ 6 | return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") 7 | 8 | wordcount = comp(frequencies, partial(map, stem), str.split) 9 | 10 | if __name__ == '__main__': 11 | print(wordcount("This cat jumped over this other cat!")) 12 | # prints {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1} 13 | -------------------------------------------------------------------------------- /toolz/curried/exceptions.py: -------------------------------------------------------------------------------- 1 | import toolz 2 | 3 | 4 | __all__ = ['merge_with', 'merge'] 5 | 6 | 7 | @toolz.curry 8 | def merge_with(func, d, *dicts, **kwargs): 9 | return toolz.merge_with(func, d, *dicts, **kwargs) 10 | 11 | 12 | @toolz.curry 13 | def merge(d, *dicts, **kwargs): 14 | return toolz.merge(d, *dicts, **kwargs) 15 | 16 | 17 | merge_with.__doc__ = toolz.merge_with.__doc__ 18 | merge.__doc__ = toolz.merge.__doc__ 19 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 2 | version: 2 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.12" 7 | 8 | sphinx: 9 | configuration: doc/source/conf.py 10 | # Temporarily turning off to get docs build passing 11 | # fail_on_warning: true 12 | 13 | python: 14 | install: 15 | - requirements: doc/requirements.txt 16 | - method: pip 17 | path: . 18 | -------------------------------------------------------------------------------- /doc/source/install.rst: -------------------------------------------------------------------------------- 1 | Installation and Dependencies 2 | ============================= 3 | 4 | Toolz is pure Python and so is easily installable by the standard 5 | dependency manager ``pip``:: 6 | 7 | pip install toolz 8 | 9 | Toolz endeavors to be a very light dependency. It accomplishes this in 10 | three ways: 11 | 12 | 1. Toolz is pure Python 13 | 2. Toolz relies only on the standard library 14 | 3. Toolz simultaneously supports Python versions 3.9+ and PyPy 15 | -------------------------------------------------------------------------------- /bench/test_wordcount.py: -------------------------------------------------------------------------------- 1 | from toolz.curried import * 2 | import os 3 | 4 | if not os.path.exists('bench/shakespeare.txt'): 5 | os.system('wget http://www.gutenberg.org/files/100/100-0.txt' 6 | ' -O bench/shakespeare.txt') 7 | 8 | 9 | def stem(word): 10 | """ Stem word to primitive form """ 11 | return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") 12 | 13 | wordcount = comp(frequencies, map(stem), concat, map(str.split)) 14 | 15 | 16 | def test_shakespeare(): 17 | with open('bench/shakespeare.txt') as f: 18 | counts = wordcount(f) 19 | -------------------------------------------------------------------------------- /toolz/curried/operator.py: -------------------------------------------------------------------------------- 1 | import operator 2 | 3 | from toolz.functoolz import curry 4 | 5 | 6 | # Tests will catch if/when this needs updated 7 | IGNORE = { 8 | "__abs__", "__index__", "__inv__", "__invert__", "__neg__", "__not__", 9 | "__pos__", "_abs", "abs", "attrgetter", "index", "inv", "invert", 10 | "is_none", "is_not_none", "itemgetter", "neg", "not_", "pos", "truth" 11 | } 12 | locals().update( 13 | {name: f if name in IGNORE else curry(f) 14 | for name, f in vars(operator).items() if callable(f)} 15 | ) 16 | 17 | # Clean up the namespace. 18 | del IGNORE 19 | del curry 20 | del operator 21 | -------------------------------------------------------------------------------- /toolz/__init__.py: -------------------------------------------------------------------------------- 1 | from .itertoolz import * 2 | 3 | from .functoolz import * 4 | 5 | from .dicttoolz import * 6 | 7 | from .recipes import * 8 | 9 | from functools import partial, reduce 10 | 11 | sorted = sorted 12 | 13 | map = map 14 | 15 | filter = filter 16 | 17 | # Aliases 18 | comp = compose 19 | 20 | from . import curried, sandbox 21 | 22 | functoolz._sigs.create_signature_registry() 23 | 24 | 25 | def __getattr__(name): 26 | if name == "__version__": 27 | from importlib.metadata import version 28 | 29 | rv = version("toolz") 30 | globals()[name] = rv 31 | return rv 32 | raise AttributeError(f"module {__name__!r} has no attribute {name!r}") 33 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | # Alternatively, consider using https://pre-commit.ci/ 2 | name: pre-commit checks 3 | 4 | on: 5 | pull_request: 6 | push: 7 | branches: [master] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | pre-commit: 14 | name: pre-commit-hooks 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v5 18 | with: 19 | fetch-depth: 0 20 | persist-credentials: false 21 | - uses: actions/setup-python@v6 22 | with: 23 | python-version: "3.13" 24 | - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 25 | env: 26 | SKIP: "no-commit-to-branch" 27 | -------------------------------------------------------------------------------- /bench/test_join.py: -------------------------------------------------------------------------------- 1 | from toolz.curried import * 2 | 3 | try: 4 | xrange 5 | except NameError: 6 | xrange = range 7 | 8 | def burn(seq): 9 | for item in seq: 10 | pass 11 | 12 | 13 | small = [(i, str(i)) for i in range(100)] * 10 14 | big = pipe([110]*10000, map(range), concat, list) 15 | 16 | 17 | def test_many_to_many_large(): 18 | burn(join(get(0), small, identity, big)) 19 | 20 | 21 | def test_one_to_one_tiny(): 22 | A = list(range(20)) 23 | B = A[::2] + A[1::2][::-1] 24 | 25 | for i in xrange(50000): 26 | burn(join(identity, A, identity, B)) 27 | 28 | 29 | def test_one_to_many(): 30 | A = list(range(20)) 31 | B = pipe([20]*1000, map(range), concat, list) 32 | 33 | for i in xrange(100): 34 | burn(join(identity, A, identity, B)) 35 | -------------------------------------------------------------------------------- /examples/graph.py: -------------------------------------------------------------------------------- 1 | from toolz.curried import * 2 | a, b, c, d, e, f, g = 'abcdefg' 3 | 4 | edges = [(a, b), (b, a), (a, c), (a, d), (d, a), (d, e), (e, f), (d, f), 5 | (f, d), (d, g), (e, g)] 6 | 7 | 8 | out_degrees = countby(first, edges) 9 | # {'a': 3, 'b': 1, 'd': 4, 'e': 2, 'f': 1} 10 | 11 | in_degrees = countby(second, edges) 12 | # {'a': 2, 'b': 1, 'c': 1, 'd': 2, 'e': 1, 'f': 2, 'g': 2} 13 | 14 | 15 | out_neighbors = valmap(comp(tuple, map(second)), 16 | groupby(first, edges)) 17 | # {'a': ('b', 'c', 'd'), 18 | # 'b': ('a',), 19 | # 'd': ('a', 'e', 'f', 'g'), 20 | # 'e': ('f', 'g'), 21 | # 'f': ('d',)} 22 | 23 | in_neighbors = valmap(comp(tuple, map(first)), 24 | groupby(second, edges)) 25 | # {'a': ('b', 'd'), 26 | # 'b': ('a',), 27 | # 'c': ('a',), 28 | # 'd': ('a', 'f'), 29 | # 'e': ('d',), 30 | # 'f': ('e', 'd'), 31 | # 'g': ('d', 'e')} 32 | -------------------------------------------------------------------------------- /toolz/tests/test_recipes.py: -------------------------------------------------------------------------------- 1 | from toolz import first, identity, countby, partitionby 2 | 3 | 4 | def iseven(x): 5 | return x % 2 == 0 6 | 7 | 8 | def test_countby(): 9 | assert countby(iseven, [1, 2, 3]) == {True: 1, False: 2} 10 | assert countby(len, ['cat', 'dog', 'mouse']) == {3: 2, 5: 1} 11 | assert countby(0, ('ab', 'ac', 'bc')) == {'a': 2, 'b': 1} 12 | 13 | 14 | def test_partitionby(): 15 | assert list(partitionby(identity, [])) == [] 16 | 17 | vowels = "aeiou" 18 | assert (list(partitionby(vowels.__contains__, "abcdefghi")) == 19 | [("a",), ("b", "c", "d"), ("e",), ("f", "g", "h"), ("i",)]) 20 | 21 | assert (list(map(first, 22 | partitionby(identity, 23 | [1, 1, 1, 2, 3, 3, 2, 2, 3]))) == 24 | [1, 2, 3, 2, 3]) 25 | 26 | assert ''.join(map(first, 27 | partitionby(identity, "Khhhaaaaannnnn!!!!"))) == 'Khan!' 28 | -------------------------------------------------------------------------------- /examples/fib.py: -------------------------------------------------------------------------------- 1 | # / 0 if i is 0 2 | # fib(i) = | 1 if i is 1 3 | # \ fib(i - 1) + fib(i - 2) otherwise 4 | 5 | 6 | def fib(n): 7 | """ Imperative definition of Fibonacci numbers """ 8 | a, b = 0, 1 9 | for i in range(n): 10 | a, b = b, a + b 11 | return a 12 | 13 | 14 | # This is intuitive but VERY slow 15 | def fib(n): 16 | """ Functional definition of Fibonacci numbers """ 17 | if n == 0 or n == 1: 18 | return n 19 | else: 20 | return fib(n - 1) + fib(n - 2) 21 | 22 | from toolz import memoize 23 | 24 | # Oh wait, it's fast again 25 | fib = memoize(fib) 26 | 27 | 28 | # Provide a cache with initial values to `memoize` 29 | @memoize(cache={0: 0, 1: 1}) 30 | def fib(n): 31 | """ Functional definition of Fibonacci numbers with initial terms cached. 32 | 33 | fib(0) == 0 34 | fib(1) == 1 35 | ... 36 | fib(n) == fib(n - 1) + fib(n - 2) 37 | """ 38 | return fib(n - 1) + fib(n - 2) 39 | -------------------------------------------------------------------------------- /toolz/sandbox/tests/test_parallel.py: -------------------------------------------------------------------------------- 1 | from toolz.sandbox.parallel import fold 2 | from toolz import reduce 3 | from operator import add 4 | from pickle import dumps, loads 5 | from multiprocessing import Pool 6 | 7 | 8 | # is comparison will fail between this and no_default 9 | no_default2 = loads(dumps('__no__default__')) 10 | 11 | 12 | def test_fold(): 13 | assert fold(add, range(10), 0) == reduce(add, range(10), 0) 14 | 15 | with Pool() as pool: 16 | assert fold(add, range(10), 0, map=pool.map) == reduce(add, range(10), 0) 17 | 18 | assert fold(add, range(10), 0, chunksize=2) == reduce(add, range(10), 0) 19 | assert fold(add, range(10)) == fold(add, range(10), 0) 20 | 21 | def setadd(s, item): 22 | s = s.copy() 23 | s.add(item) 24 | return s 25 | 26 | assert fold(setadd, [1, 2, 3], set()) == {1, 2, 3} 27 | assert (fold(setadd, [1, 2, 3], set(), chunksize=2, combine=set.union) 28 | == {1, 2, 3}) 29 | 30 | assert fold(add, range(10), default=no_default2) == fold(add, range(10)) 31 | -------------------------------------------------------------------------------- /toolz/compatibility.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.warn("The toolz.compatibility module is no longer " 3 | "needed in Python 3 and has been deprecated. Please " 4 | "import these utilities directly from the standard library. " 5 | "This module will be removed in a future release.", 6 | category=DeprecationWarning, stacklevel=2) 7 | 8 | import operator 9 | import sys 10 | 11 | PY3 = sys.version_info[0] > 2 12 | PY34 = sys.version_info[0] == 3 and sys.version_info[1] == 4 13 | PYPY = hasattr(sys, 'pypy_version_info') and PY3 14 | 15 | __all__ = ('map', 'filter', 'range', 'zip', 'reduce', 'zip_longest', 16 | 'iteritems', 'iterkeys', 'itervalues', 'filterfalse', 17 | 'PY3', 'PY34', 'PYPY') 18 | 19 | 20 | map = map 21 | filter = filter 22 | range = range 23 | zip = zip 24 | from functools import reduce 25 | from itertools import zip_longest 26 | from itertools import filterfalse 27 | iteritems = operator.methodcaller('items') 28 | iterkeys = operator.methodcaller('keys') 29 | itervalues = operator.methodcaller('values') 30 | from collections.abc import Sequence 31 | -------------------------------------------------------------------------------- /doc/source/references.rst: -------------------------------------------------------------------------------- 1 | References 2 | ========== 3 | 4 | - `Underscore.js `__: A similar library for 5 | JavaScript 6 | - `Enumerable `__: A 7 | similar library for Ruby 8 | - `Clojure `__: A functional language whose 9 | standard library has several counterparts in ``toolz`` 10 | - `itertools `__: The 11 | Python standard library for iterator tools 12 | - `functools `__: The 13 | Python standard library for function tools 14 | - `Functional Programming HOWTO `__: 15 | The description of functional programming features from the official 16 | Python docs. 17 | 18 | Contemporary Projects 19 | --------------------- 20 | 21 | These projects also provide iterator and functional utilities within 22 | Python. Their functionality overlaps substantially with that of PyToolz. 23 | 24 | - `funcy `__ 25 | - `fn.py `__ 26 | - `more\_itertools `__ 27 | -------------------------------------------------------------------------------- /toolz/recipes.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from .itertoolz import frequencies, pluck, getter 3 | 4 | 5 | __all__ = ('countby', 'partitionby') 6 | 7 | 8 | def countby(key, seq): 9 | """ Count elements of a collection by a key function 10 | 11 | >>> countby(len, ['cat', 'mouse', 'dog']) 12 | {3: 2, 5: 1} 13 | 14 | >>> def iseven(x): return x % 2 == 0 15 | >>> countby(iseven, [1, 2, 3]) # doctest:+SKIP 16 | {True: 1, False: 2} 17 | 18 | See Also: 19 | groupby 20 | """ 21 | if not callable(key): 22 | key = getter(key) 23 | return frequencies(map(key, seq)) 24 | 25 | 26 | def partitionby(func, seq): 27 | """ Partition a sequence according to a function 28 | 29 | Partition `s` into a sequence of lists such that, when traversing 30 | `s`, every time the output of `func` changes a new list is started 31 | and that and subsequent items are collected into that list. 32 | 33 | >>> is_space = lambda c: c == " " 34 | >>> list(partitionby(is_space, "I have space")) 35 | [('I',), (' ',), ('h', 'a', 'v', 'e'), (' ',), ('s', 'p', 'a', 'c', 'e')] 36 | 37 | >>> is_large = lambda x: x > 10 38 | >>> list(partitionby(is_large, [1, 2, 1, 99, 88, 33, 99, -1, 5])) 39 | [(1, 2, 1), (99, 88, 33, 99), (-1, 5)] 40 | 41 | See also: 42 | partition 43 | groupby 44 | itertools.groupby 45 | """ 46 | return map(tuple, pluck(1, itertools.groupby(seq, key=func))) 47 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Matthew Rocklin 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | a. Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | b. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | c. Neither the name of toolz nor the names of its contributors 14 | may be used to endorse or promote products derived from this software 15 | without specific prior written permission. 16 | 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 28 | DAMAGE. 29 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | [Matthew Rocklin](http://matthewrocklin.com) [@mrocklin](http://github.com/mrocklin/) 2 | 3 | [John Jacobsen](http://eigenhombre.com) [@eigenhombre](http://github.com/eigenhombre/) 4 | 5 | Erik Welch [@eriknw](https://github.com/eriknw/) 6 | 7 | John Crichton [@jcrichton](https://github.com/jcrichton/) 8 | 9 | Han Semaj [@microamp](https://github.com/microamp/) 10 | 11 | [Graeme Coupar](https://twitter.com/obmarg) [@obmarg](https://github.com/obmarg/) 12 | 13 | [Leonid Shvechikov](http://brainstorage.me/shvechikov) [@shvechikov](https://github.com/shvechikov) 14 | 15 | Lars Buitinck [@larsmans](http://github.com/larsmans) 16 | 17 | José Ricardo [@josericardo](https://github.com/josericardo) 18 | 19 | Tom Prince [@tomprince](https://github.com/tomprince) 20 | 21 | Bart van Merriënboer [@bartvm](https://github.com/bartvm) 22 | 23 | Nikolaos-Digenis Karagiannis [@digenis](https://github.com/digenis/) 24 | 25 | [Antonio Lima](https://twitter.com/themiurgo) [@themiurgo](https://github.com/themiurgo/) 26 | 27 | Joe Jevnik [@llllllllll](https://github.com/llllllllll) 28 | 29 | Rory Kirchner [@roryk](https://github.com/roryk) 30 | 31 | [Steven Cutting](http://steven-cutting.github.io) [@steven_cutting](https://github.com/steven-cutting) 32 | 33 | Aric Coady [@coady](https://github.com/coady) 34 | -------------------------------------------------------------------------------- /release-notes: -------------------------------------------------------------------------------- 1 | New in 0.4.2 2 | 3 | Removed intersection 4 | 5 | 6 | New in 0.5.3 7 | 8 | * get_in function 9 | * add itervalues, iterkeys, iteritems to compatibility 10 | * Add do function, remove side_effects from sandbox 11 | * Add juxt, partner to map 12 | * Performance improvements to merge_with 13 | * Errors from curried functions propagate upwards 14 | * keyfilter, valfilter 15 | * do 16 | 17 | New Authors: 18 | 19 | Graeme Coupar, @obmarg 20 | 21 | 22 | New in 0.6.0 23 | 24 | * memoize is curried by default 25 | * memoize support `key` keyword argument 26 | * Cleaned up issues in curried namespace 27 | * Unary functions memoize with just the single argument, not a tuple 28 | * Flattened directory structure 29 | * Add `pluck` function from underscore.js 30 | * Remove `sandbox.jackknife` 31 | 32 | 33 | New in 0.6.1 34 | 35 | 36 | * Python 3.4 support 37 | * New `join` operation 38 | * `join`, `groupby`, ... accept non-callable key functions. 39 | * Many speed improvements: 40 | * Cache method lookup 41 | * Faster `merge_sorted` without key 42 | * An additional round of tuning on `groupby` 43 | * Toolz builds on binstar build under mrocklin channel 44 | * Avoid generators, favor map. Assists in debugging. 45 | * Cleaner `curry` implementation 46 | * Fix serialization issues for `juxt`, `complement` 47 | * `reduceby` no longer requires `default` keyword argument 48 | * Fix bug in `get` where `get([1], coll)` used to return element rather than 49 | length-one tuple 50 | * `EqualityHashKey` added to sandbox 51 | * `juxt` returns a tuple, not a generator 52 | 53 | 54 | New Authors: 55 | 56 | Leonid Shvechikov, José Ricardo, Lars Buitinck, Tom Prince 57 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | 8 | permissions: 9 | contents: read 10 | 11 | jobs: 12 | test: 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: ["ubuntu-latest"] 18 | python-version: 19 | - "3.9" 20 | - "3.10" 21 | - "3.11" 22 | - "3.12" 23 | - "3.13" 24 | - "3.13t" 25 | - "3.14" 26 | - "3.14t" 27 | - "pypy-3.9" 28 | - "pypy-3.10" 29 | - "pypy-3.11" 30 | steps: 31 | - name: Checkout 32 | uses: actions/checkout@v5 33 | with: 34 | fetch-depth: 0 35 | persist-credentials: false 36 | - name: Set up Python 37 | uses: actions/setup-python@v6 38 | with: 39 | python-version: ${{ matrix.python-version }} 40 | - name: Install dependencies 41 | run: | 42 | python -m pip install --upgrade pip setuptools wheel 43 | pip install coverage pycodestyle pytest 44 | pip install -e . 45 | - name: Pytest 46 | run: | 47 | coverage run -m pytest --doctest-modules toolz/ 48 | pytest bench/ 49 | pycodestyle --ignore="E731,W503,W504,E402" --exclude=conf.py,tests,examples,bench -r --show-source . 50 | - name: Coverage 51 | if: (! contains(matrix.python-version, 'pypy')) 52 | run: | 53 | coverage xml 54 | - name: codecov 55 | if: (! contains(matrix.python-version, 'pypy')) 56 | uses: codecov/codecov-action@5a1091511ad55cbe89839c7260b706298ca349f7 # v5.5.1 57 | with: 58 | token: ${{ secrets.CODECOV_TOKEN }} 59 | -------------------------------------------------------------------------------- /toolz/tests/test_tlz.py: -------------------------------------------------------------------------------- 1 | import toolz 2 | 3 | 4 | def test_tlz(): 5 | import tlz 6 | tlz.curry 7 | tlz.functoolz.curry 8 | assert tlz.__package__ == 'tlz' 9 | assert tlz.__name__ == 'tlz' 10 | import tlz.curried 11 | assert tlz.curried.__package__ == 'tlz.curried' 12 | assert tlz.curried.__name__ == 'tlz.curried' 13 | tlz.curried.curry 14 | import tlz.curried.operator 15 | assert tlz.curried.operator.__package__ in (None, 'tlz.curried') 16 | assert tlz.curried.operator.__name__ == 'tlz.curried.operator' 17 | assert tlz.functoolz.__name__ == 'tlz.functoolz' 18 | m1 = tlz.functoolz 19 | import tlz.functoolz as m2 20 | assert m1 is m2 21 | import tlz.sandbox 22 | try: 23 | import tlzthisisabadname.curried 24 | 1/0 25 | except ImportError: 26 | pass 27 | try: 28 | import tlz.curry 29 | 1/0 30 | except ImportError: 31 | pass 32 | try: 33 | import tlz.badsubmodulename 34 | 1/0 35 | except ImportError: 36 | pass 37 | 38 | assert toolz.__package__ == 'toolz' 39 | assert toolz.curried.__package__ == 'toolz.curried' 40 | assert toolz.functoolz.__name__ == 'toolz.functoolz' 41 | try: 42 | import cytoolz 43 | assert cytoolz.__package__ == 'cytoolz' 44 | assert cytoolz.curried.__package__ == 'cytoolz.curried' 45 | assert cytoolz.functoolz.__name__ == 'cytoolz.functoolz' 46 | except ImportError: 47 | pass 48 | 49 | if hasattr(tlz, '__file__'): 50 | assert tlz.__file__ == toolz.__file__ 51 | if hasattr(tlz.functoolz, '__file__'): 52 | assert tlz.functoolz.__file__ == toolz.functoolz.__file__ 53 | 54 | assert tlz.pipe is toolz.pipe 55 | 56 | assert 'tlz' in tlz.__doc__ 57 | assert tlz.curried.__doc__ is not None 58 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | 2 | PyToolz API Documentation 3 | ========================= 4 | 5 | Toolz provides a set of utility functions for iterators, functions, 6 | and dictionaries. These functions interoperate well and form 7 | the building blocks of common data analytic operations. They extend the 8 | standard libraries `itertools` and `functools` and borrow heavily from the 9 | standard libraries of contemporary functional languages. 10 | 11 | Toolz provides a suite of functions which have the following functional virtues: 12 | 13 | - **Composable:** They interoperate due to their use of core data structures. 14 | - **Pure:** They don't change their inputs or rely on external state. 15 | - **Lazy:** They don't run until absolutely necessary, allowing them to support large streaming data sets. 16 | 17 | Toolz functions are *pragmatic*. They understand that most programmers 18 | have deadlines. 19 | 20 | - **Low Tech:** They're just functions, no syntax or magic tricks to learn 21 | - **Tuned:** They're profiled and optimized 22 | - **Serializable:** They support common solutions for parallel computing 23 | 24 | This gives developers the power to write *powerful* programs to solve *complex 25 | problems* with relatively *simple code*. This code can be *easy to understand* 26 | without sacrificing *performance*. Toolz enables this approach, commonly 27 | associated with functional programming, within a natural Pythonic style 28 | suitable for most developers. 29 | 30 | BSD licensed source code is available at http://github.com/pytoolz/toolz/ . 31 | 32 | 33 | Contents 34 | ^^^^^^^^ 35 | 36 | .. toctree:: 37 | :maxdepth: 2 38 | 39 | heritage.rst 40 | install.rst 41 | composition.rst 42 | purity.rst 43 | laziness.rst 44 | control.rst 45 | curry.rst 46 | streaming-analytics.rst 47 | parallelism.rst 48 | api.rst 49 | tips-and-tricks.rst 50 | references.rst 51 | -------------------------------------------------------------------------------- /doc/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | This page contains a comprehensive list of all functions within ``toolz``. 5 | Docstrings should provide sufficient understanding for any individual function. 6 | 7 | Itertoolz 8 | --------- 9 | 10 | .. currentmodule:: toolz.itertoolz 11 | 12 | .. autosummary:: 13 | accumulate 14 | concat 15 | concatv 16 | cons 17 | count 18 | diff 19 | drop 20 | first 21 | frequencies 22 | get 23 | groupby 24 | interleave 25 | interpose 26 | isdistinct 27 | isiterable 28 | iterate 29 | join 30 | last 31 | mapcat 32 | merge_sorted 33 | nth 34 | partition 35 | partition_all 36 | peek 37 | peekn 38 | pluck 39 | random_sample 40 | reduceby 41 | remove 42 | second 43 | sliding_window 44 | tail 45 | take 46 | take_nth 47 | topk 48 | unique 49 | 50 | Functoolz 51 | --------- 52 | 53 | .. currentmodule:: toolz.functoolz 54 | 55 | .. autosummary:: 56 | apply 57 | complement 58 | compose 59 | compose_left 60 | curry 61 | do 62 | excepts 63 | flip 64 | identity 65 | juxt 66 | memoize 67 | pipe 68 | thread_first 69 | thread_last 70 | 71 | Dicttoolz 72 | --------- 73 | 74 | .. currentmodule:: toolz.dicttoolz 75 | 76 | .. autosummary:: 77 | assoc 78 | assoc_in 79 | dissoc 80 | get_in 81 | itemfilter 82 | itemmap 83 | keyfilter 84 | keymap 85 | merge 86 | merge_with 87 | update_in 88 | valfilter 89 | valmap 90 | 91 | Recipes 92 | --------- 93 | 94 | .. currentmodule:: toolz.recipes 95 | 96 | .. autosummary:: 97 | countby 98 | partitionby 99 | 100 | Sandbox 101 | ------- 102 | 103 | .. currentmodule:: toolz.sandbox 104 | 105 | .. autosummary:: 106 | parallel.fold 107 | core.EqualityHashKey 108 | core.unzip 109 | 110 | 111 | Definitions 112 | ----------- 113 | 114 | .. automodule:: toolz.itertoolz 115 | :members: 116 | 117 | .. automodule:: toolz.recipes 118 | :members: 119 | 120 | .. automodule:: toolz.functoolz 121 | :members: 122 | 123 | .. automodule:: toolz.dicttoolz 124 | :members: 125 | 126 | .. automodule:: toolz.sandbox.core 127 | :members: 128 | 129 | .. automodule:: toolz.sandbox.parallel 130 | :members: 131 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | build-backend = "setuptools.build_meta" 3 | requires = [ 4 | "setuptools >=77", 5 | "setuptools-git-versioning >=2.0", 6 | ] 7 | 8 | [project] 9 | name = "toolz" 10 | dynamic = ["version"] 11 | description = "List processing tools and functional utilities" 12 | readme = "README.rst" 13 | requires-python = ">=3.9" 14 | license = "BSD-3-Clause" 15 | license-files = ["LICENSE.txt"] 16 | authors = [ 17 | { name = "PyToolz Contributors" }, 18 | ] 19 | maintainers = [ 20 | { name = "Erik Welch", email = "erik.n.welch@gmail.com" }, 21 | ] 22 | keywords = [ 23 | "functional", 24 | "utility", 25 | "itertools", 26 | "functools", 27 | ] 28 | classifiers = [ 29 | "Development Status :: 5 - Production/Stable", 30 | "Programming Language :: Python", 31 | "Programming Language :: Python :: 3", 32 | "Programming Language :: Python :: 3.9", 33 | "Programming Language :: Python :: 3.10", 34 | "Programming Language :: Python :: 3.11", 35 | "Programming Language :: Python :: 3.12", 36 | "Programming Language :: Python :: 3.13", 37 | "Programming Language :: Python :: 3.14", 38 | "Programming Language :: Python :: Implementation :: CPython", 39 | "Programming Language :: Python :: Implementation :: PyPy", 40 | ] 41 | 42 | [project.urls] 43 | homepage = "https://github.com/pytoolz/toolz" 44 | repository = "https://github.com/pytoolz/toolz" 45 | documentation = "https://toolz.readthedocs.io/en/latest/" 46 | changelog = "https://github.com/pytoolz/toolz/releases" 47 | 48 | [tool.setuptools-git-versioning] 49 | enabled = true 50 | dev_template = "{tag}+{ccount}.g{sha}" 51 | dirty_template = "{tag}+{ccount}.g{sha}.dirty" 52 | 53 | [tool.setuptools] 54 | packages = [ 55 | "toolz", 56 | "toolz.curried", 57 | "toolz.sandbox", 58 | "toolz.sandbox.tests", 59 | "toolz.tests", 60 | "tlz", 61 | ] 62 | 63 | [tool.coverage.run] 64 | source = ["toolz"] 65 | omit = [ 66 | "toolz/tests/test*", 67 | "toolz/*/tests/test*", 68 | "toolz/compatibility.py", 69 | ] 70 | 71 | [tool.pytest.ini_options] 72 | minversion = "6.0" 73 | testpaths = ["toolz"] 74 | xfail_strict = true 75 | addopts = [ 76 | "--strict-config", # Force error if config is mispelled 77 | "--strict-markers", # Force error if marker is mispelled (must be defined in config) 78 | "-ra", # Print summary of all fails/errors 79 | ] 80 | log_cli_level = "info" 81 | filterwarnings = [ 82 | "error", 83 | "ignore:The toolz.compatibility module is no longer needed:DeprecationWarning:", 84 | ] 85 | 86 | [tool.coverage.report] 87 | exclude_lines = [ 88 | "pragma: no cover", 89 | ] 90 | 91 | [tool.codespell] 92 | ignore-words-list = "juxt,lamda" 93 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autofix_prs: false 3 | skip: [no-commit-to-branch] 4 | fail_fast: false 5 | default_language_version: 6 | python: python3 7 | repos: 8 | - repo: https://github.com/pre-commit/pre-commit-hooks 9 | rev: v6.0.0 10 | hooks: 11 | # Sanity checks 12 | - id: check-added-large-files 13 | - id: check-case-conflict 14 | - id: check-illegal-windows-names 15 | - id: check-merge-conflict 16 | # Checks based on file type 17 | - id: check-ast 18 | - id: check-toml 19 | - id: check-yaml 20 | # Detect mistakes 21 | - id: check-vcs-permalinks 22 | - id: debug-statements 23 | - id: destroyed-symlinks 24 | - id: detect-private-key 25 | - id: forbid-submodules 26 | # Automatic fixes 27 | - id: end-of-file-fixer 28 | - id: mixed-line-ending 29 | args: [--fix=lf] 30 | - id: trailing-whitespace 31 | - id: name-tests-test 32 | args: ["--pytest-test-first"] 33 | - repo: https://github.com/abravalheri/validate-pyproject 34 | rev: v0.24.1 35 | hooks: 36 | - id: validate-pyproject 37 | name: Validate pyproject.toml 38 | - repo: https://github.com/asottile/pyupgrade 39 | rev: v3.21.0 40 | hooks: 41 | - id: pyupgrade 42 | args: [--py39-plus] 43 | - repo: https://github.com/codespell-project/codespell 44 | rev: v2.4.1 45 | hooks: 46 | - id: codespell 47 | types_or: [python, markdown, rst, toml, yaml] 48 | additional_dependencies: 49 | - tomli; python_version<'3.11' 50 | files: ^(toolz|tlz|docs)/ 51 | - repo: https://github.com/rhysd/actionlint 52 | rev: v1.7.8 53 | hooks: 54 | - id: actionlint 55 | - repo: https://github.com/adrienverge/yamllint 56 | rev: v1.37.1 57 | hooks: 58 | - id: yamllint 59 | args: [-d, "{extends: default, rules: {line-length: disable}}"] 60 | - repo: https://github.com/woodruffw/zizmor-pre-commit 61 | rev: v1.15.2 62 | hooks: 63 | - id: zizmor 64 | - repo: https://github.com/pre-commit/pygrep-hooks 65 | rev: v1.10.0 66 | hooks: 67 | - id: rst-directive-colons 68 | - id: rst-inline-touching-normal 69 | - id: python-check-blanket-noqa 70 | - id: python-check-blanket-type-ignore 71 | - id: python-no-eval 72 | - id: python-no-log-warn 73 | - id: text-unicode-replacement-char 74 | - repo: https://github.com/python-jsonschema/check-jsonschema 75 | rev: 0.34.1 76 | hooks: 77 | - id: check-dependabot 78 | - id: check-github-workflows 79 | - id: check-readthedocs 80 | - repo: meta 81 | hooks: 82 | - id: check-hooks-apply 83 | - id: check-useless-excludes 84 | - repo: https://github.com/pre-commit/pre-commit-hooks 85 | rev: v6.0.0 86 | hooks: 87 | - id: no-commit-to-branch 88 | args: [--branch, master] 89 | -------------------------------------------------------------------------------- /doc/source/heritage.rst: -------------------------------------------------------------------------------- 1 | Heritage 2 | ======== 3 | 4 | While Python was originally intended as an imperative language 5 | [`Guido`_], it contains all elements necessary to support a rich set of features 6 | from the functional paradigm. In particular its core data structures, lazy 7 | iterators, and functions as first class objects can be combined to implement a 8 | common standard library of functions shared among many functional languages. 9 | 10 | This was first recognized and supported through the standard libraries 11 | itertools_ and `functools`_ which contain functions like ``permutations``, 12 | ``chain`` and ``partial`` to complement the standard ``map``, ``filter``, 13 | ``reduce`` already found in the core language. While these libraries contain 14 | substantial functionality they do not achieve the same level of adoption found 15 | in similar projects in other languages. This may be because they are 16 | incomplete and lack a number of commonly related functions like ``compose`` and 17 | ``groupby`` which often complement these core operations. 18 | 19 | A completion of this set of functions was first attempted in the projects 20 | `itertoolz`_ and `functoolz`_ (note the z). These libraries contained 21 | several functions that were absent in the standard itertools_ / `functools`_ 22 | libraries. The ``itertoolz``/``functoolz`` libraries were eventually merged 23 | into the monolithic ``toolz`` project described here. 24 | 25 | Most contemporary functional languages (Haskell, Scala, Clojure, ...) contain 26 | some variation of the functions found in ``toolz``. The ``toolz`` project 27 | generally adheres closely to the API found in the Clojure standard library (see 28 | `cheatsheet`_) and where disagreements occur that API usually dominates. The 29 | ``toolz`` API is also strongly affected by the principles of the Python 30 | language itself, and often makes deviations in order to be more approachable to 31 | that community. 32 | 33 | The development of a functional standard library within a popular imperative 34 | language is not unique. Similar projects have arisen in other 35 | imperative-by-design languages that contain the necessary elements to support a 36 | functional standard library. `Underscore.js `_ in JavaScript has attained 37 | notable popularity in the web community. ``LINQ`` in C# follows a similar 38 | philosophy but mimics declarative database languages rather than functional 39 | ones. `Enumerable `_ is is the closest project in Ruby. Other excellent projects 40 | also exist within the Python ecosystem, most notably `Fn.py `_ and `Funcy `_. 41 | 42 | .. _itertools: https://docs.python.org/library/itertools.html 43 | .. _functools: https://docs.python.org/library/functools.html 44 | .. _itertoolz: https://github.com/mrocklin/itertoolz 45 | .. _functoolz: https://github.com/mrocklin/functoolz 46 | .. _cheatsheet: https://clojure.org/cheatsheet 47 | .. _Guido: https://python-history.blogspot.com/2009/04/origins-of-pythons-functional-features.html 48 | -------------------------------------------------------------------------------- /toolz/tests/test_signatures.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import toolz._signatures as _sigs 3 | from toolz._signatures import builtins, _is_valid_args, _is_partial_args 4 | 5 | 6 | def test_is_valid(check_valid=_is_valid_args, incomplete=False): 7 | orig_check_valid = check_valid 8 | check_valid = lambda func, *args, **kwargs: orig_check_valid(func, args, kwargs) 9 | 10 | assert check_valid(lambda x: None) is None 11 | 12 | f = builtins.abs 13 | assert check_valid(f) is incomplete 14 | assert check_valid(f, 1) 15 | assert check_valid(f, x=1) is False 16 | assert check_valid(f, 1, 2) is False 17 | 18 | f = builtins.complex 19 | assert check_valid(f) 20 | assert check_valid(f, 1) 21 | assert check_valid(f, real=1) 22 | assert check_valid(f, 1, 2) 23 | assert check_valid(f, 1, imag=2) 24 | assert check_valid(f, 1, real=2) is False 25 | assert check_valid(f, 1, 2, 3) is False 26 | assert check_valid(f, 1, 2, imag=3) is False 27 | 28 | f = builtins.int 29 | assert check_valid(f) 30 | assert check_valid(f, 1) 31 | assert check_valid(f, x=1) 32 | assert check_valid(f, 1, 2) 33 | assert check_valid(f, 1, base=2) 34 | assert check_valid(f, x=1, base=2) 35 | assert check_valid(f, base=2) is incomplete 36 | assert check_valid(f, 1, 2, 3) is False 37 | 38 | f = builtins.map 39 | assert check_valid(f) is incomplete 40 | assert check_valid(f, 1) is incomplete 41 | assert check_valid(f, 1, 2) 42 | assert check_valid(f, 1, 2, 3) 43 | assert check_valid(f, 1, 2, 3, 4) 44 | 45 | f = builtins.min 46 | assert check_valid(f) is incomplete 47 | assert check_valid(f, 1) 48 | assert check_valid(f, iterable=1) is False 49 | assert check_valid(f, 1, 2) 50 | assert check_valid(f, 1, 2, 3) 51 | assert check_valid(f, key=None) is incomplete 52 | assert check_valid(f, 1, key=None) 53 | assert check_valid(f, 1, 2, key=None) 54 | assert check_valid(f, 1, 2, 3, key=None) 55 | assert check_valid(f, key=None, default=None) is incomplete 56 | assert check_valid(f, 1, key=None, default=None) 57 | assert check_valid(f, 1, 2, key=None, default=None) is False 58 | assert check_valid(f, 1, 2, 3, key=None, default=None) is False 59 | 60 | f = builtins.range 61 | assert check_valid(f) is incomplete 62 | assert check_valid(f, 1) 63 | assert check_valid(f, 1, 2) 64 | assert check_valid(f, 1, 2, 3) 65 | assert check_valid(f, 1, 2, step=3) is False 66 | assert check_valid(f, 1, 2, 3, 4) is False 67 | 68 | f = functools.partial 69 | assert orig_check_valid(f, (), {}) is incomplete 70 | assert orig_check_valid(f, (), {'func': 1}) is incomplete 71 | assert orig_check_valid(f, (1,), {}) 72 | assert orig_check_valid(f, (1,), {'func': 1}) 73 | assert orig_check_valid(f, (1, 2), {}) 74 | 75 | 76 | def test_is_partial(): 77 | test_is_valid(check_valid=_is_partial_args, incomplete=True) 78 | 79 | 80 | def test_for_coverage(): # :) 81 | assert _sigs._is_arity(1, 1) is None 82 | assert _sigs._is_arity(1, all) 83 | assert _sigs._has_varargs(None) is None 84 | assert _sigs._has_keywords(None) is None 85 | assert _sigs._num_required_args(None) is None 86 | -------------------------------------------------------------------------------- /toolz/curried/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Alternate namespace for toolz such that all functions are curried 3 | 4 | Currying provides implicit partial evaluation of all functions 5 | 6 | Example: 7 | 8 | Get usually requires two arguments, an index and a collection 9 | >>> from toolz.curried import get 10 | >>> get(0, ('a', 'b')) 11 | 'a' 12 | 13 | When we use it in higher order functions we often want to pass a partially 14 | evaluated form 15 | >>> data = [(1, 2), (11, 22), (111, 222)] 16 | >>> list(map(lambda seq: get(0, seq), data)) 17 | [1, 11, 111] 18 | 19 | The curried version allows simple expression of partial evaluation 20 | >>> list(map(get(0), data)) 21 | [1, 11, 111] 22 | 23 | See Also: 24 | toolz.functoolz.curry 25 | """ 26 | import toolz 27 | from . import operator 28 | from toolz import ( 29 | apply, 30 | comp, 31 | complement, 32 | compose, 33 | compose_left, 34 | concat, 35 | concatv, 36 | count, 37 | curry, 38 | diff, 39 | first, 40 | flip, 41 | frequencies, 42 | identity, 43 | interleave, 44 | isdistinct, 45 | isiterable, 46 | juxt, 47 | last, 48 | memoize, 49 | merge_sorted, 50 | peek, 51 | pipe, 52 | second, 53 | thread_first, 54 | thread_last, 55 | ) 56 | from .exceptions import merge, merge_with 57 | 58 | accumulate = toolz.curry(toolz.accumulate) 59 | assoc = toolz.curry(toolz.assoc) 60 | assoc_in = toolz.curry(toolz.assoc_in) 61 | cons = toolz.curry(toolz.cons) 62 | countby = toolz.curry(toolz.countby) 63 | dissoc = toolz.curry(toolz.dissoc) 64 | do = toolz.curry(toolz.do) 65 | drop = toolz.curry(toolz.drop) 66 | excepts = toolz.curry(toolz.excepts) 67 | filter = toolz.curry(toolz.filter) 68 | get = toolz.curry(toolz.get) 69 | get_in = toolz.curry(toolz.get_in) 70 | groupby = toolz.curry(toolz.groupby) 71 | interpose = toolz.curry(toolz.interpose) 72 | itemfilter = toolz.curry(toolz.itemfilter) 73 | itemmap = toolz.curry(toolz.itemmap) 74 | iterate = toolz.curry(toolz.iterate) 75 | join = toolz.curry(toolz.join) 76 | keyfilter = toolz.curry(toolz.keyfilter) 77 | keymap = toolz.curry(toolz.keymap) 78 | map = toolz.curry(toolz.map) 79 | mapcat = toolz.curry(toolz.mapcat) 80 | nth = toolz.curry(toolz.nth) 81 | partial = toolz.curry(toolz.partial) 82 | partition = toolz.curry(toolz.partition) 83 | partition_all = toolz.curry(toolz.partition_all) 84 | partitionby = toolz.curry(toolz.partitionby) 85 | peekn = toolz.curry(toolz.peekn) 86 | pluck = toolz.curry(toolz.pluck) 87 | random_sample = toolz.curry(toolz.random_sample) 88 | reduce = toolz.curry(toolz.reduce) 89 | reduceby = toolz.curry(toolz.reduceby) 90 | remove = toolz.curry(toolz.remove) 91 | sliding_window = toolz.curry(toolz.sliding_window) 92 | sorted = toolz.curry(toolz.sorted) 93 | tail = toolz.curry(toolz.tail) 94 | take = toolz.curry(toolz.take) 95 | take_nth = toolz.curry(toolz.take_nth) 96 | topk = toolz.curry(toolz.topk) 97 | unique = toolz.curry(toolz.unique) 98 | update_in = toolz.curry(toolz.update_in) 99 | valfilter = toolz.curry(toolz.valfilter) 100 | valmap = toolz.curry(toolz.valmap) 101 | 102 | del exceptions 103 | del toolz 104 | -------------------------------------------------------------------------------- /doc/source/purity.rst: -------------------------------------------------------------------------------- 1 | Function Purity 2 | =============== 3 | 4 | We call a function *pure* if it meets the following criteria 5 | 6 | 1. It does not depend on hidden state, or equivalently it only depends on its 7 | inputs. 8 | 2. Evaluation of the function does not cause side effects 9 | 10 | In short the internal work of a pure function is isolated from the rest of the 11 | program. 12 | 13 | Examples 14 | -------- 15 | 16 | This is made clear by two examples: 17 | 18 | .. code:: 19 | 20 | # A pure function 21 | def min(x, y): 22 | if x < y: 23 | return x 24 | else: 25 | return y 26 | 27 | 28 | # An impure function 29 | exponent = 2 30 | 31 | def powers(L): 32 | for i in range(len(L)): 33 | L[i] = L[i]**exponent 34 | return L 35 | 36 | The function ``min`` is pure. It always produces the same result given the 37 | same inputs and it doesn't affect any external variable. 38 | 39 | The function ``powers`` is impure for two reasons. First, it depends on a 40 | global variable, ``exponent``, which can change [*]_. Second, it changes the 41 | input ``L`` which may have external state. Consider the following execution: 42 | 43 | .. code:: 44 | 45 | >>> data = [1, 2, 3] 46 | >>> result = powers(data) 47 | 48 | >>> print(result) 49 | [1, 4, 9] 50 | >>> print(data) 51 | [1, 4, 9] 52 | 53 | We see that ``powers`` affected the variable ``data``. Users of our function 54 | might be surprised by this. Usually we expect our inputs to be unchanged. 55 | 56 | Another problem occurs when we run this code in a different context: 57 | 58 | .. code:: 59 | 60 | >>> data = [1, 2, 3] 61 | >>> result = powers(data) 62 | >>> print(result) 63 | [1, 8, 27] 64 | 65 | When we give ``powers`` the same inputs we receive different outputs; how could 66 | this be? Someone must have changed the value of ``exponent`` to be ``3``, 67 | producing cubes rather than squares. At first this flexibility may seem like a 68 | feature and indeed in many cases it may be. The cost for this flexibility is 69 | that we need to keep track of the ``exponent`` variable separately whenever we 70 | use ``powers``. As we use more functions these extra variables become a 71 | burden. 72 | 73 | .. [*] A function depending on a global value can be pure if the value never 74 | changes, i.e. is immutable. 75 | 76 | State 77 | ----- 78 | 79 | Impure functions are often more efficient but also require that the programmer 80 | "keep track" of the state of several variables. Keeping track of this state 81 | becomes increasingly difficult as programs grow in size. By eschewing state 82 | programmers are able to conceptually scale out to solve much larger problems. 83 | The loss of performance is often negligible compared to the freedom to trust 84 | that your functions work as expected on your inputs. 85 | 86 | Maintaining state provides efficiency at the cost of surprises. Pure 87 | functions produce no surprises and so lighten the mental load of the 88 | programmer. 89 | 90 | 91 | Testing 92 | ------- 93 | 94 | As an added bonus, testing pure functions is substantially simpler than testing 95 | impure ones. A programmer who has tried to test functions that include 96 | randomness will know this first-hand. 97 | -------------------------------------------------------------------------------- /toolz/sandbox/parallel.py: -------------------------------------------------------------------------------- 1 | import functools 2 | from toolz.itertoolz import partition_all 3 | from toolz.utils import no_default 4 | 5 | 6 | def _reduce(func, seq, initial=None): 7 | if initial is None: 8 | return functools.reduce(func, seq) 9 | else: 10 | return functools.reduce(func, seq, initial) 11 | 12 | 13 | def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None): 14 | """ 15 | Reduce without guarantee of ordered reduction. 16 | 17 | Parameters 18 | ---------- 19 | binops 20 | Associative operator. The associative property allows us to 21 | leverage a parallel map to perform reductions in parallel. 22 | 23 | 24 | inputs: 25 | 26 | ``binop`` - associative operator. The associative property allows us to 27 | leverage a parallel map to perform reductions in parallel. 28 | 29 | ``seq`` - a sequence to be aggregated 30 | ``default`` - an identity element like 0 for ``add`` or 1 for mul 31 | 32 | ``map`` - an implementation of ``map``. This may be parallel and 33 | determines how work is distributed. 34 | ``chunksize`` - Number of elements of ``seq`` that should be handled 35 | within a single function call 36 | ``combine`` - Binary operator to combine two intermediate results. 37 | If ``binop`` is of type (total, item) -> total 38 | then ``combine`` is of type (total, total) -> total 39 | Defaults to ``binop`` for common case of operators like add 40 | 41 | Fold chunks up the collection into blocks of size ``chunksize`` and then 42 | feeds each of these to calls to ``reduce``. This work is distributed 43 | with a call to ``map``, gathered back and then refolded to finish the 44 | computation. In this way ``fold`` specifies only how to chunk up data but 45 | leaves the distribution of this work to an externally provided ``map`` 46 | function. This function can be sequential or rely on multithreading, 47 | multiprocessing, or even distributed solutions. 48 | 49 | If ``map`` intends to serialize functions it should be prepared to accept 50 | and serialize lambdas. Note that the standard ``pickle`` module fails 51 | here. 52 | 53 | Example 54 | ------- 55 | 56 | >>> # Provide a parallel map to accomplish a parallel sum 57 | >>> from operator import add 58 | >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map) 59 | 10 60 | """ 61 | assert chunksize > 1 62 | 63 | if combine is None: 64 | combine = binop 65 | 66 | chunks = partition_all(chunksize, seq) 67 | 68 | # Evaluate sequence in chunks via map 69 | if default == no_default: 70 | results = map( 71 | functools.partial(_reduce, binop), 72 | chunks) 73 | else: 74 | results = map( 75 | functools.partial(_reduce, binop, initial=default), 76 | chunks) 77 | 78 | results = list(results) # TODO: Support complete laziness 79 | 80 | if len(results) == 1: # Return completed result 81 | return results[0] 82 | else: # Recurse to reaggregate intermediate results 83 | return fold(combine, results, map=map, chunksize=chunksize) 84 | -------------------------------------------------------------------------------- /doc/source/tips-and-tricks.rst: -------------------------------------------------------------------------------- 1 | Tips and Tricks 2 | =============== 3 | 4 | Toolz functions can be combined to make functions that, while common, aren't 5 | a part of toolz's standard offerings. This section presents 6 | a few of these recipes. 7 | 8 | 9 | * .. function:: pick(allowlist, dictionary) 10 | 11 | Return a subset of the provided dictionary with keys contained in the 12 | allowlist. 13 | 14 | :: 15 | 16 | from toolz import keyfilter 17 | 18 | def pick(allowlist, d): 19 | return keyfilter(lambda k: k in allowlist, d) 20 | 21 | 22 | Example: 23 | 24 | >>> alphabet = {'a': 1, 'b': 2, 'c': 3, 'd': 4} 25 | >>> pick(['a', 'b'], alphabet) 26 | {'a': 1, 'b': 2} 27 | 28 | 29 | * .. function:: omit(denylist, dictionary) 30 | 31 | Return a subset of the provided dictionary with keys *not* contained in the 32 | denylist. 33 | 34 | :: 35 | 36 | from toolz import keyfilter 37 | 38 | def omit(denylist, d): 39 | return keyfilter(lambda k: k not in denylist, d) 40 | 41 | 42 | Example: 43 | 44 | >>> alphabet = {'a': 1, 'b': 2, 'c': 3, 'd': 4} 45 | >>> omit(['a', 'b'], alphabet) 46 | {'c': 3, 'd': 4} 47 | 48 | 49 | * .. function:: compact(iterable) 50 | 51 | Filter an iterable on "truthy" values. 52 | 53 | :: 54 | 55 | from toolz import filter 56 | 57 | def compact(iter): 58 | return filter(None, iter) 59 | 60 | 61 | Example: 62 | 63 | >>> results = [0, 1, 2, None, 3, False] 64 | >>> list(compact(results)) 65 | [1, 2, 3] 66 | 67 | * .. function:: keyjoin(leftkey, leftseq, rightkey, rightseq) 68 | 69 | Inner join two sequences of dictionaries on specified keys, merging matches with right value 70 | precedence. 71 | 72 | :: 73 | 74 | from itertools import starmap 75 | from toolz import join, merge 76 | 77 | def keyjoin(leftkey, leftseq, rightkey, rightseq): 78 | return starmap(merge, join(leftkey, leftseq, rightkey, rightseq)) 79 | 80 | 81 | Example: 82 | 83 | >>> people = [{'id': 0, 'name': 'Anonymous Guy', 'location': 'Unknown'}, 84 | {'id': 1, 'name': 'Karan', 'location': 'San Francisco'}, 85 | {'id': 2, 'name': 'Matthew', 'location': 'Oakland'}] 86 | >>> hobbies = [{'person_id': 1, 'hobby': 'Tennis'}, 87 | {'person_id': 1, 'hobby': 'Acting'}, 88 | {'person_id': 2, 'hobby': 'Biking'}] 89 | >>> list(keyjoin('id', people, 'person_id', hobbies)) 90 | [{'hobby': 'Tennis', 91 | 'id': 1, 92 | 'location': 'San Francisco', 93 | 'name': 'Karan', 94 | 'person_id': 1}, 95 | {'hobby': 'Acting', 96 | 'id': 1, 97 | 'location': 'San Francisco', 98 | 'name': 'Karan', 99 | 'person_id': 1}, 100 | {'hobby': 'Biking', 101 | 'id': 2, 102 | 'location': 'Oakland', 103 | 'name': 'Matthew', 104 | 'person_id': 2}] 105 | 106 | * .. function:: areidentical(\*seqs) 107 | 108 | Determine if sequences are identical element-wise. 109 | This lazily evaluates the sequences and stops as soon as the result 110 | is determined. 111 | 112 | :: 113 | 114 | from toolz import diff 115 | 116 | def areidentical(*seqs): 117 | return not any(diff(*seqs, default=object())) 118 | 119 | 120 | Example: 121 | 122 | >>> areidentical([1, 2, 3], (1, 2, 3)) 123 | True 124 | 125 | >>> areidentical([1, 2, 3], [1, 2]) 126 | False 127 | -------------------------------------------------------------------------------- /tlz/_build_tlz.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import types 3 | import toolz 4 | from importlib import import_module 5 | from importlib.machinery import ModuleSpec 6 | 7 | 8 | class TlzLoader: 9 | """ Finds and loads ``tlz`` modules when added to sys.meta_path""" 10 | 11 | def __init__(self): 12 | self.always_from_toolz = { 13 | toolz.pipe, 14 | } 15 | 16 | def _load_toolz(self, fullname): 17 | rv = {} 18 | package, dot, submodules = fullname.partition('.') 19 | try: 20 | module_name = ''.join(['cytoolz', dot, submodules]) 21 | rv['cytoolz'] = import_module(module_name) 22 | except ImportError: 23 | pass 24 | try: 25 | module_name = ''.join(['toolz', dot, submodules]) 26 | rv['toolz'] = import_module(module_name) 27 | except ImportError: 28 | pass 29 | if not rv: 30 | raise ImportError(fullname) 31 | return rv 32 | 33 | def find_module(self, fullname, path=None): # pragma: py3 no cover 34 | package, dot, submodules = fullname.partition('.') 35 | if package == 'tlz': 36 | return self 37 | 38 | def load_module(self, fullname): # pragma: py3 no cover 39 | if fullname in sys.modules: # pragma: no cover 40 | return sys.modules[fullname] 41 | spec = ModuleSpec(fullname, self) 42 | module = self.create_module(spec) 43 | sys.modules[fullname] = module 44 | self.exec_module(module) 45 | return module 46 | 47 | def find_spec(self, fullname, path, target=None): # pragma: no cover 48 | package, dot, submodules = fullname.partition('.') 49 | if package == 'tlz': 50 | return ModuleSpec(fullname, self) 51 | 52 | def create_module(self, spec): 53 | return types.ModuleType(spec.name) 54 | 55 | def exec_module(self, module): 56 | toolz_mods = self._load_toolz(module.__name__) 57 | fast_mod = toolz_mods.get('cytoolz') or toolz_mods['toolz'] 58 | slow_mod = toolz_mods.get('toolz') or toolz_mods['cytoolz'] 59 | module.__dict__.update(toolz.merge(fast_mod.__dict__, module.__dict__)) 60 | package = fast_mod.__package__ 61 | if package is not None: 62 | package, dot, submodules = package.partition('.') 63 | module.__package__ = ''.join(['tlz', dot, submodules]) 64 | if not module.__doc__: 65 | module.__doc__ = fast_mod.__doc__ 66 | 67 | # show file from toolz during introspection 68 | try: 69 | module.__file__ = slow_mod.__file__ 70 | except AttributeError: 71 | pass 72 | 73 | for k, v in fast_mod.__dict__.items(): 74 | tv = slow_mod.__dict__.get(k) 75 | try: 76 | hash(tv) 77 | except TypeError: 78 | tv = None 79 | if tv in self.always_from_toolz: 80 | module.__dict__[k] = tv 81 | elif ( 82 | isinstance(v, types.ModuleType) 83 | and v.__package__ == fast_mod.__name__ 84 | ): 85 | package, dot, submodules = v.__name__.partition('.') 86 | module_name = ''.join(['tlz', dot, submodules]) 87 | submodule = import_module(module_name) 88 | module.__dict__[k] = submodule 89 | 90 | 91 | tlz_loader = TlzLoader() 92 | sys.meta_path.append(tlz_loader) 93 | tlz_loader.exec_module(sys.modules['tlz']) 94 | -------------------------------------------------------------------------------- /.github/workflows/publish_pypi.yml: -------------------------------------------------------------------------------- 1 | name: Builid Wheel and Release 2 | 3 | on: 4 | pull_request: 5 | workflow_dispatch: 6 | inputs: 7 | upload_dest: 8 | type: choice 9 | description: Upload wheels to 10 | options: 11 | - No Upload 12 | - PyPI 13 | - Test PyPI 14 | push: 15 | branches: 16 | - master 17 | tags: 18 | - '[0-9]+.[0-9]+.[0-9]+*' 19 | 20 | permissions: 21 | contents: read 22 | 23 | jobs: 24 | build-artifacts: 25 | runs-on: ubuntu-latest 26 | defaults: 27 | run: 28 | shell: bash -l {0} 29 | steps: 30 | - name: Checkout 31 | uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 32 | with: 33 | fetch-depth: 0 34 | persist-credentials: false 35 | - name: Set up Python 36 | uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 37 | with: 38 | python-version: "3.9" 39 | - name: Install build dependencies 40 | run: | 41 | python -m pip install --upgrade pip 42 | python -m pip install build twine 43 | - name: Build wheel and sdist 44 | run: python -m build 45 | - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 46 | with: 47 | name: releases 48 | path: dist 49 | if-no-files-found: error 50 | - name: Check with twine 51 | run: python -m twine check --strict dist/* 52 | 53 | upload-to-test-pypi: 54 | needs: build-artifacts 55 | runs-on: ubuntu-latest 56 | if: github.repository == 'pytoolz/toolz' && (startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' && github.event.inputs.upload_dest == 'Test PyPI') 57 | 58 | environment: 59 | name: test-pypi 60 | url: https://test.pypi.org/p/toolz 61 | permissions: 62 | id-token: write 63 | 64 | steps: 65 | - uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 66 | with: 67 | name: releases 68 | path: dist 69 | - name: Publish to Test-PyPI 70 | uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 71 | with: 72 | repository-url: https://test.pypi.org/legacy/ 73 | print-hash: true 74 | verbose: true 75 | 76 | upload-to-pypi: 77 | needs: build-artifacts 78 | runs-on: ubuntu-latest 79 | if: github.repository == 'pytoolz/toolz' && startsWith(github.ref, 'refs/tags/') && (github.event_name != 'workflow_dispatch' || github.event.inputs.upload_dest == 'PyPI') 80 | 81 | environment: 82 | name: pypi 83 | url: https://pypi.org/p/toolz 84 | permissions: 85 | id-token: write 86 | attestations: write 87 | 88 | steps: 89 | - uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0 90 | with: 91 | name: releases 92 | path: dist 93 | 94 | - name: Generate artifact attestation for sdist and wheel 95 | uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a # v3.0.0 96 | with: 97 | subject-path: "dist/toolz-*" 98 | 99 | - name: Publish to PyPI 100 | uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 101 | with: 102 | attestations: true 103 | print-hash: true 104 | verbose: true 105 | -------------------------------------------------------------------------------- /doc/source/laziness.rst: -------------------------------------------------------------------------------- 1 | Laziness 2 | ======== 3 | 4 | Lazy iterators evaluate only when necessary. They allow us to semantically 5 | manipulate large amounts of data while keeping very little of it actually in 6 | memory. They act like lists but don't take up space. 7 | 8 | 9 | Example - A Tale of Two Cities 10 | ------------------------------ 11 | 12 | We open `a file `_ containing 13 | the text of the classic text "A Tale of Two Cities" 14 | by Charles Dickens. 15 | 16 | .. code:: 17 | 18 | >>> book = open('tale-of-two-cities.txt') 19 | 20 | Much like a secondary school student, Python owns and opens the book without 21 | reading a single line of the text. The object ``book`` is a lazy iterator! 22 | Python will give us a line of the text only when we explicitly ask it to do so 23 | 24 | .. code:: 25 | 26 | >>> next(book) 27 | "It was the best of times," 28 | 29 | >>> next(book) 30 | "it was the worst of times," 31 | 32 | and so on. Each time we call ``next`` on ``book`` we burn through another line 33 | of the text and the ``book`` iterator marches slowly onwards through the text. 34 | 35 | 36 | Computation 37 | ----------- 38 | 39 | We can lazily operate on lazy iterators without doing any actual computation. 40 | For example let's read the book in upper case 41 | 42 | .. code:: 43 | 44 | >>> from toolz import map # toolz' map is lazy by default 45 | 46 | >>> loud_book = map(str.upper, book) 47 | 48 | >>> next(loud_book) 49 | "IT WAS THE AGE OF WISDOM," 50 | >>> next(loud_book) 51 | "IT WAS THE AGE OF FOOLISHNESS," 52 | 53 | It is as if we applied the function ``str.upper`` onto every line of the book; 54 | yet the first line completes instantaneously. Instead Python does the 55 | uppercasing work only when it becomes necessary, i.e. when you call ``next`` 56 | to ask for another line. 57 | 58 | 59 | Reductions 60 | ---------- 61 | 62 | You can operate on lazy iterators just as you would with lists, tuples, or 63 | sets. You can use them in for loops as in 64 | 65 | 66 | .. code:: 67 | 68 | for line in loud_book: 69 | ... 70 | 71 | You can instantiate them all into memory by calling them with the constructors 72 | ``list``, or ``tuple``. 73 | 74 | .. code:: 75 | 76 | loud_book = list(loud_book) 77 | 78 | Of course if they are very large then this might be unwise. Often we use 79 | laziness to avoid loading large datasets into memory at once. Many 80 | computations on large datasets don't require access to all of the data at a 81 | single time. In particular *reductions* (like sum) often take large amounts of 82 | sequential data (like [1, 2, 3, 4]) and produce much more manageable results 83 | (like 10) and can do so just by viewing the data a little bit at a time. For 84 | example we can count all of the letters in the Tale of Two Cities trivially 85 | using functions from ``toolz`` 86 | 87 | .. code:: 88 | 89 | >>> from toolz import concat, frequencies 90 | >>> letters = frequencies(concat(loud_book)) 91 | { 'A': 48036, 92 | 'B': 8402, 93 | 'C': 13812, 94 | 'D': 28000, 95 | 'E': 74624, 96 | ... 97 | 98 | In this case ``frequencies`` is a sort of reduction. At no time were more than 99 | a few hundred bytes of Tale of Two Cities necessarily in memory. We could just 100 | have easily done this computation on the entire Gutenberg collection or on 101 | Wikipedia. In this case we are limited by the size and speed of our hard drive 102 | and not by the capacity of our memory. 103 | -------------------------------------------------------------------------------- /doc/source/curry.rst: -------------------------------------------------------------------------------- 1 | 2 | Curry 3 | ===== 4 | 5 | Traditionally partial evaluation of functions is handled with the ``partial`` 6 | higher order function from ``functools``. Currying provides syntactic sugar. 7 | 8 | .. code:: 9 | 10 | >>> double = partial(mul, 2) # Partial evaluation 11 | >>> doubled = double(5) # Currying 12 | 13 | This syntactic sugar is valuable when developers chain several higher order 14 | functions together. 15 | 16 | Partial Evaluation 17 | ------------------ 18 | 19 | Often when composing smaller functions to form big ones we need partial 20 | evaluation. We do this in the word counting example: 21 | 22 | .. code:: 23 | 24 | >>> def stem(word): 25 | ... """ Stem word to primitive form """ 26 | ... return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") 27 | 28 | >>> wordcount = compose(frequencies, partial(map, stem), str.split) 29 | 30 | Here we want to map the ``stem`` function onto each of the words produced by 31 | ``str.split``. We want a ``stem_many`` function that takes a list of words, 32 | stems them, and returns a list back. In full form this would look like the 33 | following: 34 | 35 | .. code:: 36 | 37 | >>> def stem_many(words): 38 | ... return map(stem, words) 39 | 40 | The ``partial`` function lets us create this function more naturally. 41 | 42 | .. code:: 43 | 44 | >>> stem_many = partial(map, stem) 45 | 46 | In general 47 | 48 | .. code:: 49 | 50 | >>> def f(x, y, z): 51 | ... # Do stuff with x, y, and z 52 | 53 | >>> # partially evaluate f with known values a and b 54 | >>> def g(z): 55 | ... return f(a, b, z) 56 | 57 | >>> # alternatively we could use `partial` 58 | >>> g = partial(f, a, b) 59 | 60 | Curry 61 | ----- 62 | 63 | In this context currying is just syntactic sugar for partial evaluation. A 64 | curried function partially evaluates if it does not receive enough arguments to 65 | compute a result. 66 | 67 | .. code:: 68 | 69 | >>> from toolz import curry 70 | 71 | >>> @curry # We can use curry as a decorator 72 | ... def mul(x, y): 73 | ... return x * y 74 | 75 | >>> double = mul(2) # mul didn't receive enough arguments to evaluate 76 | ... # so it holds onto the 2 and waits, returning a 77 | ... # partially evaluated function `double` 78 | 79 | >>> double(5) 80 | 10 81 | 82 | So if ``map`` was curried... 83 | 84 | .. code:: 85 | 86 | >>> map = curry(map) 87 | 88 | Then we could replace the ``partial`` with a function evaluation 89 | 90 | .. code:: 91 | 92 | >>> # wordcount = compose(frequencies, partial(map, stem), str.split) 93 | >>> wordcount = compose(frequencies, map(stem), str.split) 94 | 95 | In this particular example it's probably simpler to stick with ``partial``. 96 | Once ``partial`` starts occurring several times in your code it may be time to 97 | switch to the ``curried`` namespace. 98 | 99 | The Curried Namespace 100 | --------------------- 101 | 102 | All functions present in the ``toolz`` namespace are curried in the 103 | ``toolz.curried`` namespace. 104 | 105 | So you can exchange an import line like the following 106 | 107 | .. code:: 108 | 109 | >>> from toolz import * 110 | 111 | For the following 112 | 113 | .. code:: 114 | 115 | >>> from toolz.curried import * 116 | 117 | And all of your favorite ``toolz`` functions will curry automatically. We've 118 | also included curried versions of the standard Python higher order functions 119 | like ``map``, ``filter``, ``reduce`` so you'll get them too (whether you like 120 | it or not.) 121 | -------------------------------------------------------------------------------- /doc/source/parallelism.rst: -------------------------------------------------------------------------------- 1 | Parallelism 2 | =========== 3 | 4 | PyToolz tries to support other parallel processing libraries. It does this 5 | by ensuring easy serialization of ``toolz`` functions and providing 6 | architecture-agnostic parallel algorithms. 7 | 8 | In practice ``toolz`` is developed against ``multiprocessing`` and 9 | ``ipyparallel``. 10 | 11 | 12 | Serialization 13 | ------------- 14 | 15 | Multiprocessing or distributed computing requires the transmission of functions 16 | between different processes or computers. This is done through serializing the 17 | function into text, sending that text over a wire, and deserializing the text 18 | back into a function. To the extent possible PyToolz functions are compatible 19 | with the standard serialization library ``pickle``. 20 | 21 | The ``pickle`` library often fails for complex functions including lambdas, 22 | closures, and class methods. When this occurs we recommend the alternative 23 | serialization library ``dill``. 24 | 25 | 26 | Example with parallel map 27 | ------------------------- 28 | 29 | Most parallel processing tasks may be significantly accelerated using only a 30 | parallel map operation. A number of high quality parallel map operations exist 31 | in other libraries, notably ``multiprocessing``, ``ipyparallel``, and 32 | ``threading`` (if your operation is not processor bound). 33 | 34 | In the example below we extend our wordcounting solution with a parallel map. 35 | We show how one can progress in development from sequential, to 36 | multiprocessing, to distributed computation all with the same domain code. 37 | 38 | 39 | .. code:: 40 | 41 | from toolz.curried import map 42 | from toolz import frequencies, compose, concat, merge_with 43 | 44 | def stem(word): 45 | """ Stem word to primitive form 46 | 47 | >>> stem("Hello!") 48 | 'hello' 49 | """ 50 | return word.lower().rstrip(",.!)-*_?:;$'-\"").lstrip("-*'\"(_$'") 51 | 52 | 53 | wordcount = compose(frequencies, map(stem), concat, map(str.split), open) 54 | 55 | if __name__ == '__main__': 56 | # Filenames for thousands of books from which we'd like to count words 57 | filenames = ['Book_%d.txt'%i for i in range(10000)] 58 | 59 | # Start with sequential map for development 60 | # pmap = map 61 | 62 | # Advance to Multiprocessing map for heavy computation on single machine 63 | # from multiprocessing import Pool 64 | # p = Pool(8) 65 | # pmap = p.map 66 | 67 | # Finish with distributed parallel map for big data 68 | from ipyparallel import Client 69 | p = Client()[:] 70 | pmap = p.map_sync 71 | 72 | total = merge_with(sum, pmap(wordcount, filenames)) 73 | 74 | This smooth transition is possible because 75 | 76 | 1. The ``map`` abstraction is a simple function call and so can be replaced. 77 | By contrast, this transformation would be difficult if we had written our code with a 78 | for loop or list comprehension. 79 | 2. The operation ``wordcount`` is separate from the parallel solution. 80 | 3. The task is embarrassingly parallel, needing only a very simple parallel 81 | strategy. Fortunately this is the common case. 82 | 83 | 84 | Parallel Algorithms 85 | ------------------- 86 | 87 | PyToolz does not implement parallel processing systems. It does however 88 | provide parallel algorithms that can extend existing parallel systems. Our 89 | general solution is to build algorithms that operate around a user-supplied 90 | parallel map function. 91 | 92 | In particular we provide a parallel ``fold`` in ``toolz.sandbox.parallel.fold``. 93 | This fold can work equally well with ``multiprocessing.Pool.map``, 94 | ``threading.Pool.map``, or ``ipyparallel``'s ``map_async``. 95 | -------------------------------------------------------------------------------- /doc/source/composition.rst: -------------------------------------------------------------------------------- 1 | Composability 2 | ============= 3 | 4 | Toolz functions interoperate because they consume and produce only a small 5 | set of common, core data structures. Each ``toolz`` function consumes 6 | just iterables, dictionaries, and functions and each ``toolz`` function produces 7 | just iterables, dictionaries, and functions. This standardized interface 8 | enables us to compose several general purpose functions to solve custom 9 | problems. 10 | 11 | Standard interfaces enable us to use many tools together, even if those tools 12 | were not designed with each other in mind. We call this "using together" 13 | composition. 14 | 15 | 16 | Standard Interface 17 | ------------------ 18 | 19 | This is best explained by two examples; the automobile industry and LEGOs. 20 | 21 | Autos 22 | ^^^^^ 23 | 24 | Automobile pieces are not widely composable because they do not adhere to a 25 | standard interface. You can't connect a Porsche engine to the body of a 26 | Volkswagen Beetle but include the safety features of your favorite luxury car. 27 | As a result when something breaks you need to find a specialist who understands 28 | exactly your collection of components and, depending on the popularity of your 29 | model, replacement parts may be difficult to find. While the customization 30 | provides a number of efficiencies important for automobiles, it limits the 31 | ability of downstream tinkerers. This ability for future developers to tinker 32 | is paramount in good software design. 33 | 34 | Lego 35 | ^^^^ 36 | 37 | Contrast this with Lego toys. With Lego you *can* connect a rocket engine and 38 | skis to a rowboat. This is a perfectly natural thing to do because every piece 39 | adheres to a simple interface - those simple and regular 5mm circular bumps. 40 | This freedom to connect pieces at will lets children unleash their imagination 41 | in such varied ways (like going arctic shark hunting with a rocket-ski-boat). 42 | 43 | The abstractions in programming make it far more like Lego than like building 44 | cars. This breaks down a little when we start to be constrained by performance 45 | or memory issues but this affects only a very small fraction of applications. 46 | Most of the time we have the freedom to operate in the Lego model if we choose 47 | to give up customization and embrace simple core standards. 48 | 49 | 50 | Other Standard Interfaces 51 | ------------------------- 52 | 53 | The Toolz project builds off of a standard interface -- this choice is not 54 | unique. Other standard interfaces exist and provide immeasurable benefit to 55 | their application areas. 56 | 57 | The NumPy array serves as a foundational object for numeric and scientific 58 | computing within Python. The ability of any project to consume and produce 59 | NumPy arrays is largely responsible for the broad success of the 60 | various SciPy projects. We see similar development today with the Pandas 61 | DataFrame. 62 | 63 | The UNIX toolset relies on files and streams of text. 64 | 65 | JSON emerged as the standard interface for communication over the web. The 66 | virtues of standardization become glaringly apparent when we contrast JSON with 67 | its predecessor, XML. XML was designed to be extensible/customizable, allowing 68 | each application to design its own interface. This resulted in a sea of 69 | difficult to understand custom data languages that failed to develop a common 70 | analytic and data processing infrastructure. In contrast JSON is very 71 | restrictive and allows only a fixed set of data structures, namely lists, 72 | dictionaries, numbers, strings. Fortunately this set is common to most modern 73 | languages and so JSON is extremely widely supported, perhaps falling second 74 | only to CSV. 75 | 76 | Standard interfaces permeate physical reality as well. Examples range 77 | from supra-national currencies to drill bits and electrical circuitry. In all 78 | cases the interoperation that results becomes a defining and invaluable feature 79 | of each solution. 80 | -------------------------------------------------------------------------------- /toolz/tests/test_curried.py: -------------------------------------------------------------------------------- 1 | import toolz 2 | import toolz.curried 3 | from toolz.curried import (take, first, second, sorted, merge_with, reduce, 4 | merge, operator as cop) 5 | from collections import defaultdict 6 | from importlib import import_module 7 | from operator import add 8 | 9 | 10 | def test_take(): 11 | assert list(take(2)([1, 2, 3])) == [1, 2] 12 | 13 | 14 | def test_first(): 15 | assert first is toolz.itertoolz.first 16 | 17 | 18 | def test_merge(): 19 | assert merge(factory=lambda: defaultdict(int))({1: 1}) == {1: 1} 20 | assert merge({1: 1}) == {1: 1} 21 | assert merge({1: 1}, factory=lambda: defaultdict(int)) == {1: 1} 22 | 23 | 24 | def test_merge_with(): 25 | assert merge_with(sum)({1: 1}, {1: 2}) == {1: 3} 26 | 27 | 28 | def test_merge_with_list(): 29 | assert merge_with(sum, [{'a': 1}, {'a': 2}]) == {'a': 3} 30 | 31 | 32 | def test_sorted(): 33 | assert sorted(key=second)([(1, 2), (2, 1)]) == [(2, 1), (1, 2)] 34 | 35 | 36 | def test_reduce(): 37 | assert reduce(add)((1, 2, 3)) == 6 38 | 39 | 40 | def test_module_name(): 41 | assert toolz.curried.__name__ == 'toolz.curried' 42 | 43 | 44 | def should_curry(func): 45 | if not callable(func) or isinstance(func, toolz.curry): 46 | return False 47 | nargs = toolz.functoolz.num_required_args(func) 48 | if nargs is None or nargs > 1: 49 | return True 50 | return nargs == 1 and toolz.functoolz.has_keywords(func) 51 | 52 | 53 | def test_curried_operator(): 54 | import operator 55 | 56 | for k, v in vars(cop).items(): 57 | if not callable(v): 58 | continue 59 | 60 | if not isinstance(v, toolz.curry): 61 | try: 62 | # Make sure it is unary 63 | v(1) 64 | except TypeError: 65 | try: 66 | v('x') 67 | except TypeError: 68 | pass 69 | else: 70 | continue 71 | raise AssertionError( 72 | 'toolz.curried.operator.%s is not curried!' % k, 73 | ) 74 | assert should_curry(getattr(operator, k)) == isinstance(v, toolz.curry), k 75 | 76 | # Make sure this isn't totally empty. 77 | assert len(set(vars(cop)) & {'add', 'sub', 'mul'}) == 3 78 | 79 | 80 | def test_curried_namespace(): 81 | exceptions = import_module('toolz.curried.exceptions') 82 | namespace = {} 83 | 84 | 85 | def curry_namespace(ns): 86 | return { 87 | name: toolz.curry(f) if should_curry(f) else f 88 | for name, f in ns.items() if '__' not in name 89 | } 90 | 91 | from_toolz = curry_namespace(vars(toolz)) 92 | from_exceptions = curry_namespace(vars(exceptions)) 93 | namespace.update(toolz.merge(from_toolz, from_exceptions)) 94 | 95 | namespace = toolz.valfilter(callable, namespace) 96 | curried_namespace = toolz.valfilter(callable, toolz.curried.__dict__) 97 | 98 | if namespace != curried_namespace: 99 | missing = set(namespace) - set(curried_namespace) 100 | if missing: 101 | raise AssertionError('There are missing functions in toolz.curried:\n %s' 102 | % ' \n'.join(sorted(missing))) 103 | extra = set(curried_namespace) - set(namespace) 104 | if extra: 105 | raise AssertionError('There are extra functions in toolz.curried:\n %s' 106 | % ' \n'.join(sorted(extra))) 107 | unequal = toolz.merge_with(list, namespace, curried_namespace) 108 | unequal = toolz.valfilter(lambda x: x[0] != x[1], unequal) 109 | messages = [] 110 | for name, (orig_func, auto_func) in sorted(unequal.items()): 111 | if name in from_exceptions: 112 | messages.append('%s should come from toolz.curried.exceptions' % name) 113 | elif should_curry(getattr(toolz, name)): 114 | messages.append('%s should be curried from toolz' % name) 115 | else: 116 | messages.append('%s should come from toolz and NOT be curried' % name) 117 | raise AssertionError('\n'.join(messages)) 118 | -------------------------------------------------------------------------------- /toolz/sandbox/tests/test_core.py: -------------------------------------------------------------------------------- 1 | from toolz import curry, unique, first, take 2 | from toolz.sandbox.core import EqualityHashKey, unzip 3 | from itertools import count, repeat 4 | 5 | def test_EqualityHashKey_default_key(): 6 | EqualityHashDefault = curry(EqualityHashKey, None) 7 | L1 = [1] 8 | L2 = [2] 9 | data1 = [L1, L1, L2, [], [], [1], [2], {}, ()] 10 | set1 = set(map(EqualityHashDefault, data1)) 11 | set2 = set(map(EqualityHashDefault, [[], [1], [2], {}, ()])) 12 | assert set1 == set2 13 | assert len(set1) == 5 14 | 15 | # Test that ``EqualityHashDefault(item)`` is distinct from ``item`` 16 | T0 = () 17 | T1 = (1,) 18 | data2 = list(map(EqualityHashDefault, [T0, T0, T1, T1, (), (1,)])) 19 | data2.extend([T0, T1, (), (1,)]) 20 | set3 = set(data2) 21 | assert set3 == {(), (1,), EqualityHashDefault(()), 22 | EqualityHashDefault((1,))} 23 | assert len(set3) == 4 24 | assert EqualityHashDefault(()) in set3 25 | assert EqualityHashDefault((1,)) in set3 26 | 27 | # Miscellaneous 28 | E1 = EqualityHashDefault(L1) 29 | E2 = EqualityHashDefault(L2) 30 | assert str(E1) == '=[1]=' 31 | assert repr(E1) == '=[1]=' 32 | assert E1 != E2 33 | assert not (E1 == E2) 34 | assert E1 == EqualityHashDefault(L1) 35 | assert not (E1 != EqualityHashDefault(L1)) 36 | assert E1 != L1 37 | assert not (E1 == L1) 38 | 39 | 40 | def test_EqualityHashKey_callable_key(): 41 | # Common simple hash key functions. 42 | EqualityHashLen = curry(EqualityHashKey, len) 43 | EqualityHashType = curry(EqualityHashKey, type) 44 | EqualityHashId = curry(EqualityHashKey, id) 45 | EqualityHashFirst = curry(EqualityHashKey, first) 46 | data1 = [[], [1], (), (1,), {}, {1: 2}] 47 | data2 = [[1, 2], (1, 2), (1, 3), [1, 3], [2, 1], {1: 2}] 48 | assert list(unique(data1*3, key=EqualityHashLen)) == data1 49 | assert list(unique(data2*3, key=EqualityHashLen)) == data2 50 | assert list(unique(data1*3, key=EqualityHashType)) == data1 51 | assert list(unique(data2*3, key=EqualityHashType)) == data2 52 | assert list(unique(data1*3, key=EqualityHashId)) == data1 53 | assert list(unique(data2*3, key=EqualityHashId)) == data2 54 | assert list(unique(data2*3, key=EqualityHashFirst)) == data2 55 | 56 | 57 | def test_EqualityHashKey_index_key(): 58 | d1 = {'firstname': 'Alice', 'age': 21, 'data': {}} 59 | d2 = {'firstname': 'Alice', 'age': 34, 'data': {}} 60 | d3a = {'firstname': 'Bob', 'age': 56, 'data': {}} 61 | d3b = {'firstname': 'Bob', 'age': 56, 'data': {}} 62 | EqualityHashFirstname = curry(EqualityHashKey, 'firstname') 63 | assert list(unique(3*[d1, d2, d3a, d3b], 64 | key=EqualityHashFirstname)) == [d1, d2, d3a] 65 | EqualityHashFirstnameAge = curry(EqualityHashKey, ['firstname', 'age']) 66 | assert list(unique(3*[d1, d2, d3a, d3b], 67 | key=EqualityHashFirstnameAge)) == [d1, d2, d3a] 68 | list1 = [0] * 10 69 | list2 = [0] * 100 70 | list3a = [1] * 10 71 | list3b = [1] * 10 72 | EqualityHash0 = curry(EqualityHashKey, 0) 73 | assert list(unique(3*[list1, list2, list3a, list3b], 74 | key=EqualityHash0)) == [list1, list2, list3a] 75 | 76 | 77 | def test_unzip(): 78 | def _to_lists(seq, n=10): 79 | """iter of iters -> finite list of finite lists 80 | """ 81 | def initial(s): 82 | return list(take(n, s)) 83 | 84 | return initial(map(initial, seq)) 85 | 86 | def _assert_initial_matches(a, b, n=10): 87 | assert list(take(n, a)) == list(take(n, b)) 88 | 89 | # Unzips a simple list correctly 90 | assert _to_lists(unzip([('a', 1), ('b', 2), ('c', 3)])) \ 91 | == [['a', 'b', 'c'], [1, 2, 3]] 92 | 93 | # Can handle a finite number of infinite iterators (the naive unzip 94 | # implementation `zip(*args)` implementation fails on this example). 95 | a, b, c = unzip(zip(count(1), repeat(0), repeat(1))) 96 | _assert_initial_matches(a, count(1)) 97 | _assert_initial_matches(b, repeat(0)) 98 | _assert_initial_matches(c, repeat(1)) 99 | 100 | # Sensibly handles empty input 101 | assert list(unzip(zip([]))) == [] 102 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Toolz 2 | ===== 3 | 4 | |Build Status| |Coverage Status| |Version Status| 5 | 6 | A set of utility functions for iterators, functions, and dictionaries. 7 | 8 | See the PyToolz documentation at https://toolz.readthedocs.io 9 | 10 | LICENSE 11 | ------- 12 | 13 | New BSD. See `License File `__. 14 | 15 | Install 16 | ------- 17 | 18 | ``toolz`` is on the Python Package Index (PyPI): 19 | 20 | :: 21 | 22 | pip install toolz 23 | 24 | Structure and Heritage 25 | ---------------------- 26 | 27 | ``toolz`` is implemented in three parts: 28 | 29 | |literal itertoolz|_, for operations on iterables. Examples: ``groupby``, 30 | ``unique``, ``interpose``, 31 | 32 | |literal functoolz|_, for higher-order functions. Examples: ``memoize``, 33 | ``curry``, ``compose``, 34 | 35 | |literal dicttoolz|_, for operations on dictionaries. Examples: ``assoc``, 36 | ``update-in``, ``merge``. 37 | 38 | .. |literal itertoolz| replace:: ``itertoolz`` 39 | .. _literal itertoolz: https://github.com/pytoolz/toolz/blob/master/toolz/itertoolz.py 40 | 41 | .. |literal functoolz| replace:: ``functoolz`` 42 | .. _literal functoolz: https://github.com/pytoolz/toolz/blob/master/toolz/functoolz.py 43 | 44 | .. |literal dicttoolz| replace:: ``dicttoolz`` 45 | .. _literal dicttoolz: https://github.com/pytoolz/toolz/blob/master/toolz/dicttoolz.py 46 | 47 | These functions come from the legacy of functional languages for list 48 | processing. They interoperate well to accomplish common complex tasks. 49 | 50 | Read our `API 51 | Documentation `__ for 52 | more details. 53 | 54 | Example 55 | ------- 56 | 57 | This builds a standard wordcount function from pieces within ``toolz``: 58 | 59 | .. code:: python 60 | 61 | >>> def stem(word): 62 | ... """ Stem word to primitive form """ 63 | ... return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") 64 | 65 | >>> from toolz import compose, frequencies 66 | >>> from toolz.curried import map 67 | >>> wordcount = compose(frequencies, map(stem), str.split) 68 | 69 | >>> sentence = "This cat jumped over this other cat!" 70 | >>> wordcount(sentence) 71 | {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1} 72 | 73 | Dependencies 74 | ------------ 75 | 76 | ``toolz`` supports Python 3.9+ with a common codebase. 77 | It is pure Python and requires no dependencies beyond the standard 78 | library. 79 | 80 | It is, in short, a lightweight dependency. 81 | 82 | 83 | CyToolz 84 | ------- 85 | 86 | The ``toolz`` project has been reimplemented in `Cython `__. 87 | The ``cytoolz`` project is a drop-in replacement for the Pure Python 88 | implementation. 89 | See `CyToolz GitHub Page `__ for more 90 | details. 91 | 92 | See Also 93 | -------- 94 | 95 | - `Underscore.js `__: A similar library for 96 | JavaScript 97 | - `Enumerable `__: A 98 | similar library for Ruby 99 | - `Clojure `__: A functional language whose 100 | standard library has several counterparts in ``toolz`` 101 | - `itertools `__: The 102 | Python standard library for iterator tools 103 | - `functools `__: The 104 | Python standard library for function tools 105 | 106 | Project Status 107 | -------------- 108 | 109 | **This project is alive but inactive.** 110 | 111 | The original maintainers have mostly moved on to other endeavors. We're still 112 | around for critical bug fixes, Python version bumps, and security issues and 113 | will commit to keeping the project alive (it's highly depended upon). 114 | However, beyond that we don't plan to spend much time reviewing contributions. 115 | We view Toolz as mostly complete. 116 | 117 | We encourage enthusiasts to innovate in new and wonderful places 🚀 118 | 119 | .. |Build Status| image:: https://github.com/pytoolz/toolz/actions/workflows/test.yml/badge.svg?branch=master 120 | :target: https://github.com/pytoolz/toolz/actions 121 | .. |Coverage Status| image:: https://codecov.io/gh/pytoolz/toolz/graph/badge.svg?token=4ZFc9dwKqY 122 | :target: https://codecov.io/gh/pytoolz/toolz 123 | .. |Version Status| image:: https://badge.fury.io/py/toolz.svg 124 | :target: https://badge.fury.io/py/toolz 125 | -------------------------------------------------------------------------------- /toolz/sandbox/core.py: -------------------------------------------------------------------------------- 1 | from toolz.itertoolz import getter, cons, pluck 2 | from itertools import tee, starmap 3 | 4 | 5 | # See #166: https://github.com/pytoolz/toolz/issues/166 6 | # See #173: https://github.com/pytoolz/toolz/pull/173 7 | class EqualityHashKey: 8 | """ Create a hash key that uses equality comparisons between items. 9 | 10 | This may be used to create hash keys for otherwise unhashable types: 11 | 12 | >>> from toolz import curry 13 | >>> EqualityHashDefault = curry(EqualityHashKey, None) 14 | >>> set(map(EqualityHashDefault, [[], (), [1], [1]])) # doctest: +SKIP 15 | {=[]=, =()=, =[1]=} 16 | 17 | **Caution:** adding N ``EqualityHashKey`` items to a hash container 18 | may require O(N**2) operations, not O(N) as for typical hashable types. 19 | Therefore, a suitable key function such as ``tuple`` or ``frozenset`` 20 | is usually preferred over using ``EqualityHashKey`` if possible. 21 | 22 | The ``key`` argument to ``EqualityHashKey`` should be a function or 23 | index that returns a hashable object that effectively distinguishes 24 | unequal items. This helps avoid the poor scaling that occurs when 25 | using the default key. For example, the above example can be improved 26 | by using a key function that distinguishes items by length or type: 27 | 28 | >>> EqualityHashLen = curry(EqualityHashKey, len) 29 | >>> EqualityHashType = curry(EqualityHashKey, type) # this works too 30 | >>> set(map(EqualityHashLen, [[], (), [1], [1]])) # doctest: +SKIP 31 | {=[]=, =()=, =[1]=} 32 | 33 | ``EqualityHashKey`` is convenient to use when a suitable key function 34 | is complicated or unavailable. For example, the following returns all 35 | unique values based on equality: 36 | 37 | >>> from toolz import unique 38 | >>> vals = [[], [], (), [1], [1], [2], {}, {}, {}] 39 | >>> list(unique(vals, key=EqualityHashDefault)) 40 | [[], (), [1], [2], {}] 41 | 42 | **Warning:** don't change the equality value of an item already in a hash 43 | container. Unhashable types are unhashable for a reason. For example: 44 | 45 | >>> L1 = [1] ; L2 = [2] 46 | >>> s = set(map(EqualityHashDefault, [L1, L2])) 47 | >>> s # doctest: +SKIP 48 | {=[1]=, =[2]=} 49 | 50 | >>> L1[0] = 2 # Don't do this! ``s`` now has duplicate items! 51 | >>> s # doctest: +SKIP 52 | {=[2]=, =[2]=} 53 | 54 | Although this may appear problematic, immutable data types is a common 55 | idiom in functional programming, and``EqualityHashKey`` easily allows 56 | the same idiom to be used by convention rather than strict requirement. 57 | 58 | See Also: 59 | identity 60 | """ 61 | __slots__ = ['item', 'key'] 62 | _default_hashkey = '__default__hashkey__' 63 | 64 | def __init__(self, key, item): 65 | if key is None: 66 | self.key = self._default_hashkey 67 | elif not callable(key): 68 | self.key = getter(key) 69 | else: 70 | self.key = key 71 | self.item = item 72 | 73 | def __hash__(self): 74 | if self.key == self._default_hashkey: 75 | val = self.key 76 | else: 77 | val = self.key(self.item) 78 | return hash(val) 79 | 80 | def __eq__(self, other): 81 | try: 82 | return (self._default_hashkey == other._default_hashkey and 83 | self.item == other.item) 84 | except AttributeError: 85 | return False 86 | 87 | def __ne__(self, other): 88 | return not self.__eq__(other) 89 | 90 | def __str__(self): 91 | return '=%s=' % str(self.item) 92 | 93 | def __repr__(self): 94 | return '=%s=' % repr(self.item) 95 | 96 | 97 | # See issue #293: https://github.com/pytoolz/toolz/issues/239 98 | def unzip(seq): 99 | """Inverse of ``zip`` 100 | 101 | >>> a, b = unzip([('a', 1), ('b', 2)]) 102 | >>> list(a) 103 | ['a', 'b'] 104 | >>> list(b) 105 | [1, 2] 106 | 107 | Unlike the naive implementation ``def unzip(seq): zip(*seq)`` this 108 | implementation can handle an infinite sequence ``seq``. 109 | 110 | Caveats: 111 | 112 | * The implementation uses ``tee``, and so can use a significant amount 113 | of auxiliary storage if the resulting iterators are consumed at 114 | different times. 115 | 116 | * The inner sequence cannot be infinite. In Python 3 ``zip(*seq)`` can be 117 | used if ``seq`` is a finite sequence of infinite sequences. 118 | 119 | """ 120 | 121 | seq = iter(seq) 122 | 123 | # Check how many iterators we need 124 | try: 125 | first = tuple(next(seq)) 126 | except StopIteration: 127 | return tuple() 128 | 129 | # and create them 130 | niters = len(first) 131 | seqs = tee(cons(first, seq), niters) 132 | 133 | return tuple(starmap(pluck, enumerate(seqs))) 134 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Toolz.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Toolz.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Toolz.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Toolz.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Toolz" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Toolz" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /toolz/tests/test_serialization.py: -------------------------------------------------------------------------------- 1 | from toolz import * 2 | import toolz 3 | import toolz.curried 4 | import pickle 5 | from toolz.utils import raises 6 | 7 | 8 | def test_compose(): 9 | f = compose(str, sum) 10 | g = pickle.loads(pickle.dumps(f)) 11 | assert f((1, 2)) == g((1, 2)) 12 | 13 | 14 | def test_curry(): 15 | f = curry(map)(str) 16 | g = pickle.loads(pickle.dumps(f)) 17 | assert list(f((1, 2, 3))) == list(g((1, 2, 3))) 18 | 19 | 20 | def test_juxt(): 21 | f = juxt(str, int, bool) 22 | g = pickle.loads(pickle.dumps(f)) 23 | assert f(1) == g(1) 24 | assert f.funcs == g.funcs 25 | 26 | 27 | def test_complement(): 28 | f = complement(bool) 29 | assert f(True) is False 30 | assert f(False) is True 31 | g = pickle.loads(pickle.dumps(f)) 32 | assert f(True) == g(True) 33 | assert f(False) == g(False) 34 | 35 | 36 | def test_instanceproperty(): 37 | p = toolz.functoolz.InstanceProperty(bool) 38 | assert p.__get__(None) is None 39 | assert p.__get__(0) is False 40 | assert p.__get__(1) is True 41 | p2 = pickle.loads(pickle.dumps(p)) 42 | assert p2.__get__(None) is None 43 | assert p2.__get__(0) is False 44 | assert p2.__get__(1) is True 45 | 46 | 47 | def f(x, y): 48 | return x, y 49 | 50 | 51 | def test_flip(): 52 | flip = pickle.loads(pickle.dumps(toolz.functoolz.flip)) 53 | assert flip is toolz.functoolz.flip 54 | g1 = flip(f) 55 | g2 = pickle.loads(pickle.dumps(g1)) 56 | assert g1(1, 2) == g2(1, 2) == f(2, 1) 57 | g1 = flip(f)(1) 58 | g2 = pickle.loads(pickle.dumps(g1)) 59 | assert g1(2) == g2(2) == f(2, 1) 60 | 61 | 62 | def test_curried_exceptions(): 63 | # This tests a global curried object that isn't defined in toolz.functoolz 64 | merge = pickle.loads(pickle.dumps(toolz.curried.merge)) 65 | assert merge is toolz.curried.merge 66 | 67 | 68 | @toolz.curry 69 | class GlobalCurried: 70 | def __init__(self, x, y): 71 | self.x = x 72 | self.y = y 73 | 74 | @toolz.curry 75 | def f1(self, a, b): 76 | return self.x + self.y + a + b 77 | 78 | def g1(self): 79 | pass 80 | 81 | def __reduce__(self): 82 | """Allow us to serialize instances of GlobalCurried""" 83 | return GlobalCurried, (self.x, self.y) 84 | 85 | @toolz.curry 86 | class NestedCurried: 87 | def __init__(self, x, y): 88 | self.x = x 89 | self.y = y 90 | 91 | @toolz.curry 92 | def f2(self, a, b): 93 | return self.x + self.y + a + b 94 | 95 | def g2(self): 96 | pass 97 | 98 | def __reduce__(self): 99 | """Allow us to serialize instances of NestedCurried""" 100 | return GlobalCurried.NestedCurried, (self.x, self.y) 101 | 102 | class Nested: 103 | def __init__(self, x, y): 104 | self.x = x 105 | self.y = y 106 | 107 | @toolz.curry 108 | def f3(self, a, b): 109 | return self.x + self.y + a + b 110 | 111 | def g3(self): 112 | pass 113 | 114 | 115 | def test_curried_qualname(): 116 | 117 | def preserves_identity(obj): 118 | return pickle.loads(pickle.dumps(obj)) is obj 119 | 120 | assert preserves_identity(GlobalCurried) 121 | assert preserves_identity(GlobalCurried.func.f1) 122 | assert preserves_identity(GlobalCurried.func.NestedCurried) 123 | assert preserves_identity(GlobalCurried.func.NestedCurried.func.f2) 124 | assert preserves_identity(GlobalCurried.func.Nested.f3) 125 | 126 | global_curried1 = GlobalCurried(1) 127 | global_curried2 = pickle.loads(pickle.dumps(global_curried1)) 128 | assert global_curried1 is not global_curried2 129 | assert global_curried1(2).f1(3, 4) == global_curried2(2).f1(3, 4) == 10 130 | 131 | global_curried3 = global_curried1(2) 132 | global_curried4 = pickle.loads(pickle.dumps(global_curried3)) 133 | assert global_curried3 is not global_curried4 134 | assert global_curried3.f1(3, 4) == global_curried4.f1(3, 4) == 10 135 | 136 | func1 = global_curried1(2).f1(3) 137 | func2 = pickle.loads(pickle.dumps(func1)) 138 | assert func1 is not func2 139 | assert func1(4) == func2(4) == 10 140 | 141 | nested_curried1 = GlobalCurried.func.NestedCurried(1) 142 | nested_curried2 = pickle.loads(pickle.dumps(nested_curried1)) 143 | assert nested_curried1 is not nested_curried2 144 | assert nested_curried1(2).f2(3, 4) == nested_curried2(2).f2(3, 4) == 10 145 | 146 | # If we add `curry.__getattr__` forwarding, the following tests will pass 147 | 148 | # if not PY34: 149 | # assert preserves_identity(GlobalCurried.func.g1) 150 | # assert preserves_identity(GlobalCurried.func.NestedCurried.func.g2) 151 | # assert preserves_identity(GlobalCurried.func.Nested) 152 | # assert preserves_identity(GlobalCurried.func.Nested.g3) 153 | # 154 | # # Rely on curry.__getattr__ 155 | # assert preserves_identity(GlobalCurried.f1) 156 | # assert preserves_identity(GlobalCurried.NestedCurried) 157 | # assert preserves_identity(GlobalCurried.NestedCurried.f2) 158 | # assert preserves_identity(GlobalCurried.Nested.f3) 159 | # if not PY34: 160 | # assert preserves_identity(GlobalCurried.g1) 161 | # assert preserves_identity(GlobalCurried.NestedCurried.g2) 162 | # assert preserves_identity(GlobalCurried.Nested) 163 | # assert preserves_identity(GlobalCurried.Nested.g3) 164 | # 165 | # nested_curried3 = nested_curried1(2) 166 | # nested_curried4 = pickle.loads(pickle.dumps(nested_curried3)) 167 | # assert nested_curried3 is not nested_curried4 168 | # assert nested_curried3.f2(3, 4) == nested_curried4.f2(3, 4) == 10 169 | # 170 | # func1 = nested_curried1(2).f2(3) 171 | # func2 = pickle.loads(pickle.dumps(func1)) 172 | # assert func1 is not func2 173 | # assert func1(4) == func2(4) == 10 174 | # 175 | # if not PY34: 176 | # nested3 = GlobalCurried.func.Nested(1, 2) 177 | # nested4 = pickle.loads(pickle.dumps(nested3)) 178 | # assert nested3 is not nested4 179 | # assert nested3.f3(3, 4) == nested4.f3(3, 4) == 10 180 | # 181 | # func1 = nested3.f3(3) 182 | # func2 = pickle.loads(pickle.dumps(func1)) 183 | # assert func1 is not func2 184 | # assert func1(4) == func2(4) == 10 185 | 186 | 187 | def test_curried_bad_qualname(): 188 | @toolz.curry 189 | class Bad: 190 | __qualname__ = 'toolz.functoolz.not.a.valid.path' 191 | 192 | assert raises(pickle.PicklingError, lambda: pickle.dumps(Bad)) 193 | -------------------------------------------------------------------------------- /doc/source/control.rst: -------------------------------------------------------------------------------- 1 | Control Flow 2 | ============ 3 | 4 | Programming is hard when we think simultaneously about several concepts. Good 5 | programming breaks down big problems into small problems and 6 | builds up small solutions into big solutions. By this practice the 7 | need for simultaneous thought is restricted to only a few elements at a time. 8 | 9 | All modern languages provide mechanisms to build data into data structures and 10 | to build functions out of other functions. The third element of programming, 11 | besides data and functions, is control flow. Building complex control flow 12 | out of simple control flow presents deeper challenges. 13 | 14 | 15 | What? 16 | ----- 17 | 18 | Each element in a computer program is either 19 | 20 | - A variable or value literal like ``x``, ``total``, or ``5`` 21 | - A function or computation like the ``+`` in ``x + 1``, the function ``fib`` 22 | in ``fib(3)``, the method ``split`` in ``line.split(',')``, or the ``=`` in 23 | ``x = 0`` 24 | - Control flow like ``if``, ``for``, or ``return`` 25 | 26 | Here is a piece of code; see if you can label each term as either 27 | variable/value, function/computation, or control flow 28 | 29 | .. code:: 30 | 31 | def fib(n): 32 | a, b = 0, 1 33 | for i in range(n): 34 | a, b = b, a + b 35 | return b 36 | 37 | Programming is hard when we have to juggle many code elements of each type at 38 | the same time. Good programming is about managing these three elements so that 39 | the developer is only required to think about a handful of them at a time. For 40 | example we might collect many integer variables into a list of integers or 41 | build a big function out of smaller ones. 42 | 43 | We organize our data into **data structures** like lists, dictionaries, or objects 44 | in order to group related data together -- this allows us to manipulate large 45 | collections of related data as if we were only manipulating a single entity. 46 | 47 | We **build large functions out of smaller ones**, enabling us to break up a 48 | complex task like doing laundry into a sequence of simpler tasks. 49 | 50 | .. code:: 51 | 52 | def do_laundry(clothes): 53 | wet_clothes = wash(clothes) 54 | dry_clothes = dry(wet_clothes) 55 | return fold(dry_clothes) 56 | 57 | While we have natural ways to manage data and functions, **control flow presents more of a challenge**. 58 | How do we break down complex control flow into simpler pieces that fit in our brain? 59 | How do we encapsulate commonly recurring patterns? 60 | 61 | Let's motivate this with an example of a common control structure, applying a 62 | function to each element in a list. Imagine we want to download the HTML 63 | source for a number of webpages. 64 | 65 | .. code:: 66 | 67 | from urllib import urlopen 68 | 69 | urls = ['http://www.google.com', 'http://www.wikipedia.com', 'http://www.apple.com'] 70 | html_texts = [] 71 | for item in urls: 72 | html_texts.append(urlopen(item)) 73 | 74 | Or maybe we want to compute the Fibonacci numbers on a particular set of 75 | integers 76 | 77 | .. code:: 78 | 79 | integers = [1, 2, 3, 4, 5] 80 | fib_integers = [] 81 | for item in integers: 82 | fib_integers.append(fib(item)) 83 | 84 | These two unrelated applications share an identical control flow pattern. They 85 | apply a function (``urlopen`` or ``fib``) onto each element of an input list 86 | (``urls``, or ``integers``), appending the result onto an output list. Because 87 | this control flow pattern is so common we give it a name, ``map``, and say that 88 | we map a function (like ``urlopen``) onto a list (like ``urls``). 89 | 90 | Because Python can treat functions like variables we can encode this control 91 | pattern into a higher-order-function as follows: 92 | 93 | .. code:: 94 | 95 | def map(function, sequence): 96 | output = [] 97 | for item in sequence: 98 | output.append(function(item)) 99 | return output 100 | 101 | This allows us to simplify our code above to the following, pithy solutions 102 | 103 | .. code:: 104 | 105 | html_texts = map(urlopen, urls) 106 | fib_integers = map(fib, integers) 107 | 108 | Experienced Python programmers know that this control pattern is so popular 109 | that it has been elevated to the status of **syntax** with the popular list 110 | comprehension 111 | 112 | .. code:: 113 | 114 | html_texts = [urlopen(url) for url in urls] 115 | 116 | 117 | Why? 118 | ---- 119 | 120 | So maybe you already knew about ``map`` and don't use it or maybe you just 121 | prefer list comprehensions. Why should you keep reading? 122 | 123 | Managing Complexity 124 | ^^^^^^^^^^^^^^^^^^^ 125 | 126 | The higher order function ``map`` gives us a name to call a particular control 127 | pattern. Regardless of whether or not you use a for loop, a list 128 | comprehension, or ``map`` itself, it is useful to recognize the operation 129 | and to give it a name. Naming control patterns lets us tackle 130 | complex problems at larger scale without burdening our mind with rote details. 131 | It is just as important as bundling data into data structures or building 132 | complex functions out of simple ones. 133 | 134 | *Naming control flow patterns enables programmers to manipulate increasingly 135 | complex operations.* 136 | 137 | Other Patterns 138 | ^^^^^^^^^^^^^^ 139 | 140 | The function ``map`` has friends. Advanced programmers may know about 141 | ``map``'s siblings, ``filter`` and ``reduce``. The ``filter`` control pattern 142 | is also handled by list comprehension syntax and ``reduce`` is often replaced 143 | by straight for loops, so if you don't want to use them there is no immediately 144 | practical reason why you would care. 145 | 146 | Most programmers however don't know about the many cousins of 147 | ``map``/``filter``/``reduce``. Consider for example the unsung heroine, 148 | ``groupby``. A brief example grouping names by their length follows: 149 | 150 | .. code:: 151 | 152 | >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank'] 153 | >>> groupby(len, names) 154 | {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']} 155 | 156 | ``groupby`` collects each element of a list into sublists determined by the value 157 | of a function. Let's see ``groupby`` in action again, grouping numbers by 158 | evenness. 159 | 160 | .. code:: 161 | 162 | >>> def iseven(n): 163 | ... return n % 2 == 0 164 | 165 | >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7]) 166 | {True: [2, 4, 6], False: [1, 3, 5, 7]} 167 | 168 | If we were to write this second operation out by hand it might look something 169 | like the following: 170 | 171 | .. code:: 172 | 173 | evens = [] 174 | odds = [] 175 | for item in numbers: 176 | if iseven(item): 177 | evens.append(item) 178 | else: 179 | odds.append(item) 180 | 181 | Most programmers have written code exactly like this over and over again, just 182 | like they may have repeated the ``map`` control pattern. When we identify code 183 | as a ``groupby`` operation we mentally collapse the detailed manipulation into 184 | a single concept. 185 | 186 | Additional Considerations 187 | ^^^^^^^^^^^^^^^^^^^^^^^^^ 188 | 189 | The Toolz library contains dozens of patterns like ``map`` and ``groupby``. 190 | Learning a core set (maybe a dozen) covers the vast majority of common 191 | programming tasks often done by hand. 192 | 193 | *A rich vocabulary of core control functions conveys the following benefits:* 194 | 195 | - You identify new patterns 196 | - You make fewer errors in rote coding 197 | - You can depend on well tested and benchmarked implementations 198 | 199 | But this does not come for free. As in spoken language the use of a rich 200 | vocabulary can alienate new practitioners. Most functional languages have 201 | fallen into this trap and are seen as unapproachable and smug. Python 202 | maintains a low-brow reputation and benefits from it. Just as with spoken 203 | language the value of using just-the-right-word must be moderated with the 204 | comprehension of the intended audience. 205 | -------------------------------------------------------------------------------- /toolz/tests/test_dicttoolz.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict as _defaultdict 2 | from collections.abc import Mapping 3 | import os 4 | from toolz.dicttoolz import (merge, merge_with, valmap, keymap, update_in, 5 | assoc, dissoc, keyfilter, valfilter, itemmap, 6 | itemfilter, assoc_in) 7 | from toolz.functoolz import identity 8 | from toolz.utils import raises 9 | 10 | 11 | def inc(x): 12 | return x + 1 13 | 14 | 15 | def iseven(i): 16 | return i % 2 == 0 17 | 18 | 19 | class TestDict: 20 | """Test typical usage: dict inputs, no factory keyword. 21 | 22 | Class attributes: 23 | D: callable that inputs a dict and creates or returns a MutableMapping 24 | kw: kwargs dict to specify "factory" keyword (if applicable) 25 | """ 26 | D = dict 27 | kw = {} 28 | 29 | def test_merge(self): 30 | D, kw = self.D, self.kw 31 | assert merge(D({1: 1, 2: 2}), D({3: 4}), **kw) == D({1: 1, 2: 2, 3: 4}) 32 | 33 | def test_merge_iterable_arg(self): 34 | D, kw = self.D, self.kw 35 | assert merge([D({1: 1, 2: 2}), D({3: 4})], **kw) == D({1: 1, 2: 2, 3: 4}) 36 | 37 | def test_merge_with(self): 38 | D, kw = self.D, self.kw 39 | dicts = D({1: 1, 2: 2}), D({1: 10, 2: 20}) 40 | assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22}) 41 | assert merge_with(tuple, *dicts, **kw) == D({1: (1, 10), 2: (2, 20)}) 42 | 43 | dicts = D({1: 1, 2: 2, 3: 3}), D({1: 10, 2: 20}) 44 | assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22, 3: 3}) 45 | assert merge_with(tuple, *dicts, **kw) == D({1: (1, 10), 2: (2, 20), 3: (3,)}) 46 | 47 | assert not merge_with(sum) 48 | 49 | def test_merge_with_iterable_arg(self): 50 | D, kw = self.D, self.kw 51 | dicts = D({1: 1, 2: 2}), D({1: 10, 2: 20}) 52 | assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22}) 53 | assert merge_with(sum, dicts, **kw) == D({1: 11, 2: 22}) 54 | assert merge_with(sum, iter(dicts), **kw) == D({1: 11, 2: 22}) 55 | 56 | def test_valmap(self): 57 | D, kw = self.D, self.kw 58 | assert valmap(inc, D({1: 1, 2: 2}), **kw) == D({1: 2, 2: 3}) 59 | 60 | def test_keymap(self): 61 | D, kw = self.D, self.kw 62 | assert keymap(inc, D({1: 1, 2: 2}), **kw) == D({2: 1, 3: 2}) 63 | 64 | def test_itemmap(self): 65 | D, kw = self.D, self.kw 66 | assert itemmap(reversed, D({1: 2, 2: 4}), **kw) == D({2: 1, 4: 2}) 67 | 68 | def test_valfilter(self): 69 | D, kw = self.D, self.kw 70 | assert valfilter(iseven, D({1: 2, 2: 3}), **kw) == D({1: 2}) 71 | 72 | def test_keyfilter(self): 73 | D, kw = self.D, self.kw 74 | assert keyfilter(iseven, D({1: 2, 2: 3}), **kw) == D({2: 3}) 75 | 76 | def test_itemfilter(self): 77 | D, kw = self.D, self.kw 78 | assert itemfilter(lambda item: iseven(item[0]), D({1: 2, 2: 3}), **kw) == D({2: 3}) 79 | assert itemfilter(lambda item: iseven(item[1]), D({1: 2, 2: 3}), **kw) == D({1: 2}) 80 | 81 | def test_assoc(self): 82 | D, kw = self.D, self.kw 83 | assert assoc(D({}), "a", 1, **kw) == D({"a": 1}) 84 | assert assoc(D({"a": 1}), "a", 3, **kw) == D({"a": 3}) 85 | assert assoc(D({"a": 1}), "b", 3, **kw) == D({"a": 1, "b": 3}) 86 | 87 | # Verify immutability: 88 | d = D({'x': 1}) 89 | oldd = d 90 | assoc(d, 'x', 2, **kw) 91 | assert d is oldd 92 | 93 | def test_dissoc(self): 94 | D, kw = self.D, self.kw 95 | assert dissoc(D({"a": 1}), "a", **kw) == D({}) 96 | assert dissoc(D({"a": 1, "b": 2}), "a", **kw) == D({"b": 2}) 97 | assert dissoc(D({"a": 1, "b": 2}), "b", **kw) == D({"a": 1}) 98 | assert dissoc(D({"a": 1, "b": 2}), "a", "b", **kw) == D({}) 99 | assert dissoc(D({"a": 1}), "a", **kw) == dissoc(dissoc(D({"a": 1}), "a", **kw), "a", **kw) 100 | 101 | # Verify immutability: 102 | d = D({'x': 1}) 103 | oldd = d 104 | d2 = dissoc(d, 'x', **kw) 105 | assert d is oldd 106 | assert d2 is not oldd 107 | 108 | def test_assoc_in(self): 109 | D, kw = self.D, self.kw 110 | assert assoc_in(D({"a": 1}), ["a"], 2, **kw) == D({"a": 2}) 111 | assert (assoc_in(D({"a": D({"b": 1})}), ["a", "b"], 2, **kw) == 112 | D({"a": D({"b": 2})})) 113 | assert assoc_in(D({}), ["a", "b"], 1, **kw) == D({"a": D({"b": 1})}) 114 | 115 | # Verify immutability: 116 | d = D({'x': 1}) 117 | oldd = d 118 | d2 = assoc_in(d, ['x'], 2, **kw) 119 | assert d is oldd 120 | assert d2 is not oldd 121 | 122 | def test_update_in(self): 123 | D, kw = self.D, self.kw 124 | assert update_in(D({"a": 0}), ["a"], inc, **kw) == D({"a": 1}) 125 | assert update_in(D({"a": 0, "b": 1}), ["b"], str, **kw) == D({"a": 0, "b": "1"}) 126 | assert (update_in(D({"t": 1, "v": D({"a": 0})}), ["v", "a"], inc, **kw) == 127 | D({"t": 1, "v": D({"a": 1})})) 128 | # Handle one missing key. 129 | assert update_in(D({}), ["z"], str, None, **kw) == D({"z": "None"}) 130 | assert update_in(D({}), ["z"], inc, 0, **kw) == D({"z": 1}) 131 | assert update_in(D({}), ["z"], lambda x: x+"ar", default="b", **kw) == D({"z": "bar"}) 132 | # Same semantics as Clojure for multiple missing keys, ie. recursively 133 | # create nested empty dictionaries to the depth specified by the 134 | # keys with the innermost value set to f(default). 135 | assert update_in(D({}), [0, 1], inc, default=-1, **kw) == D({0: D({1: 0})}) 136 | assert update_in(D({}), [0, 1], str, default=100, **kw) == D({0: D({1: "100"})}) 137 | assert (update_in(D({"foo": "bar", 1: 50}), ["d", 1, 0], str, 20, **kw) == 138 | D({"foo": "bar", 1: 50, "d": D({1: D({0: "20"})})})) 139 | # Verify immutability: 140 | d = D({'x': 1}) 141 | oldd = d 142 | update_in(d, ['x'], inc, **kw) 143 | assert d is oldd 144 | 145 | def test_factory(self): 146 | D, kw = self.D, self.kw 147 | assert merge(defaultdict(int, D({1: 2})), D({2: 3})) == {1: 2, 2: 3} 148 | assert (merge(defaultdict(int, D({1: 2})), D({2: 3}), 149 | factory=lambda: defaultdict(int)) == 150 | defaultdict(int, D({1: 2, 2: 3}))) 151 | assert not (merge(defaultdict(int, D({1: 2})), D({2: 3}), 152 | factory=lambda: defaultdict(int)) == {1: 2, 2: 3}) 153 | assert raises(TypeError, lambda: merge(D({1: 2}), D({2: 3}), factoryy=dict)) 154 | 155 | 156 | class defaultdict(_defaultdict): 157 | def __eq__(self, other): 158 | return (super().__eq__(other) and 159 | isinstance(other, _defaultdict) and 160 | self.default_factory == other.default_factory) 161 | 162 | 163 | class TestDefaultDict(TestDict): 164 | """Test defaultdict as input and factory 165 | 166 | Class attributes: 167 | D: callable that inputs a dict and creates or returns a MutableMapping 168 | kw: kwargs dict to specify "factory" keyword (if applicable) 169 | """ 170 | @staticmethod 171 | def D(dict_): 172 | return defaultdict(int, dict_) 173 | 174 | kw = {'factory': lambda: defaultdict(int)} 175 | 176 | 177 | class CustomMapping: 178 | """Define methods of the MutableMapping protocol required by dicttoolz""" 179 | def __init__(self, *args, **kwargs): 180 | self._d = dict(*args, **kwargs) 181 | 182 | def __getitem__(self, key): 183 | return self._d[key] 184 | 185 | def __setitem__(self, key, val): 186 | self._d[key] = val 187 | 188 | def __delitem__(self, key): 189 | del self._d[key] 190 | 191 | def __iter__(self): 192 | return iter(self._d) 193 | 194 | def __len__(self): 195 | return len(self._d) 196 | 197 | def __contains__(self, key): 198 | return key in self._d 199 | 200 | def __eq__(self, other): 201 | return isinstance(other, CustomMapping) and self._d == other._d 202 | 203 | def __ne__(self, other): 204 | return not isinstance(other, CustomMapping) or self._d != other._d 205 | 206 | def keys(self): 207 | return self._d.keys() 208 | 209 | def values(self): 210 | return self._d.values() 211 | 212 | def items(self): 213 | return self._d.items() 214 | 215 | def update(self, *args, **kwargs): 216 | self._d.update(*args, **kwargs) 217 | 218 | # Unused methods that are part of the MutableMapping protocol 219 | #def get(self, key, *args): 220 | # return self._d.get(key, *args) 221 | 222 | #def pop(self, key, *args): 223 | # return self._d.pop(key, *args) 224 | 225 | #def popitem(self, key): 226 | # return self._d.popitem() 227 | 228 | #def clear(self): 229 | # self._d.clear() 230 | 231 | #def setdefault(self, key, *args): 232 | # return self._d.setdefault(self, key, *args) 233 | 234 | 235 | class TestCustomMapping(TestDict): 236 | """Test CustomMapping as input and factory 237 | 238 | Class attributes: 239 | D: callable that inputs a dict and creates or returns a MutableMapping 240 | kw: kwargs dict to specify "factory" keyword (if applicable) 241 | """ 242 | D = CustomMapping 243 | kw = {'factory': lambda: CustomMapping()} 244 | 245 | 246 | def test_environ(): 247 | # See: https://github.com/pytoolz/cytoolz/issues/127 248 | assert keymap(identity, os.environ) == os.environ 249 | assert valmap(identity, os.environ) == os.environ 250 | assert itemmap(identity, os.environ) == os.environ 251 | 252 | 253 | def test_merge_with_non_dict_mappings(): 254 | class Foo(Mapping): 255 | def __init__(self, d): 256 | self.d = d 257 | 258 | def __iter__(self): 259 | return iter(self.d) 260 | 261 | def __getitem__(self, key): 262 | return self.d[key] 263 | 264 | def __len__(self): 265 | return len(self.d) 266 | 267 | d = Foo({1: 1}) 268 | 269 | assert merge(d) is d or merge(d) == {1: 1} 270 | assert merge_with(sum, d) == {1: 1} 271 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # Toolz documentation build configuration file, created by 2 | # sphinx-quickstart on Sun Sep 22 18:06:00 2013. 3 | # 4 | # This file is execfile()d with the current directory set to its containing dir. 5 | # 6 | # Note that not all possible configuration values are present in this 7 | # autogenerated file. 8 | # 9 | # All configuration values have a default; values that are commented out 10 | # serve to show the default. 11 | 12 | import sys, os 13 | 14 | # If extensions (or modules to document with autodoc) are in another directory, 15 | # add these directories to sys.path here. If the directory is relative to the 16 | # documentation root, use os.path.abspath to make it absolute, like shown here. 17 | #sys.path.insert(0, os.path.abspath('.')) 18 | sys.path.insert(0, os.path.abspath('.')) 19 | 20 | # -- General configuration ----------------------------------------------------- 21 | 22 | # If your documentation needs a minimal Sphinx version, state it here. 23 | #needs_sphinx = '1.0' 24 | 25 | # Add any Sphinx extension module names here, as strings. They can be extensions 26 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 27 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', 'sphinx.ext.autosummary'] 28 | 29 | # Add any paths that contain templates here, relative to this directory. 30 | templates_path = ['_templates'] 31 | 32 | # The suffix of source filenames. 33 | source_suffix = '.rst' 34 | 35 | # The encoding of source files. 36 | #source_encoding = 'utf-8-sig' 37 | 38 | # The master toctree document. 39 | master_doc = 'index' 40 | 41 | # General information about the project. 42 | project = 'Toolz' 43 | copyright = '2013, Matthew Rocklin, John Jacobsen' 44 | 45 | # The version info for the project you're documenting, acts as replacement for 46 | # |version| and |release|, also used in various other places throughout the 47 | # built documents. 48 | # 49 | # The short X.Y version. 50 | import toolz 51 | version = toolz.__version__ 52 | # The full version, including alpha/beta/rc tags. 53 | release = toolz.__version__ 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = [] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | html_theme = 'furo' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | html_title = "Toolz" 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_domain_indices = True 142 | 143 | # If false, no index is generated. 144 | #html_use_index = True 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | #html_show_sourcelink = True 151 | 152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 153 | #html_show_sphinx = True 154 | 155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 156 | #html_show_copyright = True 157 | 158 | # If true, an OpenSearch description file will be output, and all pages will 159 | # contain a tag referring to it. The value of this option must be the 160 | # base URL from which the finished HTML is served. 161 | #html_use_opensearch = '' 162 | 163 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 164 | #html_file_suffix = None 165 | 166 | # Output file base name for HTML help builder. 167 | htmlhelp_basename = 'Toolzdoc' 168 | 169 | 170 | # -- Options for LaTeX output -------------------------------------------------- 171 | 172 | latex_elements = { 173 | # The paper size ('letterpaper' or 'a4paper'). 174 | #'papersize': 'letterpaper', 175 | 176 | # The font size ('10pt', '11pt' or '12pt'). 177 | #'pointsize': '10pt', 178 | 179 | # Additional stuff for the LaTeX preamble. 180 | #'preamble': '', 181 | } 182 | 183 | # Grouping the document tree into LaTeX files. List of tuples 184 | # (source start file, target name, title, author, documentclass [howto/manual]). 185 | latex_documents = [ 186 | ('index', 'Toolz.tex', 'Toolz Documentation', 187 | 'Matthew Rocklin, John Jacobsen', 'manual'), 188 | ] 189 | 190 | # The name of an image file (relative to this directory) to place at the top of 191 | # the title page. 192 | #latex_logo = None 193 | 194 | # For "manual" documents, if this is true, then toplevel headings are parts, 195 | # not chapters. 196 | #latex_use_parts = False 197 | 198 | # If true, show page references after internal links. 199 | #latex_show_pagerefs = False 200 | 201 | # If true, show URL addresses after external links. 202 | #latex_show_urls = False 203 | 204 | # Documents to append as an appendix to all manuals. 205 | #latex_appendices = [] 206 | 207 | # If false, no module index is generated. 208 | #latex_domain_indices = True 209 | 210 | 211 | # -- Options for manual page output -------------------------------------------- 212 | 213 | # One entry per manual page. List of tuples 214 | # (source start file, name, description, authors, manual section). 215 | man_pages = [ 216 | ('index', 'toolz', 'Toolz Documentation', 217 | ['Matthew Rocklin, John Jacobsen'], 1) 218 | ] 219 | 220 | # If true, show URL addresses after external links. 221 | #man_show_urls = False 222 | 223 | 224 | # -- Options for Texinfo output ------------------------------------------------ 225 | 226 | # Grouping the document tree into Texinfo files. List of tuples 227 | # (source start file, target name, title, author, 228 | # dir menu entry, description, category) 229 | texinfo_documents = [ 230 | ('index', 'Toolz', 'Toolz Documentation', 231 | 'Matthew Rocklin, John Jacobsen', 'Toolz', 'One line description of project.', 232 | 'Miscellaneous'), 233 | ] 234 | 235 | # Documents to append as an appendix to all manuals. 236 | #texinfo_appendices = [] 237 | 238 | # If false, no module index is generated. 239 | #texinfo_domain_indices = True 240 | 241 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 242 | #texinfo_show_urls = 'footnote' 243 | 244 | 245 | # -- Options for Epub output --------------------------------------------------- 246 | 247 | # Bibliographic Dublin Core info. 248 | epub_title = 'Toolz' 249 | epub_author = 'Matthew Rocklin, John Jacobsen' 250 | epub_publisher = 'Matthew Rocklin, John Jacobsen' 251 | epub_copyright = '2013, Matthew Rocklin, John Jacobsen' 252 | 253 | # The language of the text. It defaults to the language option 254 | # or en if the language is not set. 255 | #epub_language = '' 256 | 257 | # The scheme of the identifier. Typical schemes are ISBN or URL. 258 | #epub_scheme = '' 259 | 260 | # The unique identifier of the text. This can be a ISBN number 261 | # or the project homepage. 262 | #epub_identifier = '' 263 | 264 | # A unique identification for the text. 265 | #epub_uid = '' 266 | 267 | # A tuple containing the cover image and cover page html template filenames. 268 | #epub_cover = () 269 | 270 | # HTML files that should be inserted before the pages created by sphinx. 271 | # The format is a list of tuples containing the path and title. 272 | #epub_pre_files = [] 273 | 274 | # HTML files that should be inserted after the pages created by sphinx. 275 | # The format is a list of tuples containing the path and title. 276 | #epub_post_files = [] 277 | 278 | # A list of files that should not be packed into the epub file. 279 | #epub_exclude_files = [] 280 | 281 | # The depth of the table of contents in toc.ncx. 282 | #epub_tocdepth = 3 283 | 284 | # Allow duplicate toc entries. 285 | #epub_tocdup = True 286 | -------------------------------------------------------------------------------- /toolz/dicttoolz.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import collections 3 | from functools import reduce 4 | from collections.abc import Mapping 5 | 6 | __all__ = ('merge', 'merge_with', 'valmap', 'keymap', 'itemmap', 7 | 'valfilter', 'keyfilter', 'itemfilter', 8 | 'assoc', 'dissoc', 'assoc_in', 'update_in', 'get_in') 9 | 10 | 11 | def _get_factory(f, kwargs): 12 | factory = kwargs.pop('factory', dict) 13 | if kwargs: 14 | raise TypeError("{}() got an unexpected keyword argument " 15 | "'{}'".format(f.__name__, kwargs.popitem()[0])) 16 | return factory 17 | 18 | 19 | def merge(*dicts, **kwargs): 20 | """ Merge a collection of dictionaries 21 | 22 | >>> merge({1: 'one'}, {2: 'two'}) 23 | {1: 'one', 2: 'two'} 24 | 25 | Later dictionaries have precedence 26 | 27 | >>> merge({1: 2, 3: 4}, {3: 3, 4: 4}) 28 | {1: 2, 3: 3, 4: 4} 29 | 30 | See Also: 31 | merge_with 32 | """ 33 | if len(dicts) == 1 and not isinstance(dicts[0], Mapping): 34 | dicts = dicts[0] 35 | factory = _get_factory(merge, kwargs) 36 | 37 | rv = factory() 38 | for d in dicts: 39 | rv.update(d) 40 | return rv 41 | 42 | 43 | def merge_with(func, *dicts, **kwargs): 44 | """ Merge dictionaries and apply function to combined values 45 | 46 | A key may occur in more than one dict, and all values mapped from the key 47 | will be passed to the function as a list, such as func([val1, val2, ...]). 48 | 49 | >>> merge_with(sum, {1: 1, 2: 2}, {1: 10, 2: 20}) 50 | {1: 11, 2: 22} 51 | 52 | >>> merge_with(first, {1: 1, 2: 2}, {2: 20, 3: 30}) # doctest: +SKIP 53 | {1: 1, 2: 2, 3: 30} 54 | 55 | See Also: 56 | merge 57 | """ 58 | if len(dicts) == 1 and not isinstance(dicts[0], Mapping): 59 | dicts = dicts[0] 60 | factory = _get_factory(merge_with, kwargs) 61 | 62 | values = collections.defaultdict(lambda: [].append) 63 | for d in dicts: 64 | for k, v in d.items(): 65 | values[k](v) 66 | 67 | result = factory() 68 | for k, v in values.items(): 69 | result[k] = func(v.__self__) 70 | return result 71 | 72 | 73 | def valmap(func, d, factory=dict): 74 | """ Apply function to values of dictionary 75 | 76 | >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]} 77 | >>> valmap(sum, bills) # doctest: +SKIP 78 | {'Alice': 65, 'Bob': 45} 79 | 80 | See Also: 81 | keymap 82 | itemmap 83 | """ 84 | rv = factory() 85 | rv.update(zip(d.keys(), map(func, d.values()))) 86 | return rv 87 | 88 | 89 | def keymap(func, d, factory=dict): 90 | """ Apply function to keys of dictionary 91 | 92 | >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]} 93 | >>> keymap(str.lower, bills) # doctest: +SKIP 94 | {'alice': [20, 15, 30], 'bob': [10, 35]} 95 | 96 | See Also: 97 | valmap 98 | itemmap 99 | """ 100 | rv = factory() 101 | rv.update(zip(map(func, d.keys()), d.values())) 102 | return rv 103 | 104 | 105 | def itemmap(func, d, factory=dict): 106 | """ Apply function to items of dictionary 107 | 108 | >>> accountids = {"Alice": 10, "Bob": 20} 109 | >>> itemmap(reversed, accountids) # doctest: +SKIP 110 | {10: "Alice", 20: "Bob"} 111 | 112 | See Also: 113 | keymap 114 | valmap 115 | """ 116 | rv = factory() 117 | rv.update(map(func, d.items())) 118 | return rv 119 | 120 | 121 | def valfilter(predicate, d, factory=dict): 122 | """ Filter items in dictionary by value 123 | 124 | >>> iseven = lambda x: x % 2 == 0 125 | >>> d = {1: 2, 2: 3, 3: 4, 4: 5} 126 | >>> valfilter(iseven, d) 127 | {1: 2, 3: 4} 128 | 129 | See Also: 130 | keyfilter 131 | itemfilter 132 | valmap 133 | """ 134 | rv = factory() 135 | for k, v in d.items(): 136 | if predicate(v): 137 | rv[k] = v 138 | return rv 139 | 140 | 141 | def keyfilter(predicate, d, factory=dict): 142 | """ Filter items in dictionary by key 143 | 144 | >>> iseven = lambda x: x % 2 == 0 145 | >>> d = {1: 2, 2: 3, 3: 4, 4: 5} 146 | >>> keyfilter(iseven, d) 147 | {2: 3, 4: 5} 148 | 149 | See Also: 150 | valfilter 151 | itemfilter 152 | keymap 153 | """ 154 | rv = factory() 155 | for k, v in d.items(): 156 | if predicate(k): 157 | rv[k] = v 158 | return rv 159 | 160 | 161 | def itemfilter(predicate, d, factory=dict): 162 | """ Filter items in dictionary by item 163 | 164 | >>> def isvalid(item): 165 | ... k, v = item 166 | ... return k % 2 == 0 and v < 4 167 | 168 | >>> d = {1: 2, 2: 3, 3: 4, 4: 5} 169 | >>> itemfilter(isvalid, d) 170 | {2: 3} 171 | 172 | See Also: 173 | keyfilter 174 | valfilter 175 | itemmap 176 | """ 177 | rv = factory() 178 | for item in d.items(): 179 | if predicate(item): 180 | k, v = item 181 | rv[k] = v 182 | return rv 183 | 184 | 185 | def assoc(d, key, value, factory=dict): 186 | """ Return a new dict with new key value pair 187 | 188 | New dict has d[key] set to value. Does not modify the initial dictionary. 189 | 190 | >>> assoc({'x': 1}, 'x', 2) 191 | {'x': 2} 192 | >>> assoc({'x': 1}, 'y', 3) # doctest: +SKIP 193 | {'x': 1, 'y': 3} 194 | """ 195 | d2 = factory() 196 | d2.update(d) 197 | d2[key] = value 198 | return d2 199 | 200 | 201 | def dissoc(d, *keys, **kwargs): 202 | """ Return a new dict with the given key(s) removed. 203 | 204 | New dict has d[key] deleted for each supplied key. 205 | Does not modify the initial dictionary. 206 | 207 | >>> dissoc({'x': 1, 'y': 2}, 'y') 208 | {'x': 1} 209 | >>> dissoc({'x': 1, 'y': 2}, 'y', 'x') 210 | {} 211 | >>> dissoc({'x': 1}, 'y') # Ignores missing keys 212 | {'x': 1} 213 | """ 214 | factory = _get_factory(dissoc, kwargs) 215 | d2 = factory() 216 | 217 | if len(keys) < len(d) * .6: 218 | d2.update(d) 219 | for key in keys: 220 | if key in d2: 221 | del d2[key] 222 | else: 223 | remaining = set(d) 224 | remaining.difference_update(keys) 225 | for k in remaining: 226 | d2[k] = d[k] 227 | return d2 228 | 229 | 230 | def assoc_in(d, keys, value, factory=dict): 231 | """ Return a new dict with new, potentially nested, key value pair 232 | 233 | >>> purchase = {'name': 'Alice', 234 | ... 'order': {'items': ['Apple', 'Orange'], 235 | ... 'costs': [0.50, 1.25]}, 236 | ... 'credit card': '5555-1234-1234-1234'} 237 | >>> assoc_in(purchase, ['order', 'costs'], [0.25, 1.00]) # doctest: +SKIP 238 | {'credit card': '5555-1234-1234-1234', 239 | 'name': 'Alice', 240 | 'order': {'costs': [0.25, 1.00], 'items': ['Apple', 'Orange']}} 241 | """ 242 | return update_in(d, keys, lambda x: value, value, factory) 243 | 244 | 245 | def update_in(d, keys, func, default=None, factory=dict): 246 | """ Update value in a (potentially) nested dictionary 247 | 248 | inputs: 249 | d - dictionary on which to operate 250 | keys - list or tuple giving the location of the value to be changed in d 251 | func - function to operate on that value 252 | 253 | If keys == [k0,..,kX] and d[k0]..[kX] == v, update_in returns a copy of the 254 | original dictionary with v replaced by func(v), but does not mutate the 255 | original dictionary. 256 | 257 | If k0 is not a key in d, update_in creates nested dictionaries to the depth 258 | specified by the keys, with the innermost value set to func(default). 259 | 260 | >>> inc = lambda x: x + 1 261 | >>> update_in({'a': 0}, ['a'], inc) 262 | {'a': 1} 263 | 264 | >>> transaction = {'name': 'Alice', 265 | ... 'purchase': {'items': ['Apple', 'Orange'], 266 | ... 'costs': [0.50, 1.25]}, 267 | ... 'credit card': '5555-1234-1234-1234'} 268 | >>> update_in(transaction, ['purchase', 'costs'], sum) # doctest: +SKIP 269 | {'credit card': '5555-1234-1234-1234', 270 | 'name': 'Alice', 271 | 'purchase': {'costs': 1.75, 'items': ['Apple', 'Orange']}} 272 | 273 | >>> # updating a value when k0 is not in d 274 | >>> update_in({}, [1, 2, 3], str, default="bar") 275 | {1: {2: {3: 'bar'}}} 276 | >>> update_in({1: 'foo'}, [2, 3, 4], inc, 0) 277 | {1: 'foo', 2: {3: {4: 1}}} 278 | """ 279 | ks = iter(keys) 280 | k = next(ks) 281 | 282 | rv = inner = factory() 283 | rv.update(d) 284 | 285 | for key in ks: 286 | if k in d: 287 | d = d[k] 288 | dtemp = factory() 289 | dtemp.update(d) 290 | else: 291 | d = dtemp = factory() 292 | 293 | inner[k] = inner = dtemp 294 | k = key 295 | 296 | if k in d: 297 | inner[k] = func(d[k]) 298 | else: 299 | inner[k] = func(default) 300 | return rv 301 | 302 | 303 | def get_in(keys, coll, default=None, no_default=False): 304 | """ Returns coll[i0][i1]...[iX] where [i0, i1, ..., iX]==keys. 305 | 306 | If coll[i0][i1]...[iX] cannot be found, returns ``default``, unless 307 | ``no_default`` is specified, then it raises KeyError or IndexError. 308 | 309 | ``get_in`` is a generalization of ``operator.getitem`` for nested data 310 | structures such as dictionaries and lists. 311 | 312 | >>> transaction = {'name': 'Alice', 313 | ... 'purchase': {'items': ['Apple', 'Orange'], 314 | ... 'costs': [0.50, 1.25]}, 315 | ... 'credit card': '5555-1234-1234-1234'} 316 | >>> get_in(['purchase', 'items', 0], transaction) 317 | 'Apple' 318 | >>> get_in(['name'], transaction) 319 | 'Alice' 320 | >>> get_in(['purchase', 'total'], transaction) 321 | >>> get_in(['purchase', 'items', 'apple'], transaction) 322 | >>> get_in(['purchase', 'items', 10], transaction) 323 | >>> get_in(['purchase', 'total'], transaction, 0) 324 | 0 325 | >>> get_in(['y'], {}, no_default=True) 326 | Traceback (most recent call last): 327 | ... 328 | KeyError: 'y' 329 | 330 | See Also: 331 | itertoolz.get 332 | operator.getitem 333 | """ 334 | try: 335 | return reduce(operator.getitem, keys, coll) 336 | except (KeyError, IndexError, TypeError): 337 | if no_default: 338 | raise 339 | return default 340 | -------------------------------------------------------------------------------- /doc/source/streaming-analytics.rst: -------------------------------------------------------------------------------- 1 | Streaming Analytics 2 | =================== 3 | 4 | The toolz functions can be composed to analyze large streaming datasets. 5 | Toolz supports common analytics patterns like the selection, grouping, 6 | reduction, and joining of data through pure composable functions. These 7 | functions often have analogs to familiar operations in other data analytics 8 | platforms like SQL or Pandas. 9 | 10 | Throughout this document we'll use this simple dataset of accounts 11 | 12 | .. code:: 13 | 14 | >>> accounts = [(1, 'Alice', 100, 'F'), # id, name, balance, gender 15 | ... (2, 'Bob', 200, 'M'), 16 | ... (3, 'Charlie', 150, 'M'), 17 | ... (4, 'Dennis', 50, 'M'), 18 | ... (5, 'Edith', 300, 'F')] 19 | 20 | Selecting with ``map`` and ``filter`` 21 | ------------------------------------- 22 | 23 | Simple projection and linear selection from a sequence is achieved through the 24 | standard functions ``map`` and ``filter``. 25 | 26 | .. code:: 27 | 28 | SELECT name, balance 29 | FROM accounts 30 | WHERE balance > 150; 31 | 32 | These functions correspond to the SQL commands ``SELECT`` and ``WHERE``. 33 | 34 | .. code:: 35 | 36 | >>> from toolz.curried import pipe, map, filter, get 37 | >>> pipe(accounts, filter(lambda acc: acc[2] > 150), 38 | ... map(get([1, 2])), 39 | ... list) 40 | 41 | Note: this uses the `curried`` versions of ``map`` and ``filter``. 42 | 43 | Of course, these operations are also well supported with standard 44 | list/generator comprehension syntax. This syntax is more often used and 45 | generally considered to be more Pythonic. 46 | 47 | .. code:: 48 | 49 | >>> [(name, balance) for (id, name, balance, gender) in accounts 50 | ... if balance > 150] 51 | 52 | 53 | Split-apply-combine with ``groupby`` and ``reduceby`` 54 | ----------------------------------------------------- 55 | 56 | We separate split-apply-combine operations into the following two concepts 57 | 58 | 1. Split the dataset into groups by some property 59 | 2. Reduce each of the groups with some synopsis function 60 | 61 | Toolz supports this common workflow with 62 | 63 | 1. a simple in-memory solution 64 | 2. a more sophisticated streaming solution. 65 | 66 | 67 | In Memory Split-Apply-Combine 68 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 69 | 70 | The in-memory solution depends on the functions `groupby`_ to split, and 71 | `valmap`_ to apply/combine. 72 | 73 | .. code:: 74 | 75 | SELECT gender, SUM(balance) 76 | FROM accounts 77 | GROUP BY gender; 78 | 79 | We first show these two functions piece by piece to show the intermediate 80 | groups. 81 | 82 | .. code:: 83 | 84 | >>> from toolz import compose 85 | >>> from toolz.curried import get, pluck, groupby, valmap 86 | 87 | >>> groupby(get(3), accounts) 88 | {'F': [(1, 'Alice', 100, 'F'), (5, 'Edith', 300, 'F')], 89 | 'M': [(2, 'Bob', 200, 'M'), (3, 'Charlie', 150, 'M'), (4, 'Dennis', 50, 'M')]} 90 | 91 | >>> valmap(compose(sum, pluck(2)), 92 | ... _) # The underscore captures results from the previous prompt 93 | {'F': 400, 'M': 400} 94 | 95 | 96 | Then we chain them together into a single computation 97 | 98 | .. code:: 99 | 100 | >>> pipe(accounts, groupby(get(3)), 101 | ... valmap(compose(sum, pluck(2)))) 102 | {'F': 400, 'M': 400} 103 | 104 | 105 | Streaming Split-Apply-Combine 106 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 107 | 108 | The ``groupby`` function collects the entire dataset in memory into a 109 | dictionary. While convenient, the ``groupby`` operation is *not streaming* and 110 | so this approach is limited to datasets that can fit comfortably into memory. 111 | 112 | Toolz achieves streaming split-apply-combine with `reduceby`_, a function that 113 | performs a simultaneous reduction on each group as the elements stream in. To 114 | understand this section you should first be familiar with the builtin function 115 | ``reduce``. 116 | 117 | The ``reduceby`` operation takes a key function, like ``get(3)`` or ``lambda x: 118 | x[3]``, and a binary operator like ``add`` or ``lesser = lambda acc, x: acc if 119 | acc < x else x``. It applies the key function to each item in succession, 120 | accumulating running totals for each key by combining each new 121 | value with the previous using the binary operator. It can't accept full 122 | reduction operations like ``sum`` or ``min`` as these require access to the 123 | entire group at once. Here is a simple example: 124 | 125 | .. code:: 126 | 127 | >>> from toolz import reduceby 128 | 129 | >>> def iseven(n): 130 | ... return n % 2 == 0 131 | 132 | >>> def add(x, y): 133 | ... return x + y 134 | 135 | >>> reduceby(iseven, add, [1, 2, 3, 4]) 136 | {True: 6, False: 4} 137 | 138 | The even numbers are added together ``(2 + 4 = 6)`` into group ``True``, and 139 | the odd numbers are added together ``(1 + 3 = 4)`` into group ``False``. 140 | 141 | 142 | Note that we have to replace the reduction ``sum`` with the binary operator 143 | ``add``. The incremental nature of ``add`` allows us to do the summation work as 144 | new data comes in. The use of binary operators like ``add`` over full reductions 145 | like ``sum`` enables computation on very large streaming datasets. 146 | 147 | The challenge to using ``reduceby`` often lies in the construction of a 148 | suitable binary operator. Here is the solution for our accounts example 149 | that adds up the balances for each group: 150 | 151 | .. code:: 152 | 153 | >>> binop = lambda total, account: total + account[2] 154 | 155 | >>> reduceby(get(3), binop, accounts, 0) 156 | {'F': 400, 'M': 400} 157 | 158 | 159 | This construction supports datasets that are much larger than available memory. 160 | Only the output must be able to fit comfortably in memory and this is rarely an 161 | issue, even for very large split-apply-combine computations. 162 | 163 | 164 | Semi-Streaming ``join`` 165 | ----------------------- 166 | 167 | We register multiple datasets together with `join`_. Consider a second 168 | dataset storing addresses by ID 169 | 170 | .. code:: 171 | 172 | >>> addresses = [(1, '123 Main Street'), # id, address 173 | ... (2, '5 Adams Way'), 174 | ... (5, '34 Rue St Michel')] 175 | 176 | We can join this dataset against our accounts dataset by specifying attributes 177 | which register different elements with each other; in this case they share a 178 | common first column, id. 179 | 180 | .. code:: 181 | 182 | SELECT accounts.name, addresses.address 183 | FROM accounts 184 | JOIN addresses 185 | ON accounts.id = addresses.id; 186 | 187 | 188 | .. code:: 189 | 190 | >>> from toolz import join, first 191 | 192 | >>> result = join(first, accounts, 193 | ... first, addresses) 194 | 195 | >>> for ((id, name, bal, gender), (id, address)) in result: 196 | ... print((name, address)) 197 | ('Alice', '123 Main Street') 198 | ('Bob', '5 Adams Way') 199 | ('Edith', '34 Rue St Michel') 200 | 201 | Join takes four main arguments, a left and right key function and a left 202 | and right sequence. It returns a sequence of pairs of matching items. In our 203 | case the return value of ``join`` is a sequence of pairs of tuples such that the 204 | first element of each tuple (the ID) is the same. In the example above we 205 | unpack this pair of tuples to get the fields that we want (``name`` and 206 | ``address``) from the result. 207 | 208 | 209 | Join on arbitrary functions / data 210 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 211 | 212 | Those familiar with SQL are accustomed to this kind of join on columns. 213 | However a functional join is more general than this; it doesn't need to operate 214 | on tuples, and key functions do not need to get particular columns. In the 215 | example below we match numbers from two collections so that exactly one is even 216 | and one is odd. 217 | 218 | .. code:: 219 | 220 | >>> def iseven(x): 221 | ... return x % 2 == 0 222 | >>> def isodd(x): 223 | ... return x % 2 == 1 224 | 225 | >>> list(join(iseven, [1, 2, 3, 4], 226 | ... isodd, [7, 8, 9])) 227 | [(2, 7), (4, 7), (1, 8), (3, 8), (2, 9), (4, 9)] 228 | 229 | 230 | Semi-Streaming Join 231 | ^^^^^^^^^^^^^^^^^^^ 232 | 233 | The Toolz Join operation fully evaluates the *left* sequence and streams the 234 | *right* sequence through memory. Thus, if streaming support is desired the 235 | larger of the two sequences should always occupy the right side of the join. 236 | 237 | 238 | Algorithmic Details 239 | ^^^^^^^^^^^^^^^^^^^ 240 | 241 | The semi-streaming join operation in ``toolz`` is asymptotically optimal. 242 | Computationally it is linear in the size of the input + output. In terms of 243 | storage the left sequence must fit in memory but the right sequence is free to 244 | stream. 245 | 246 | The results are not normalized, as in SQL, in that they permit repeated values. If 247 | normalization is desired, consider composing with the function ``unique`` (note 248 | that ``unique`` is not fully streaming.) 249 | 250 | 251 | More Complex Example 252 | ^^^^^^^^^^^^^^^^^^^^ 253 | 254 | The accounts example above connects two one-to-one relationships, ``accounts`` 255 | and ``addresses``; there was exactly one name per ID and one address per ID. 256 | This need not be the case. The join abstraction is sufficiently flexible to 257 | join one-to-many or even many-to-many relationships. The following example 258 | finds city/person pairs where that person has a friend who has a residence in 259 | that city. This is an example of joining two many-to-many relationships, 260 | because a person may have many friends and because a friend may have many 261 | residences. 262 | 263 | 264 | .. code:: 265 | 266 | >>> friends = [('Alice', 'Edith'), 267 | ... ('Alice', 'Zhao'), 268 | ... ('Edith', 'Alice'), 269 | ... ('Zhao', 'Alice'), 270 | ... ('Zhao', 'Edith')] 271 | 272 | >>> cities = [('Alice', 'NYC'), 273 | ... ('Alice', 'Chicago'), 274 | ... ('Dan', 'Sydney'), 275 | ... ('Edith', 'Paris'), 276 | ... ('Edith', 'Berlin'), 277 | ... ('Zhao', 'Shanghai')] 278 | 279 | >>> # Vacation opportunities 280 | >>> # In what cities do people have friends? 281 | >>> result = join(second, friends, 282 | ... first, cities) 283 | >>> for ((name, friend), (friend, city)) in sorted(unique(result)): 284 | ... print((name, city)) 285 | ('Alice', 'Berlin') 286 | ('Alice', 'Paris') 287 | ('Alice', 'Shanghai') 288 | ('Edith', 'Chicago') 289 | ('Edith', 'NYC') 290 | ('Zhao', 'Chicago') 291 | ('Zhao', 'NYC') 292 | ('Zhao', 'Berlin') 293 | ('Zhao', 'Paris') 294 | 295 | Join is computationally powerful: 296 | 297 | * It is expressive enough to cover a wide set of analytics operations 298 | * It runs in linear time relative to the size of the input and output 299 | * Only the left sequence must fit in memory 300 | 301 | 302 | Disclaimer 303 | ---------- 304 | 305 | Toolz is a general purpose functional standard library, not a library 306 | specifically for data analytics. While there are obvious benefits (streaming, 307 | composition, ...) users interested in data analytics might be better served by 308 | using projects specific to data analytics like Pandas_ or SQLAlchemy. 309 | 310 | 311 | .. _groupby: https://toolz.readthedocs.io/en/latest/api.html#toolz.itertoolz.groupby 312 | .. _join: https://toolz.readthedocs.io/en/latest/api.html#toolz.itertoolz.join 313 | .. _reduceby: https://toolz.readthedocs.io/en/latest/api.html#toolz.itertoolz.reduceby 314 | .. _valmap: https://toolz.readthedocs.io/en/latest/api.html#toolz.dicttoolz.valmap 315 | .. _Pandas: http://pandas.pydata.org/pandas-docs/stable/groupby.html 316 | .. _curried: https://toolz.readthedocs.io/en/latest/curry.html 317 | -------------------------------------------------------------------------------- /toolz/tests/test_inspect_args.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import inspect 3 | import itertools 4 | import operator 5 | import sys 6 | import toolz 7 | from toolz.functoolz import (curry, is_valid_args, is_partial_args, is_arity, 8 | num_required_args, has_varargs, has_keywords) 9 | from toolz._signatures import builtins 10 | import toolz._signatures as _sigs 11 | from toolz.utils import raises 12 | 13 | 14 | def make_func(param_string, raise_if_called=True): 15 | if not param_string.startswith('('): 16 | param_string = '(%s)' % param_string 17 | if raise_if_called: 18 | body = 'raise ValueError("function should not be called")' 19 | else: 20 | body = 'return True' 21 | d = {} 22 | exec(f'def func{param_string}:\n {body}', globals(), d) 23 | return d['func'] 24 | 25 | 26 | def test_make_func(): 27 | f = make_func('') 28 | assert raises(ValueError, lambda: f()) 29 | assert raises(TypeError, lambda: f(1)) 30 | 31 | f = make_func('', raise_if_called=False) 32 | assert f() 33 | assert raises(TypeError, lambda: f(1)) 34 | 35 | f = make_func('x, y=1', raise_if_called=False) 36 | assert f(1) 37 | assert f(x=1) 38 | assert f(1, 2) 39 | assert f(x=1, y=2) 40 | assert raises(TypeError, lambda: f(1, 2, 3)) 41 | 42 | f = make_func('(x, y=1)', raise_if_called=False) 43 | assert f(1) 44 | assert f(x=1) 45 | assert f(1, 2) 46 | assert f(x=1, y=2) 47 | assert raises(TypeError, lambda: f(1, 2, 3)) 48 | 49 | 50 | def test_is_valid(check_valid=is_valid_args, incomplete=False): 51 | orig_check_valid = check_valid 52 | check_valid = lambda func, *args, **kwargs: orig_check_valid(func, args, kwargs) 53 | 54 | f = make_func('') 55 | assert check_valid(f) 56 | assert check_valid(f, 1) is False 57 | assert check_valid(f, x=1) is False 58 | 59 | f = make_func('x') 60 | assert check_valid(f) is incomplete 61 | assert check_valid(f, 1) 62 | assert check_valid(f, x=1) 63 | assert check_valid(f, 1, x=2) is False 64 | assert check_valid(f, 1, y=2) is False 65 | assert check_valid(f, 1, 2) is False 66 | assert check_valid(f, x=1, y=2) is False 67 | 68 | f = make_func('x=1') 69 | assert check_valid(f) 70 | assert check_valid(f, 1) 71 | assert check_valid(f, x=1) 72 | assert check_valid(f, 1, x=2) is False 73 | assert check_valid(f, 1, y=2) is False 74 | assert check_valid(f, 1, 2) is False 75 | assert check_valid(f, x=1, y=2) is False 76 | 77 | f = make_func('*args') 78 | assert check_valid(f) 79 | assert check_valid(f, 1) 80 | assert check_valid(f, 1, 2) 81 | assert check_valid(f, x=1) is False 82 | 83 | f = make_func('**kwargs') 84 | assert check_valid(f) 85 | assert check_valid(f, x=1) 86 | assert check_valid(f, x=1, y=2) 87 | assert check_valid(f, 1) is False 88 | 89 | f = make_func('x, *args') 90 | assert check_valid(f) is incomplete 91 | assert check_valid(f, 1) 92 | assert check_valid(f, 1, 2) 93 | assert check_valid(f, x=1) 94 | assert check_valid(f, 1, x=1) is False 95 | assert check_valid(f, 1, y=1) is False 96 | 97 | f = make_func('x, y=1, **kwargs') 98 | assert check_valid(f) is incomplete 99 | assert check_valid(f, 1) 100 | assert check_valid(f, x=1) 101 | assert check_valid(f, 1, 2) 102 | assert check_valid(f, x=1, y=2, z=3) 103 | assert check_valid(f, 1, 2, y=3) is False 104 | 105 | f = make_func('a, b, c=3, d=4') 106 | assert check_valid(f) is incomplete 107 | assert check_valid(f, 1) is incomplete 108 | assert check_valid(f, 1, 2) 109 | assert check_valid(f, 1, c=3) is incomplete 110 | assert check_valid(f, 1, e=3) is False 111 | assert check_valid(f, 1, 2, e=3) is False 112 | assert check_valid(f, 1, 2, b=3) is False 113 | 114 | assert check_valid(1) is False 115 | 116 | 117 | def test_is_valid_py3(check_valid=is_valid_args, incomplete=False): 118 | orig_check_valid = check_valid 119 | check_valid = lambda func, *args, **kwargs: orig_check_valid(func, args, kwargs) 120 | 121 | f = make_func('x, *, y=1') 122 | assert check_valid(f) is incomplete 123 | assert check_valid(f, 1) 124 | assert check_valid(f, x=1) 125 | assert check_valid(f, 1, y=2) 126 | assert check_valid(f, 1, 2) is False 127 | assert check_valid(f, 1, z=2) is False 128 | 129 | f = make_func('x, *args, y=1') 130 | assert check_valid(f) is incomplete 131 | assert check_valid(f, 1) 132 | assert check_valid(f, x=1) 133 | assert check_valid(f, 1, y=2) 134 | assert check_valid(f, 1, 2, y=2) 135 | assert check_valid(f, 1, 2) 136 | assert check_valid(f, 1, z=2) is False 137 | 138 | f = make_func('*, y=1') 139 | assert check_valid(f) 140 | assert check_valid(f, 1) is False 141 | assert check_valid(f, y=1) 142 | assert check_valid(f, z=1) is False 143 | 144 | f = make_func('x, *, y') 145 | assert check_valid(f) is incomplete 146 | assert check_valid(f, 1) is incomplete 147 | assert check_valid(f, x=1) is incomplete 148 | assert check_valid(f, 1, y=2) 149 | assert check_valid(f, x=1, y=2) 150 | assert check_valid(f, 1, 2) is False 151 | assert check_valid(f, 1, z=2) is False 152 | assert check_valid(f, 1, y=1, z=2) is False 153 | 154 | f = make_func('x=1, *, y, z=3') 155 | assert check_valid(f) is incomplete 156 | assert check_valid(f, 1, z=3) is incomplete 157 | assert check_valid(f, y=2) 158 | assert check_valid(f, 1, y=2) 159 | assert check_valid(f, x=1, y=2) 160 | assert check_valid(f, x=1, y=2, z=3) 161 | assert check_valid(f, 1, x=1, y=2) is False 162 | assert check_valid(f, 1, 3, y=2) is False 163 | 164 | f = make_func('w, x=2, *args, y, z=4') 165 | assert check_valid(f) is incomplete 166 | assert check_valid(f, 1) is incomplete 167 | assert check_valid(f, 1, y=3) 168 | 169 | f = make_func('a, b, c=3, d=4, *args, e=5, f=6, g, h') 170 | assert check_valid(f) is incomplete 171 | assert check_valid(f, 1) is incomplete 172 | assert check_valid(f, 1, 2) is incomplete 173 | assert check_valid(f, 1, 2, g=7) is incomplete 174 | assert check_valid(f, 1, 2, g=7, h=8) 175 | assert check_valid(f, 1, 2, 3, 4, 5, 6, 7, 8, 9) is incomplete 176 | 177 | f = make_func('a: int, b: float') 178 | assert check_valid(f) is incomplete 179 | assert check_valid(f, 1) is incomplete 180 | assert check_valid(f, b=1) is incomplete 181 | assert check_valid(f, 1, 2) 182 | 183 | f = make_func('(a: int, b: float) -> float') 184 | assert check_valid(f) is incomplete 185 | assert check_valid(f, 1) is incomplete 186 | assert check_valid(f, b=1) is incomplete 187 | assert check_valid(f, 1, 2) 188 | 189 | f.__signature__ = 34 190 | assert check_valid(f) is False 191 | 192 | class RaisesValueError: 193 | def __call__(self): 194 | pass 195 | @property 196 | def __signature__(self): 197 | raise ValueError('Testing Python 3.4') 198 | 199 | f = RaisesValueError() 200 | assert check_valid(f) is None 201 | 202 | 203 | def test_is_partial(): 204 | test_is_valid(check_valid=is_partial_args, incomplete=True) 205 | test_is_valid_py3(check_valid=is_partial_args, incomplete=True) 206 | 207 | 208 | def test_is_valid_curry(): 209 | def check_curry(func, args, kwargs, incomplete=True): 210 | try: 211 | curry(func)(*args, **kwargs) 212 | curry(func, *args)(**kwargs) 213 | curry(func, **kwargs)(*args) 214 | curry(func, *args, **kwargs)() 215 | if not isinstance(func, type(lambda: None)): 216 | return None 217 | return incomplete 218 | except ValueError: 219 | return True 220 | except TypeError: 221 | return False 222 | 223 | check_valid = functools.partial(check_curry, incomplete=True) 224 | test_is_valid(check_valid=check_valid, incomplete=True) 225 | test_is_valid_py3(check_valid=check_valid, incomplete=True) 226 | 227 | check_valid = functools.partial(check_curry, incomplete=False) 228 | test_is_valid(check_valid=check_valid, incomplete=False) 229 | test_is_valid_py3(check_valid=check_valid, incomplete=False) 230 | 231 | 232 | def test_func_keyword(): 233 | def f(func=None): 234 | pass 235 | assert is_valid_args(f, (), {}) 236 | assert is_valid_args(f, (None,), {}) 237 | assert is_valid_args(f, (), {'func': None}) 238 | assert is_valid_args(f, (None,), {'func': None}) is False 239 | assert is_partial_args(f, (), {}) 240 | assert is_partial_args(f, (None,), {}) 241 | assert is_partial_args(f, (), {'func': None}) 242 | assert is_partial_args(f, (None,), {'func': None}) is False 243 | 244 | 245 | def test_has_unknown_args(): 246 | assert has_varargs(1) is False 247 | assert has_varargs(map) 248 | assert has_varargs(make_func('')) is False 249 | assert has_varargs(make_func('x, y, z')) is False 250 | assert has_varargs(make_func('*args')) 251 | assert has_varargs(make_func('**kwargs')) is False 252 | assert has_varargs(make_func('x, y, *args, **kwargs')) 253 | assert has_varargs(make_func('x, y, z=1')) is False 254 | assert has_varargs(make_func('x, y, z=1, **kwargs')) is False 255 | 256 | f = make_func('*args') 257 | f.__signature__ = 34 258 | assert has_varargs(f) is False 259 | 260 | class RaisesValueError: 261 | def __call__(self): 262 | pass 263 | @property 264 | def __signature__(self): 265 | raise ValueError('Testing Python 3.4') 266 | 267 | f = RaisesValueError() 268 | assert has_varargs(f) is None 269 | 270 | 271 | def test_num_required_args(): 272 | assert num_required_args(lambda: None) == 0 273 | assert num_required_args(lambda x: None) == 1 274 | assert num_required_args(lambda x, *args: None) == 1 275 | assert num_required_args(lambda x, **kwargs: None) == 1 276 | assert num_required_args(lambda x, y, *args, **kwargs: None) == 2 277 | assert num_required_args(map) == 2 278 | assert num_required_args(dict) is None 279 | 280 | 281 | def test_has_keywords(): 282 | assert has_keywords(lambda: None) is False 283 | assert has_keywords(lambda x: None) is False 284 | assert has_keywords(lambda x=1: None) 285 | assert has_keywords(lambda **kwargs: None) 286 | assert has_keywords(int) 287 | assert has_keywords(sorted) 288 | assert has_keywords(max) 289 | # map gained `strict=False` keyword in Python 3.14 290 | assert has_keywords(map) == (sys.version_info[1] >= 14) 291 | assert has_keywords(bytearray) is None 292 | 293 | 294 | def test_has_varargs(): 295 | assert has_varargs(lambda: None) is False 296 | assert has_varargs(lambda *args: None) 297 | assert has_varargs(lambda **kwargs: None) is False 298 | assert has_varargs(map) 299 | assert has_varargs(max) is None 300 | 301 | 302 | def test_is_arity(): 303 | assert is_arity(0, lambda: None) 304 | assert is_arity(1, lambda: None) is False 305 | assert is_arity(1, lambda x: None) 306 | assert is_arity(3, lambda x, y, z: None) 307 | assert is_arity(1, lambda x, *args: None) is False 308 | assert is_arity(1, lambda x, **kwargs: None) is False 309 | assert is_arity(1, all) 310 | assert is_arity(2, map) is False 311 | assert is_arity(2, range) is None 312 | 313 | 314 | def test_introspect_curry_valid_py3(check_valid=is_valid_args, incomplete=False): 315 | orig_check_valid = check_valid 316 | check_valid = lambda _func, *args, **kwargs: orig_check_valid(_func, args, kwargs) 317 | 318 | f = toolz.curry(make_func('x, y, z=0')) 319 | assert check_valid(f) 320 | assert check_valid(f, 1) 321 | assert check_valid(f, 1, 2) 322 | assert check_valid(f, 1, 2, 3) 323 | assert check_valid(f, 1, 2, 3, 4) is False 324 | assert check_valid(f, invalid_keyword=True) is False 325 | assert check_valid(f(1)) 326 | assert check_valid(f(1), 2) 327 | assert check_valid(f(1), 2, 3) 328 | assert check_valid(f(1), 2, 3, 4) is False 329 | assert check_valid(f(1), x=2) is False 330 | assert check_valid(f(1), y=2) 331 | assert check_valid(f(x=1), 2) is False 332 | assert check_valid(f(x=1), y=2) 333 | assert check_valid(f(y=2), 1) 334 | assert check_valid(f(y=2), 1, z=3) 335 | assert check_valid(f(y=2), 1, 3) is False 336 | 337 | f = toolz.curry(make_func('x, y, z=0'), 1, x=1) 338 | assert check_valid(f) is False 339 | assert check_valid(f, z=3) is False 340 | 341 | f = toolz.curry(make_func('x, y, *args, z')) 342 | assert check_valid(f) 343 | assert check_valid(f, 0) 344 | assert check_valid(f(1), 0) 345 | assert check_valid(f(1, 2), 0) 346 | assert check_valid(f(1, 2, 3), 0) 347 | assert check_valid(f(1, 2, 3, 4), 0) 348 | assert check_valid(f(1, 2, 3, 4), z=4) 349 | assert check_valid(f(x=1)) 350 | assert check_valid(f(x=1), 1) is False 351 | assert check_valid(f(x=1), y=2) 352 | 353 | 354 | def test_introspect_curry_partial_py3(): 355 | test_introspect_curry_valid_py3(check_valid=is_partial_args, incomplete=True) 356 | 357 | 358 | def test_introspect_curry_py3(): 359 | f = toolz.curry(make_func('')) 360 | assert num_required_args(f) == 0 361 | assert is_arity(0, f) 362 | assert has_varargs(f) is False 363 | assert has_keywords(f) is False 364 | 365 | f = toolz.curry(make_func('x')) 366 | assert num_required_args(f) == 0 367 | assert is_arity(0, f) is False 368 | assert is_arity(1, f) is False 369 | assert has_varargs(f) is False 370 | assert has_keywords(f) # A side-effect of being curried 371 | 372 | f = toolz.curry(make_func('x, y, z=0')) 373 | assert num_required_args(f) == 0 374 | assert is_arity(0, f) is False 375 | assert is_arity(1, f) is False 376 | assert is_arity(2, f) is False 377 | assert is_arity(3, f) is False 378 | assert has_varargs(f) is False 379 | assert has_keywords(f) 380 | 381 | f = toolz.curry(make_func('*args, **kwargs')) 382 | assert num_required_args(f) == 0 383 | assert has_varargs(f) 384 | assert has_keywords(f) 385 | 386 | 387 | def test_introspect_builtin_modules(): 388 | mods = [builtins, functools, itertools, operator, toolz, 389 | toolz.functoolz, toolz.itertoolz, toolz.dicttoolz, toolz.recipes] 390 | 391 | denylist = set() 392 | 393 | def add_denylist(mod, attr): 394 | if hasattr(mod, attr): 395 | denylist.add(getattr(mod, attr)) 396 | 397 | add_denylist(builtins, 'basestring') 398 | add_denylist(builtins, 'NoneType') 399 | add_denylist(builtins, '__metaclass__') 400 | add_denylist(builtins, 'sequenceiterator') 401 | 402 | def is_missing(modname, name, func): 403 | if name.startswith('_') and not name.startswith('__'): 404 | return False 405 | if name.startswith('__pyx_unpickle_') or name.endswith('_cython__'): 406 | return False 407 | try: 408 | if issubclass(func, BaseException): 409 | return False 410 | except TypeError: 411 | pass 412 | try: 413 | return (callable(func) 414 | and func.__module__ is not None 415 | and modname in func.__module__ 416 | and is_partial_args(func, (), {}) is not True 417 | and func not in denylist) 418 | except AttributeError: 419 | return False 420 | 421 | missing = {} 422 | for mod in mods: 423 | modname = mod.__name__ 424 | for name, func in vars(mod).items(): 425 | if is_missing(modname, name, func): 426 | if modname not in missing: 427 | missing[modname] = [] 428 | missing[modname].append(name) 429 | if missing: 430 | messages = [] 431 | for modname, names in sorted(missing.items()): 432 | msg = '{}:\n {}'.format(modname, '\n '.join(sorted(names))) 433 | messages.append(msg) 434 | message = 'Missing introspection for the following callables:\n\n' 435 | raise AssertionError(message + '\n\n'.join(messages)) 436 | 437 | 438 | def test_inspect_signature_property(): 439 | 440 | # By adding AddX to our signature registry, we can inspect the class 441 | # itself and objects of the class. `inspect.signature` doesn't like 442 | # it when `obj.__signature__` is a property. 443 | class AddX: 444 | def __init__(self, func): 445 | self.func = func 446 | 447 | def __call__(self, addx, *args, **kwargs): 448 | return addx + self.func(*args, **kwargs) 449 | 450 | @property 451 | def __signature__(self): 452 | sig = inspect.signature(self.func) 453 | params = list(sig.parameters.values()) 454 | kind = inspect.Parameter.POSITIONAL_OR_KEYWORD 455 | newparam = inspect.Parameter('addx', kind) 456 | params = [newparam] + params 457 | return sig.replace(parameters=params) 458 | 459 | addx = AddX(lambda x: x) 460 | sig = inspect.signature(addx) 461 | assert sig == inspect.Signature(parameters=[ 462 | inspect.Parameter('addx', inspect.Parameter.POSITIONAL_OR_KEYWORD), 463 | inspect.Parameter('x', inspect.Parameter.POSITIONAL_OR_KEYWORD)]) 464 | 465 | assert num_required_args(AddX) is False 466 | _sigs.signatures[AddX] = (_sigs.expand_sig((0, lambda func: None)),) 467 | assert num_required_args(AddX) == 1 468 | del _sigs.signatures[AddX] 469 | 470 | 471 | def test_inspect_wrapped_property(): 472 | class Wrapped: 473 | def __init__(self, func): 474 | self.func = func 475 | 476 | def __call__(self, *args, **kwargs): 477 | return self.func(*args, **kwargs) 478 | 479 | @property 480 | def __wrapped__(self): 481 | return self.func 482 | 483 | func = lambda x: x 484 | wrapped = Wrapped(func) 485 | assert inspect.signature(func) == inspect.signature(wrapped) 486 | 487 | # inspect.signature did not used to work properly on wrappers, 488 | # but it was fixed in Python 3.11.9, Python 3.12.3 and Python 489 | # 3.13+ 490 | inspectbroken = True 491 | if sys.version_info.major > 3: 492 | inspectbroken = False 493 | if sys.version_info.minor == 11 and sys.version_info.micro > 8: 494 | inspectbroken = False 495 | if sys.version_info.minor == 12 and sys.version_info.micro > 2: 496 | inspectbroken = False 497 | if sys.version_info.minor > 12: 498 | inspectbroken = False 499 | 500 | if inspectbroken: 501 | assert num_required_args(Wrapped) is None 502 | _sigs.signatures[Wrapped] = (_sigs.expand_sig((0, lambda func: None)),) 503 | 504 | assert num_required_args(Wrapped) == 1 505 | -------------------------------------------------------------------------------- /toolz/tests/test_itertoolz.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from itertools import starmap 3 | from toolz.utils import raises 4 | from functools import partial 5 | from random import Random 6 | from pickle import dumps, loads 7 | from toolz.itertoolz import (remove, groupby, merge_sorted, 8 | concat, concatv, interleave, unique, 9 | isiterable, getter, 10 | mapcat, isdistinct, first, second, 11 | nth, take, tail, drop, interpose, get, 12 | rest, last, cons, frequencies, 13 | reduceby, iterate, accumulate, 14 | sliding_window, count, partition, 15 | partition_all, take_nth, pluck, join, 16 | diff, topk, peek, peekn, random_sample) 17 | from operator import add, mul 18 | 19 | 20 | # is comparison will fail between this and no_default 21 | no_default2 = loads(dumps('__no__default__')) 22 | 23 | 24 | def identity(x): 25 | return x 26 | 27 | 28 | def iseven(x): 29 | return x % 2 == 0 30 | 31 | 32 | def isodd(x): 33 | return x % 2 == 1 34 | 35 | 36 | def inc(x): 37 | return x + 1 38 | 39 | 40 | def double(x): 41 | return 2 * x 42 | 43 | 44 | def test_remove(): 45 | r = remove(iseven, range(5)) 46 | assert type(r) is not list 47 | assert list(r) == list(filter(isodd, range(5))) 48 | 49 | 50 | def test_groupby(): 51 | assert groupby(iseven, [1, 2, 3, 4]) == {True: [2, 4], False: [1, 3]} 52 | 53 | 54 | def test_groupby_non_callable(): 55 | assert groupby(0, [(1, 2), (1, 3), (2, 2), (2, 4)]) == \ 56 | {1: [(1, 2), (1, 3)], 57 | 2: [(2, 2), (2, 4)]} 58 | 59 | assert groupby([0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \ 60 | {(1,): [(1, 2), (1, 3)], 61 | (2,): [(2, 2), (2, 4)]} 62 | 63 | assert groupby([0, 0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \ 64 | {(1, 1): [(1, 2), (1, 3)], 65 | (2, 2): [(2, 2), (2, 4)]} 66 | 67 | 68 | def test_merge_sorted(): 69 | assert list(merge_sorted([1, 2, 3], [1, 2, 3])) == [1, 1, 2, 2, 3, 3] 70 | assert list(merge_sorted([1, 3, 5], [2, 4, 6])) == [1, 2, 3, 4, 5, 6] 71 | assert list(merge_sorted([1], [2, 4], [3], [])) == [1, 2, 3, 4] 72 | assert list(merge_sorted([5, 3, 1], [6, 4, 3], [], 73 | key=lambda x: -x)) == [6, 5, 4, 3, 3, 1] 74 | assert list(merge_sorted([2, 1, 3], [1, 2, 3], 75 | key=lambda x: x // 3)) == [2, 1, 1, 2, 3, 3] 76 | assert list(merge_sorted([2, 3], [1, 3], 77 | key=lambda x: x // 3)) == [2, 1, 3, 3] 78 | assert ''.join(merge_sorted('abc', 'abc', 'abc')) == 'aaabbbccc' 79 | assert ''.join(merge_sorted('abc', 'abc', 'abc', key=ord)) == 'aaabbbccc' 80 | assert ''.join(merge_sorted('cba', 'cba', 'cba', 81 | key=lambda x: -ord(x))) == 'cccbbbaaa' 82 | assert list(merge_sorted([1], [2, 3, 4], key=identity)) == [1, 2, 3, 4] 83 | 84 | data = [[(1, 2), (0, 4), (3, 6)], [(5, 3), (6, 5), (8, 8)], 85 | [(9, 1), (9, 8), (9, 9)]] 86 | assert list(merge_sorted(*data, key=lambda x: x[1])) == [ 87 | (9, 1), (1, 2), (5, 3), (0, 4), (6, 5), (3, 6), (8, 8), (9, 8), (9, 9)] 88 | assert list(merge_sorted()) == [] 89 | assert list(merge_sorted([1, 2, 3])) == [1, 2, 3] 90 | assert list(merge_sorted([1, 4, 5], [2, 3])) == [1, 2, 3, 4, 5] 91 | assert list(merge_sorted([1, 4, 5], [2, 3], key=identity)) == [ 92 | 1, 2, 3, 4, 5] 93 | assert list(merge_sorted([1, 5], [2], [4, 7], [3, 6], key=identity)) == [ 94 | 1, 2, 3, 4, 5, 6, 7] 95 | 96 | 97 | def test_interleave(): 98 | assert ''.join(interleave(('ABC', '123'))) == 'A1B2C3' 99 | assert ''.join(interleave(('ABC', '1'))) == 'A1BC' 100 | 101 | 102 | def test_unique(): 103 | assert tuple(unique((1, 2, 3))) == (1, 2, 3) 104 | assert tuple(unique((1, 2, 1, 3))) == (1, 2, 3) 105 | assert tuple(unique((1, 2, 3), key=iseven)) == (1, 2) 106 | 107 | 108 | def test_isiterable(): 109 | # objects that have a __iter__() or __getitem__() method are iterable 110 | # https://docs.python.org/3/library/functions.html#iter 111 | class IterIterable: 112 | def __iter__(self): 113 | return iter(["a", "b", "c"]) 114 | 115 | class GetItemIterable: 116 | def __getitem__(self, item): 117 | return ["a", "b", "c"][item] 118 | 119 | # "if a class sets __iter__() to None, the class is not iterable" 120 | # https://docs.python.org/3/reference/datamodel.html#special-method-names 121 | class NotIterable: 122 | __iter__ = None 123 | 124 | class NotIterableEvenWithGetItem: 125 | __iter__ = None 126 | 127 | def __getitem__(self, item): 128 | return ["a", "b", "c"][item] 129 | 130 | assert isiterable([1, 2, 3]) is True 131 | assert isiterable('abc') is True 132 | assert isiterable(IterIterable()) is True 133 | assert isiterable(GetItemIterable()) is True 134 | assert isiterable(5) is False 135 | assert isiterable(NotIterable()) is False 136 | assert isiterable(NotIterableEvenWithGetItem()) is False 137 | 138 | 139 | def test_isdistinct(): 140 | assert isdistinct([1, 2, 3]) is True 141 | assert isdistinct([1, 2, 1]) is False 142 | 143 | assert isdistinct("Hello") is False 144 | assert isdistinct("World") is True 145 | 146 | assert isdistinct(iter([1, 2, 3])) is True 147 | assert isdistinct(iter([1, 2, 1])) is False 148 | 149 | 150 | def test_nth(): 151 | assert nth(2, 'ABCDE') == 'C' 152 | assert nth(2, iter('ABCDE')) == 'C' 153 | assert nth(1, (3, 2, 1)) == 2 154 | assert nth(0, {'foo': 'bar'}) == 'foo' 155 | assert raises(StopIteration, lambda: nth(10, {10: 'foo'})) 156 | assert nth(-2, 'ABCDE') == 'D' 157 | assert raises(ValueError, lambda: nth(-2, iter('ABCDE'))) 158 | 159 | 160 | def test_first(): 161 | assert first('ABCDE') == 'A' 162 | assert first((3, 2, 1)) == 3 163 | assert isinstance(first({0: 'zero', 1: 'one'}), int) 164 | 165 | 166 | def test_second(): 167 | assert second('ABCDE') == 'B' 168 | assert second((3, 2, 1)) == 2 169 | assert isinstance(second({0: 'zero', 1: 'one'}), int) 170 | 171 | 172 | def test_last(): 173 | assert last('ABCDE') == 'E' 174 | assert last((3, 2, 1)) == 1 175 | assert isinstance(last({0: 'zero', 1: 'one'}), int) 176 | 177 | 178 | def test_rest(): 179 | assert list(rest('ABCDE')) == list('BCDE') 180 | assert list(rest((3, 2, 1))) == list((2, 1)) 181 | 182 | 183 | def test_take(): 184 | assert list(take(3, 'ABCDE')) == list('ABC') 185 | assert list(take(2, (3, 2, 1))) == list((3, 2)) 186 | 187 | 188 | def test_tail(): 189 | assert list(tail(3, 'ABCDE')) == list('CDE') 190 | assert list(tail(3, iter('ABCDE'))) == list('CDE') 191 | assert list(tail(2, (3, 2, 1))) == list((2, 1)) 192 | 193 | 194 | def test_drop(): 195 | assert list(drop(3, 'ABCDE')) == list('DE') 196 | assert list(drop(1, (3, 2, 1))) == list((2, 1)) 197 | 198 | 199 | def test_take_nth(): 200 | assert list(take_nth(2, 'ABCDE')) == list('ACE') 201 | 202 | 203 | def test_get(): 204 | assert get(1, 'ABCDE') == 'B' 205 | assert list(get([1, 3], 'ABCDE')) == list('BD') 206 | assert get('a', {'a': 1, 'b': 2, 'c': 3}) == 1 207 | assert get(['a', 'b'], {'a': 1, 'b': 2, 'c': 3}) == (1, 2) 208 | 209 | assert get('foo', {}, default='bar') == 'bar' 210 | assert get({}, [1, 2, 3], default='bar') == 'bar' 211 | assert get([0, 2], 'AB', 'C') == ('A', 'C') 212 | 213 | assert get([0], 'AB') == ('A',) 214 | assert get([], 'AB') == () 215 | 216 | assert raises(IndexError, lambda: get(10, 'ABC')) 217 | assert raises(KeyError, lambda: get(10, {'a': 1})) 218 | assert raises(TypeError, lambda: get({}, [1, 2, 3])) 219 | assert raises(TypeError, lambda: get([1, 2, 3], 1, None)) 220 | assert raises(KeyError, lambda: get('foo', {}, default=no_default2)) 221 | 222 | 223 | def test_mapcat(): 224 | assert (list(mapcat(identity, [[1, 2, 3], [4, 5, 6]])) == 225 | [1, 2, 3, 4, 5, 6]) 226 | 227 | assert (list(mapcat(reversed, [[3, 2, 1, 0], [6, 5, 4], [9, 8, 7]])) == 228 | list(range(10))) 229 | 230 | inc = lambda i: i + 1 231 | assert ([4, 5, 6, 7, 8, 9] == 232 | list(mapcat(partial(map, inc), [[3, 4, 5], [6, 7, 8]]))) 233 | 234 | 235 | def test_cons(): 236 | assert list(cons(1, [2, 3])) == [1, 2, 3] 237 | 238 | 239 | def test_concat(): 240 | assert list(concat([[], [], []])) == [] 241 | assert (list(take(5, concat([['a', 'b'], range(1000000000)]))) == 242 | ['a', 'b', 0, 1, 2]) 243 | 244 | 245 | def test_concatv(): 246 | assert list(concatv([], [], [])) == [] 247 | assert (list(take(5, concatv(['a', 'b'], range(1000000000)))) == 248 | ['a', 'b', 0, 1, 2]) 249 | 250 | 251 | def test_interpose(): 252 | assert "a" == first(rest(interpose("a", range(1000000000)))) 253 | assert "tXaXrXzXaXn" == "".join(interpose("X", "tarzan")) 254 | assert list(interpose(0, itertools.repeat(1, 4))) == [1, 0, 1, 0, 1, 0, 1] 255 | assert list(interpose('.', ['a', 'b', 'c'])) == ['a', '.', 'b', '.', 'c'] 256 | 257 | 258 | def test_frequencies(): 259 | assert (frequencies(["cat", "pig", "cat", "eel", 260 | "pig", "dog", "dog", "dog"]) == 261 | {"cat": 2, "eel": 1, "pig": 2, "dog": 3}) 262 | assert frequencies([]) == {} 263 | assert frequencies("onomatopoeia") == {"a": 2, "e": 1, "i": 1, "m": 1, 264 | "o": 4, "n": 1, "p": 1, "t": 1} 265 | 266 | 267 | def test_reduceby(): 268 | data = [1, 2, 3, 4, 5] 269 | iseven = lambda x: x % 2 == 0 270 | assert reduceby(iseven, add, data, 0) == {False: 9, True: 6} 271 | assert reduceby(iseven, mul, data, 1) == {False: 15, True: 8} 272 | 273 | projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000}, 274 | {'name': 'fight crime', 'state': 'IL', 'cost': 100000}, 275 | {'name': 'help farmers', 'state': 'IL', 'cost': 2000000}, 276 | {'name': 'help farmers', 'state': 'CA', 'cost': 200000}] 277 | assert reduceby(lambda x: x['state'], 278 | lambda acc, x: acc + x['cost'], 279 | projects, 0) == {'CA': 1200000, 'IL': 2100000} 280 | 281 | assert reduceby('state', 282 | lambda acc, x: acc + x['cost'], 283 | projects, 0) == {'CA': 1200000, 'IL': 2100000} 284 | 285 | 286 | def test_reduce_by_init(): 287 | assert reduceby(iseven, add, [1, 2, 3, 4]) == {True: 2 + 4, False: 1 + 3} 288 | assert reduceby(iseven, add, [1, 2, 3, 4], no_default2) == {True: 2 + 4, 289 | False: 1 + 3} 290 | 291 | 292 | def test_reduce_by_callable_default(): 293 | def set_add(s, i): 294 | s.add(i) 295 | return s 296 | 297 | assert reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2], set) == \ 298 | {True: {2, 4}, False: {1, 3}} 299 | 300 | 301 | def test_iterate(): 302 | assert list(itertools.islice(iterate(inc, 0), 0, 5)) == [0, 1, 2, 3, 4] 303 | assert list(take(4, iterate(double, 1))) == [1, 2, 4, 8] 304 | 305 | 306 | def test_accumulate(): 307 | assert list(accumulate(add, [1, 2, 3, 4, 5])) == [1, 3, 6, 10, 15] 308 | assert list(accumulate(mul, [1, 2, 3, 4, 5])) == [1, 2, 6, 24, 120] 309 | assert list(accumulate(add, [1, 2, 3, 4, 5], -1)) == [-1, 0, 2, 5, 9, 14] 310 | 311 | def binop(a, b): 312 | raise AssertionError('binop should not be called') 313 | 314 | start = object() 315 | assert list(accumulate(binop, [], start)) == [start] 316 | assert list(accumulate(binop, [])) == [] 317 | assert list(accumulate(add, [1, 2, 3], no_default2)) == [1, 3, 6] 318 | 319 | 320 | def test_accumulate_works_on_consumable_iterables(): 321 | assert list(accumulate(add, iter((1, 2, 3)))) == [1, 3, 6] 322 | 323 | 324 | def test_sliding_window(): 325 | assert list(sliding_window(2, [1, 2, 3, 4])) == [(1, 2), (2, 3), (3, 4)] 326 | assert list(sliding_window(3, [1, 2, 3, 4])) == [(1, 2, 3), (2, 3, 4)] 327 | 328 | 329 | def test_sliding_window_of_short_iterator(): 330 | assert list(sliding_window(3, [1, 2])) == [] 331 | assert list(sliding_window(7, [1, 2])) == [] 332 | 333 | 334 | def test_partition(): 335 | assert list(partition(2, [1, 2, 3, 4])) == [(1, 2), (3, 4)] 336 | assert list(partition(3, range(7))) == [(0, 1, 2), (3, 4, 5)] 337 | assert list(partition(3, range(4), pad=-1)) == [(0, 1, 2), 338 | (3, -1, -1)] 339 | assert list(partition(2, [])) == [] 340 | 341 | 342 | def test_partition_all(): 343 | assert list(partition_all(2, [1, 2, 3, 4])) == [(1, 2), (3, 4)] 344 | assert list(partition_all(3, range(5))) == [(0, 1, 2), (3, 4)] 345 | assert list(partition_all(2, [])) == [] 346 | 347 | # Regression test: https://github.com/pytoolz/toolz/issues/387 348 | class NoCompare: 349 | def __eq__(self, other): 350 | if self.__class__ == other.__class__: 351 | return True 352 | raise ValueError() 353 | obj = NoCompare() 354 | result = [(obj, obj, obj, obj), (obj, obj, obj)] 355 | assert list(partition_all(4, [obj]*7)) == result 356 | assert list(partition_all(4, iter([obj]*7))) == result 357 | 358 | # Test invalid __len__: https://github.com/pytoolz/toolz/issues/602 359 | class ListWithBadLength(list): 360 | def __init__(self, contents, off_by=1): 361 | self.off_by = off_by 362 | super().__init__(contents) 363 | 364 | def __len__(self): 365 | return super().__len__() + self.off_by 366 | 367 | too_long_list = ListWithBadLength([1, 2], off_by=+1) 368 | assert raises(LookupError, lambda: list(partition_all(5, too_long_list))) 369 | too_short_list = ListWithBadLength([1, 2], off_by=-1) 370 | assert raises(LookupError, lambda: list(partition_all(5, too_short_list))) 371 | 372 | 373 | def test_count(): 374 | assert count((1, 2, 3)) == 3 375 | assert count([]) == 0 376 | assert count(iter((1, 2, 3, 4))) == 4 377 | 378 | assert count('hello') == 5 379 | assert count(iter('hello')) == 5 380 | 381 | 382 | def test_pluck(): 383 | assert list(pluck(0, [[0, 1], [2, 3], [4, 5]])) == [0, 2, 4] 384 | assert list(pluck([0, 1], [[0, 1, 2], [3, 4, 5]])) == [(0, 1), (3, 4)] 385 | assert list(pluck(1, [[0], [0, 1]], None)) == [None, 1] 386 | 387 | data = [{'id': 1, 'name': 'cheese'}, {'id': 2, 'name': 'pies', 'price': 1}] 388 | assert list(pluck('id', data)) == [1, 2] 389 | assert list(pluck('price', data, 0)) == [0, 1] 390 | assert list(pluck(['id', 'name'], data)) == [(1, 'cheese'), (2, 'pies')] 391 | assert list(pluck(['name'], data)) == [('cheese',), ('pies',)] 392 | assert list(pluck(['price', 'other'], data, 0)) == [(0, 0), (1, 0)] 393 | 394 | assert raises(IndexError, lambda: list(pluck(1, [[0]]))) 395 | assert raises(KeyError, lambda: list(pluck('name', [{'id': 1}]))) 396 | 397 | assert list(pluck(0, [[0, 1], [2, 3], [4, 5]], no_default2)) == [0, 2, 4] 398 | assert raises(IndexError, lambda: list(pluck(1, [[0]], no_default2))) 399 | 400 | 401 | def test_join(): 402 | names = [(1, 'one'), (2, 'two'), (3, 'three')] 403 | fruit = [('apple', 1), ('orange', 1), ('banana', 2), ('coconut', 2)] 404 | 405 | def addpair(pair): 406 | return pair[0] + pair[1] 407 | 408 | result = set(starmap(add, join(first, names, second, fruit))) 409 | 410 | expected = {(1, 'one', 'apple', 1), 411 | (1, 'one', 'orange', 1), 412 | (2, 'two', 'banana', 2), 413 | (2, 'two', 'coconut', 2)} 414 | 415 | assert result == expected 416 | 417 | result = set(starmap(add, join(first, names, second, fruit, 418 | left_default=no_default2, 419 | right_default=no_default2))) 420 | assert result == expected 421 | 422 | 423 | def test_getter(): 424 | assert getter(0)('Alice') == 'A' 425 | assert getter([0])('Alice') == ('A',) 426 | assert getter([])('Alice') == () 427 | 428 | 429 | def test_key_as_getter(): 430 | squares = [(i, i**2) for i in range(5)] 431 | pows = [(i, i**2, i**3) for i in range(5)] 432 | 433 | assert set(join(0, squares, 0, pows)) == set(join(lambda x: x[0], squares, 434 | lambda x: x[0], pows)) 435 | 436 | get = lambda x: (x[0], x[1]) 437 | assert set(join([0, 1], squares, [0, 1], pows)) == set(join(get, squares, 438 | get, pows)) 439 | 440 | get = lambda x: (x[0],) 441 | assert set(join([0], squares, [0], pows)) == set(join(get, squares, 442 | get, pows)) 443 | 444 | 445 | def test_join_double_repeats(): 446 | names = [(1, 'one'), (2, 'two'), (3, 'three'), (1, 'uno'), (2, 'dos')] 447 | fruit = [('apple', 1), ('orange', 1), ('banana', 2), ('coconut', 2)] 448 | 449 | result = set(starmap(add, join(first, names, second, fruit))) 450 | 451 | expected = {(1, 'one', 'apple', 1), 452 | (1, 'one', 'orange', 1), 453 | (2, 'two', 'banana', 2), 454 | (2, 'two', 'coconut', 2), 455 | (1, 'uno', 'apple', 1), 456 | (1, 'uno', 'orange', 1), 457 | (2, 'dos', 'banana', 2), 458 | (2, 'dos', 'coconut', 2)} 459 | 460 | assert result == expected 461 | 462 | 463 | def test_join_missing_element(): 464 | names = [(1, 'one'), (2, 'two'), (3, 'three')] 465 | fruit = [('apple', 5), ('orange', 1)] 466 | 467 | result = set(starmap(add, join(first, names, second, fruit))) 468 | 469 | expected = {(1, 'one', 'orange', 1)} 470 | 471 | assert result == expected 472 | 473 | 474 | def test_left_outer_join(): 475 | result = set(join(identity, [1, 2], identity, [2, 3], left_default=None)) 476 | expected = {(2, 2), (None, 3)} 477 | 478 | assert result == expected 479 | 480 | 481 | def test_right_outer_join(): 482 | result = set(join(identity, [1, 2], identity, [2, 3], right_default=None)) 483 | expected = {(2, 2), (1, None)} 484 | 485 | assert result == expected 486 | 487 | 488 | def test_outer_join(): 489 | result = set(join(identity, [1, 2], identity, [2, 3], 490 | left_default=None, right_default=None)) 491 | expected = {(2, 2), (1, None), (None, 3)} 492 | 493 | assert result == expected 494 | 495 | 496 | def test_diff(): 497 | assert raises(TypeError, lambda: list(diff())) 498 | assert raises(TypeError, lambda: list(diff([1, 2]))) 499 | assert raises(TypeError, lambda: list(diff([1, 2], 3))) 500 | assert list(diff([1, 2], (1, 2), iter([1, 2]))) == [] 501 | assert list(diff([1, 2, 3], (1, 10, 3), iter([1, 2, 10]))) == [ 502 | (2, 10, 2), (3, 3, 10)] 503 | assert list(diff([1, 2], [10])) == [(1, 10)] 504 | assert list(diff([1, 2], [10], default=None)) == [(1, 10), (2, None)] 505 | # non-variadic usage 506 | assert raises(TypeError, lambda: list(diff([]))) 507 | assert raises(TypeError, lambda: list(diff([[]]))) 508 | assert raises(TypeError, lambda: list(diff([[1, 2]]))) 509 | assert raises(TypeError, lambda: list(diff([[1, 2], 3]))) 510 | assert list(diff([(1, 2), (1, 3)])) == [(2, 3)] 511 | 512 | data1 = [{'cost': 1, 'currency': 'dollar'}, 513 | {'cost': 2, 'currency': 'dollar'}] 514 | 515 | data2 = [{'cost': 100, 'currency': 'yen'}, 516 | {'cost': 300, 'currency': 'yen'}] 517 | 518 | conversions = {'dollar': 1, 'yen': 0.01} 519 | 520 | def indollars(item): 521 | return conversions[item['currency']] * item['cost'] 522 | 523 | list(diff(data1, data2, key=indollars)) == [ 524 | ({'cost': 2, 'currency': 'dollar'}, {'cost': 300, 'currency': 'yen'})] 525 | 526 | 527 | def test_topk(): 528 | assert topk(2, [4, 1, 5, 2]) == (5, 4) 529 | assert topk(2, [4, 1, 5, 2], key=lambda x: -x) == (1, 2) 530 | assert topk(2, iter([5, 1, 4, 2]), key=lambda x: -x) == (1, 2) 531 | 532 | assert topk(2, [{'a': 1, 'b': 10}, {'a': 2, 'b': 9}, 533 | {'a': 10, 'b': 1}, {'a': 9, 'b': 2}], key='a') == \ 534 | ({'a': 10, 'b': 1}, {'a': 9, 'b': 2}) 535 | 536 | assert topk(2, [{'a': 1, 'b': 10}, {'a': 2, 'b': 9}, 537 | {'a': 10, 'b': 1}, {'a': 9, 'b': 2}], key='b') == \ 538 | ({'a': 1, 'b': 10}, {'a': 2, 'b': 9}) 539 | assert topk(2, [(0, 4), (1, 3), (2, 2), (3, 1), (4, 0)], 0) == \ 540 | ((4, 0), (3, 1)) 541 | 542 | 543 | def test_topk_is_stable(): 544 | assert topk(4, [5, 9, 2, 1, 5, 3], key=lambda x: 1) == (5, 9, 2, 1) 545 | 546 | 547 | def test_peek(): 548 | alist = ["Alice", "Bob", "Carol"] 549 | element, blist = peek(alist) 550 | assert element == alist[0] 551 | assert list(blist) == alist 552 | 553 | assert raises(StopIteration, lambda: peek([])) 554 | 555 | 556 | def test_peekn(): 557 | alist = ("Alice", "Bob", "Carol") 558 | elements, blist = peekn(2, alist) 559 | assert elements == alist[:2] 560 | assert tuple(blist) == alist 561 | 562 | elements, blist = peekn(len(alist) * 4, alist) 563 | assert elements == alist 564 | assert tuple(blist) == alist 565 | 566 | 567 | def test_random_sample(): 568 | alist = list(range(100)) 569 | 570 | assert list(random_sample(prob=1, seq=alist, random_state=2016)) == alist 571 | 572 | mk_rsample = lambda rs=1: list(random_sample(prob=0.1, 573 | seq=alist, 574 | random_state=rs)) 575 | rsample1 = mk_rsample() 576 | assert rsample1 == mk_rsample() 577 | 578 | rsample2 = mk_rsample(1984) 579 | randobj = Random(1984) 580 | assert rsample2 == mk_rsample(randobj) 581 | 582 | assert rsample1 != rsample2 583 | 584 | assert mk_rsample(hash(object)) == mk_rsample(hash(object)) 585 | assert mk_rsample(hash(object)) != mk_rsample(hash(object())) 586 | assert mk_rsample(b"a") == mk_rsample("a") 587 | 588 | assert raises(TypeError, lambda: mk_rsample([])) 589 | --------------------------------------------------------------------------------