├── toolz
    ├── tests
    │   ├── __init__.py
    │   ├── test_utils.py
    │   ├── test_compatibility.py
    │   ├── test_curried_doctests.py
    │   ├── test_package.py
    │   ├── test_recipes.py
    │   ├── test_tlz.py
    │   ├── test_signatures.py
    │   ├── test_curried.py
    │   ├── test_serialization.py
    │   ├── test_dicttoolz.py
    │   ├── test_inspect_args.py
    │   └── test_itertoolz.py
    ├── sandbox
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_parallel.py
    │   │   └── test_core.py
    │   ├── __init__.py
    │   ├── parallel.py
    │   └── core.py
    ├── utils.py
    ├── curried
    │   ├── exceptions.py
    │   ├── operator.py
    │   └── __init__.py
    ├── __init__.py
    ├── compatibility.py
    ├── recipes.py
    └── dicttoolz.py
├── doc
    ├── requirements.txt
    ├── source
    │   ├── install.rst
    │   ├── references.rst
    │   ├── index.rst
    │   ├── api.rst
    │   ├── heritage.rst
    │   ├── purity.rst
    │   ├── tips-and-tricks.rst
    │   ├── laziness.rst
    │   ├── curry.rst
    │   ├── parallelism.rst
    │   ├── composition.rst
    │   ├── control.rst
    │   ├── conf.py
    │   └── streaming-analytics.rst
    ├── make.bat
    └── Makefile
├── .gitignore
├── .github
    ├── dependabot.yml
    └── workflows
    │   ├── pre-commit.yml
    │   ├── test.yml
    │   └── publish_pypi.yml
├── bench
    ├── test_groupby.py
    ├── test_sliding_window.py
    ├── test_get.py
    ├── test_get_list.py
    ├── test_memoize.py
    ├── test_curry.py
    ├── test_memoize_kwargs.py
    ├── test_curry_baseline.py
    ├── test_first.py
    ├── test_pluck.py
    ├── test_frequencies.py
    ├── test_first_iter.py
    ├── test_wordcount.py
    └── test_join.py
├── MANIFEST.in
├── tox.ini
├── tlz
    ├── __init__.py
    └── _build_tlz.py
├── examples
    ├── wordcount.py
    ├── graph.py
    └── fib.py
├── .readthedocs.yaml
├── LICENSE.txt
├── AUTHORS.md
├── release-notes
├── pyproject.toml
├── .pre-commit-config.yaml
└── README.rst


/toolz/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/toolz/sandbox/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/doc/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx
2 | furo
3 | 


--------------------------------------------------------------------------------
/toolz/sandbox/__init__.py:
--------------------------------------------------------------------------------
1 | from .core import EqualityHashKey, unzip
2 | from .parallel import fold
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | build/
 3 | dist/
 4 | *.egg-info/
 5 | bench/shakespeare.txt
 6 | .coverage
 7 | *.sw?
 8 | .DS_STORE
 9 | \.tox/
10 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: 'github-actions'
4 |     directory: '/'
5 |     schedule:
6 |       interval: 'monthly'
7 | 


--------------------------------------------------------------------------------
/bench/test_groupby.py:
--------------------------------------------------------------------------------
1 | from toolz import groupby, identity
2 | 
3 | 
# 1,000 distinct values, each repeated 1,000 times (one million items).
data = list(range(1000)) * 1000


def test_groupby():
    """Benchmark: group the module-level ``data`` using ``identity`` as key."""
    groupby(identity, data)
9 | 


--------------------------------------------------------------------------------
/bench/test_sliding_window.py:
--------------------------------------------------------------------------------
1 | from toolz import sliding_window
2 | 
# One million consecutive integers; ``range`` is lazy, nothing is built here.
seq = range(1000000)


def test_sliding_window():
    """Benchmark: materialise every width-3 sliding window over ``seq``."""
    windows = sliding_window(3, seq)
    list(windows)
8 | 


--------------------------------------------------------------------------------
/bench/test_get.py:
--------------------------------------------------------------------------------
1 | from toolz import get
2 | 
# 100,000 identical 3-tuples to index into.
tuples = [(1, 2, 3) for _ in range(100000)]


def test_get():
    """Benchmark: fetch a single index (1) from each tuple with ``get``."""
    for record in tuples:
        get(1, record)
9 | 


--------------------------------------------------------------------------------
/bench/test_get_list.py:
--------------------------------------------------------------------------------
1 | from toolz import get
2 | 
# 100,000 identical 3-tuples to index into.
tuples = [(1, 2, 3) for _ in range(100000)]


def test_get():
    """Benchmark: fetch a list of indices ([1, 2]) from each tuple with ``get``."""
    for record in tuples:
        get([1, 2], record)
9 | 


--------------------------------------------------------------------------------
/toolz/utils.py:
--------------------------------------------------------------------------------
 1 | def raises(err, lamda):
 2 |     try:
 3 |         lamda()
 4 |         return False
 5 |     except err:
 6 |         return True
 7 | 
 8 | 
 9 | no_default = '__no__default__'
10 | 


--------------------------------------------------------------------------------
/toolz/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | from toolz.utils import raises
2 | 
3 | 
def test_raises():
    """``raises`` is true only when the callable raises the given error."""
    def divide_by_zero():
        return 1 / 0

    def succeed():
        return 1

    assert raises(ZeroDivisionError, divide_by_zero)
    assert not raises(ZeroDivisionError, succeed)
7 | 


--------------------------------------------------------------------------------
/bench/test_memoize.py:
--------------------------------------------------------------------------------
 1 | from toolz import memoize
 2 | 
 3 | 
 4 | def test_memoize_no_kwargs():
 5 |     @memoize
 6 |     def f(x):
 7 |         return x
 8 | 
 9 |     for i in range(100000):
10 |         f(3)
11 | 


--------------------------------------------------------------------------------
/bench/test_curry.py:
--------------------------------------------------------------------------------
 1 | from toolz.curried import get
 2 | 
 3 | 
 4 | pairs = [(1, 2) for i in range(100000)]
 5 | 
 6 | 
 7 | def test_get_curried():
 8 |     first = get(0)
 9 |     for p in pairs:
10 |         first(p)
11 | 


--------------------------------------------------------------------------------
/bench/test_memoize_kwargs.py:
--------------------------------------------------------------------------------
 1 | from toolz import memoize
 2 | 
 3 | 
 4 | def test_memoize_kwargs():
 5 |     @memoize
 6 |     def f(x, y=3):
 7 |         return x
 8 | 
 9 |     for i in range(100000):
10 |         f(3)
11 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include toolz *
2 | recursive-include tlz *
3 | include AUTHORS.md
4 | include LICENSE.txt
5 | include MANIFEST.in
6 | include README.rst
7 | include pyproject.toml
8 | global-exclude *.pyc *~ *.bak *.swp *.swo *.pyo *.so
9 | 


--------------------------------------------------------------------------------
/bench/test_curry_baseline.py:
--------------------------------------------------------------------------------
 1 | from toolz import get
 2 | from functools import partial
 3 | 
 4 | 
 5 | pairs = [(1, 2) for i in range(100000)]
 6 | 
 7 | 
 8 | def test_get():
 9 |     first = partial(get, 0)
10 |     for p in pairs:
11 |         first(p)
12 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist =
 3 |     py39
 4 |     py310
 5 |     py311
 6 |     py312
 7 |     py313
 8 |     py314
 9 |     pypy3
10 | 
11 | skip_missing_interpreters = true
12 | 
13 | 
14 | [testenv]
15 | deps = pytest
16 | commands = py.test {posargs}
17 | 


--------------------------------------------------------------------------------
/bench/test_first.py:
--------------------------------------------------------------------------------
 1 | from toolz import first, second
 2 | 
 3 | pairs = [(1, 2) for i in range(1000000)]
 4 | 
 5 | 
 6 | def test_first():
 7 |     for p in pairs:
 8 |         first(p)
 9 | 
10 | 
def test_second():
    """Benchmark: take the second element of every pair."""
    for pair in pairs:
        second(pair)
14 | 


--------------------------------------------------------------------------------
/bench/test_pluck.py:
--------------------------------------------------------------------------------
 1 | from toolz import pluck
 2 | 
 3 | tuples = [(1, 2, 3) for i in range(100000)]
 4 | less_tuples = [(1, 2, 3) for i in range(100)]
 5 | 
 6 | 
 7 | def test_pluck():
 8 |     for i in pluck(2, tuples):
 9 |         pass
10 | 
11 |     for i in range(1000):
12 |         tuple(pluck(2, less_tuples))
13 | 


--------------------------------------------------------------------------------
/toolz/tests/test_compatibility.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import importlib
 3 | 
 4 | def test_compat_warn():
 5 |     with pytest.warns(DeprecationWarning):
 6 |         # something else is importing this,
 7 |         import toolz.compatibility
 8 |         # reload to be sure we warn
 9 |         importlib.reload(toolz.compatibility)
10 | 


--------------------------------------------------------------------------------
/bench/test_frequencies.py:
--------------------------------------------------------------------------------
 1 | from toolz import frequencies, identity
 2 | 
 3 | 
 4 | big_data = list(range(1000)) * 1000
 5 | small_data = list(range(100))
 6 | 
 7 | 
 8 | def test_frequencies():
 9 |     frequencies(big_data)
10 | 
11 | 
def test_frequencies_small():
    """Benchmark: 1,000 ``frequencies`` passes over the small input."""
    for _ in range(1000):
        frequencies(small_data)
15 | 


--------------------------------------------------------------------------------
/toolz/tests/test_curried_doctests.py:
--------------------------------------------------------------------------------
 1 | import doctest
 2 | import toolz
 3 | 
 4 | 
 5 | def test_doctests():
 6 |     toolz.__test__ = {}
 7 |     for name, func in vars(toolz).items():
 8 |         if isinstance(func, toolz.curry):
 9 |             toolz.__test__[name] = func.func
10 |     assert doctest.testmod(toolz).failed == 0
11 |     del toolz.__test__
12 | 


--------------------------------------------------------------------------------
/toolz/tests/test_package.py:
--------------------------------------------------------------------------------
 1 | import toolz
 2 | 
 3 | 
 4 | def test_has_version():
 5 |     # If this test fails, then toolz probably isn't installed properly.
 6 |     # For local development, try `pip install -e .` from the project directory.
 7 |     version = toolz.__version__
 8 |     assert isinstance(version, str)
 9 |     assert version.startswith("1.")
10 | 


--------------------------------------------------------------------------------
/bench/test_first_iter.py:
--------------------------------------------------------------------------------
 1 | import itertools
 2 | from toolz import first, second
 3 | 
 4 | 
 5 | def test_first_iter():
 6 |     iters = map(iter, [(1, 2) for i in range(1000000)])
 7 |     for p in iters:
 8 |         first(p)
 9 | 
10 | 
def test_second_iter():
    """Benchmark: ``second`` on one million fresh tuple iterators (no indexing possible)."""
    pairs = [(1, 2) for _ in range(1000000)]
    for iterator in map(iter, pairs):
        second(iterator)
15 | 


--------------------------------------------------------------------------------
/tlz/__init__.py:
--------------------------------------------------------------------------------
 1 | """``tlz`` mirrors the ``toolz`` API and uses ``cytoolz`` if possible.
 2 | 
 3 | The ``tlz`` package is installed when ``toolz`` is installed.  It provides
 4 | a convenient way to use functions from ``cytoolz``--a faster Cython
 5 | implementation of ``toolz``--if it is installed, otherwise it uses
 6 | functions from ``toolz``.
 7 | """
 8 | 
 9 | from . import _build_tlz
10 | 


--------------------------------------------------------------------------------
/examples/wordcount.py:
--------------------------------------------------------------------------------
 1 | from toolz import *
 2 | 
 3 | 
 4 | def stem(word):
 5 |     """ Stem word to primitive form """
 6 |     return word.lower().rstrip(",.!:;'-\"").lstrip("'\"")
 7 | 
 8 | wordcount = comp(frequencies, partial(map, stem), str.split)
 9 | 
10 | if __name__ == '__main__':
11 |     print(wordcount("This cat jumped over this other cat!"))
12 |     # prints {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1}
13 | 


--------------------------------------------------------------------------------
/toolz/curried/exceptions.py:
--------------------------------------------------------------------------------
 1 | import toolz
 2 | 
 3 | 
 4 | __all__ = ['merge_with', 'merge']
 5 | 
 6 | 
 7 | @toolz.curry
 8 | def merge_with(func, d, *dicts, **kwargs):
 9 |     return toolz.merge_with(func, d, *dicts, **kwargs)
10 | 
11 | 
@toolz.curry
def merge(d, *dicts, **kwargs):
    # Curried wrapper around ``toolz.merge``; every argument is forwarded
    # unchanged.
    merged = toolz.merge(d, *dicts, **kwargs)
    return merged


# The wrappers take their documentation from the functions they wrap.
merge_with.__doc__ = toolz.merge_with.__doc__
merge.__doc__ = toolz.merge.__doc__
19 | 


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 2 | version: 2
 3 | build:
 4 |   os: ubuntu-22.04
 5 |   tools:
 6 |     python: "3.12"
 7 | 
 8 | sphinx:
 9 |   configuration: doc/source/conf.py
10 |   # Temporarily turning off to get docs build passing
11 |   # fail_on_warning: true
12 | 
13 | python:
14 |   install:
15 |     - requirements: doc/requirements.txt
16 |     - method: pip
17 |       path: .
18 | 


--------------------------------------------------------------------------------
/doc/source/install.rst:
--------------------------------------------------------------------------------
 1 | Installation and Dependencies
 2 | =============================
 3 | 
 4 | Toolz is pure Python and so is easily installable by the standard
 5 | dependency manager ``pip``::
 6 | 
 7 |     pip install toolz
 8 | 
 9 | Toolz endeavors to be a very light dependency.  It accomplishes this in
10 | three ways:
11 | 
12 | 1.  Toolz is pure Python
13 | 2.  Toolz relies only on the standard library
14 | 3.  Toolz simultaneously supports Python versions 3.9+ and PyPy
15 | 


--------------------------------------------------------------------------------
/bench/test_wordcount.py:
--------------------------------------------------------------------------------
 1 | from toolz.curried import *
 2 | import os
 3 | 
# Import-time side effect: fetch the benchmark corpus when it is not already
# on disk.  This shells out to ``wget`` and uses a path relative to the
# current working directory.
if not os.path.exists('bench/shakespeare.txt'):
    os.system('wget http://www.gutenberg.org/files/100/100-0.txt'
              ' -O bench/shakespeare.txt')
 7 | 
 8 | 
 9 | def stem(word):
10 |     """ Stem word to primitive form """
11 |     return word.lower().rstrip(",.!:;'-\"").lstrip("'\"")
12 | 
# Composition applies right to left: split each line into words, flatten the
# per-line word lists, stem every word, then count the stems.
wordcount = comp(frequencies, map(stem), concat, map(str.split))


def test_shakespeare():
    """Benchmark: count stemmed words across the whole corpus file."""
    # Explicit encoding so decoding does not depend on the platform locale.
    # NOTE(review): assumes the downloaded "-0.txt" file is UTF-8 -- confirm.
    with open('bench/shakespeare.txt', encoding='utf-8') as f:
        wordcount(f)
19 | 


--------------------------------------------------------------------------------
/toolz/curried/operator.py:
--------------------------------------------------------------------------------
 1 | import operator
 2 | 
 3 | from toolz.functoolz import curry
 4 | 
 5 | 
 6 | # Tests will catch if/when this needs to be updated
 7 | IGNORE = {
 8 |     "__abs__", "__index__", "__inv__", "__invert__", "__neg__", "__not__",
 9 |     "__pos__", "_abs", "abs", "attrgetter", "index", "inv", "invert",
10 |     "is_none", "is_not_none", "itemgetter", "neg", "not_", "pos", "truth"
11 | }
12 | locals().update(
13 |     {name: f if name in IGNORE else curry(f)
14 |      for name, f in vars(operator).items() if callable(f)}
15 | )
16 | 
17 | # Clean up the namespace.
18 | del IGNORE
19 | del curry
20 | del operator
21 | 


--------------------------------------------------------------------------------
/toolz/__init__.py:
--------------------------------------------------------------------------------
 1 | from .itertoolz import *
 2 | 
 3 | from .functoolz import *
 4 | 
 5 | from .dicttoolz import *
 6 | 
 7 | from .recipes import *
 8 | 
 9 | from functools import partial, reduce
10 | 
# Bind these builtins as module-level names so they are available as
# attributes of this package (e.g. ``toolz.map``).
sorted = sorted

map = map

filter = filter

# Aliases
comp = compose

from . import curried, sandbox

# NOTE(review): presumably fills in the signature registry used by
# ``functoolz``; it runs once, at import time -- confirm in ``_signatures``.
functoolz._sigs.create_signature_registry()
23 | 
24 | 
def __getattr__(name):
    """Module-level attribute hook that provides ``__version__`` lazily.

    The version is read from the installed distribution metadata on first
    access and then stored in the module globals.  Any other missing
    attribute raises ``AttributeError``.
    """
    if name != "__version__":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    from importlib.metadata import version

    rv = version("toolz")
    # Store the result so later lookups find it directly in the module dict.
    globals()[name] = rv
    return rv
33 | 


--------------------------------------------------------------------------------
/.github/workflows/pre-commit.yml:
--------------------------------------------------------------------------------
 1 | # Alternatively, consider using https://pre-commit.ci/
 2 | name: pre-commit checks
 3 | 
 4 | on:
 5 |   pull_request:
 6 |   push:
 7 |     branches: [master]
 8 | 
 9 | permissions:
10 |   contents: read
11 | 
12 | jobs:
13 |   pre-commit:
14 |     name: pre-commit-hooks
15 |     runs-on: ubuntu-latest
16 |     steps:
17 |       - uses: actions/checkout@v5
18 |         with:
19 |           fetch-depth: 0
20 |           persist-credentials: false
21 |       - uses: actions/setup-python@v6
22 |         with:
23 |           python-version: "3.13"
24 |       - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd  # v3.0.1
25 |         env:
26 |           SKIP: "no-commit-to-branch"
27 | 


--------------------------------------------------------------------------------
/bench/test_join.py:
--------------------------------------------------------------------------------
 1 | from toolz.curried import *
 2 | 
 3 | try:
 4 |     xrange
 5 | except NameError:
 6 |     xrange = range
 7 | 
 8 | def burn(seq):
 9 |     for item in seq:
10 |         pass
11 | 
12 | 
# ``small``: 1,000 (int, str) pairs -- 100 distinct pairs repeated 10 times.
small = [(i, str(i)) for i in range(100)] * 10
# ``big``: range(110) repeated 10,000 times and flattened into one list.
big = pipe([110]*10000, map(range), concat, list)


def test_many_to_many_large():
    """Benchmark: join ``small`` (keyed on element 0) with ``big`` (keyed on itself)."""
    joined = join(get(0), small, identity, big)
    burn(joined)
19 | 
20 | 
def test_one_to_one_tiny():
    """Benchmark: 50,000 joins of two 20-element sequences holding the same values."""
    A = list(range(20))
    # Same values as ``A`` in another order: evens ascending, odds descending.
    B = A[::2] + A[1::2][::-1]

    for _ in xrange(50000):
        burn(join(identity, A, identity, B))
27 | 
28 | 
def test_one_to_many():
    """Benchmark: 100 joins of 20 distinct values against 1,000 repeats of them."""
    A = list(range(20))
    # range(20) repeated 1,000 times and flattened into one list.
    B = pipe([20]*1000, map(range), concat, list)

    for _ in xrange(100):
        burn(join(identity, A, identity, B))
35 | 


--------------------------------------------------------------------------------
/examples/graph.py:
--------------------------------------------------------------------------------
 1 | from toolz.curried import *
 2 | a, b, c, d, e, f, g = 'abcdefg'
 3 | 
 4 | edges = [(a, b), (b, a), (a, c), (a, d), (d, a), (d, e), (e, f), (d, f),
 5 |          (f, d), (d, g), (e, g)]
 6 | 
 7 | 
 8 | out_degrees = countby(first,  edges)
 9 | # {'a': 3, 'b': 1, 'd': 4, 'e': 2, 'f': 1}
10 | 
11 | in_degrees = countby(second, edges)
12 | # {'a': 2, 'b': 1, 'c': 1, 'd': 2, 'e': 1, 'f': 2, 'g': 2}
13 | 
14 | 
15 | out_neighbors = valmap(comp(tuple, map(second)),
16 |                        groupby(first, edges))
17 | # {'a': ('b', 'c', 'd'),
18 | #  'b': ('a',),
19 | #  'd': ('a', 'e', 'f', 'g'),
20 | #  'e': ('f', 'g'),
21 | #  'f': ('d',)}
22 | 
23 | in_neighbors = valmap(comp(tuple, map(first)),
24 |                       groupby(second, edges))
25 | # {'a': ('b', 'd'),
26 | #  'b': ('a',),
27 | #  'c': ('a',),
28 | #  'd': ('a', 'f'),
29 | #  'e': ('d',),
30 | #  'f': ('e', 'd'),
31 | #  'g': ('d', 'e')}
32 | 


--------------------------------------------------------------------------------
/toolz/tests/test_recipes.py:
--------------------------------------------------------------------------------
 1 | from toolz import first, identity, countby, partitionby
 2 | 
 3 | 
 4 | def iseven(x):
 5 |     return x % 2 == 0
 6 | 
 7 | 
 8 | def test_countby():
 9 |     assert countby(iseven, [1, 2, 3]) == {True: 1, False: 2}
10 |     assert countby(len, ['cat', 'dog', 'mouse']) == {3: 2, 5: 1}
11 |     assert countby(0, ('ab', 'ac', 'bc')) == {'a': 2, 'b': 1}
12 | 
13 | 
def test_partitionby():
    """``partitionby`` starts a new tuple each time the key's output changes."""
    # Empty input yields nothing.
    assert list(partitionby(identity, [])) == []

    # A boolean key alternates between vowel runs and consonant runs.
    vowels = "aeiou"
    by_vowel = list(partitionby(vowels.__contains__, "abcdefghi"))
    assert by_vowel == [("a",), ("b", "c", "d"), ("e",),
                        ("f", "g", "h"), ("i",)]

    # Equal values separated by other values form separate runs.
    numbers = [1, 1, 1, 2, 3, 3, 2, 2, 3]
    run_heads = list(map(first, partitionby(identity, numbers)))
    assert run_heads == [1, 2, 3, 2, 3]

    # Taking the head of every run collapses repeated characters.
    collapsed = ''.join(map(first,
                            partitionby(identity, "Khhhaaaaannnnn!!!!")))
    assert collapsed == 'Khan!'
28 | 


--------------------------------------------------------------------------------
/examples/fib.py:
--------------------------------------------------------------------------------
 1 | #          /            0               if i is 0
 2 | # fib(i) = |            1               if i is 1
 3 | #          \ fib(i - 1) + fib(i - 2)    otherwise
 4 | 
 5 | 
 6 | def fib(n):
 7 |     """ Imperative definition of Fibonacci numbers """
 8 |     a, b = 0, 1
 9 |     for i in range(n):
10 |         a, b = b, a + b
11 |     return a
12 | 
13 | 
# This is intuitive but VERY slow
def fib(n):
    """ Functional definition of Fibonacci numbers

    Base cases 0 and 1 return ``n`` itself; every other value is the sum of
    the two preceding terms, each computed recursively.
    """
    if n == 0 or n == 1:
        return n
    return fib(n - 1) + fib(n - 2)
21 | 
22 | from toolz import memoize
23 | 
# Oh wait, it's fast again
# The recursive calls inside ``fib`` look up the global name ``fib``, which
# after this rebinding is the memoized wrapper, so they go through it too.
fib = memoize(fib)
26 | 
27 | 
# Provide a cache with initial values to `memoize`
@memoize(cache={0: 0, 1: 1})
def fib(n):
    """ Functional definition of Fibonacci numbers with initial terms cached.

    The base cases live in the cache handed to ``memoize`` rather than in
    the function body:

    fib(0) == 0
    fib(1) == 1
    ...
    fib(n) == fib(n - 1) + fib(n - 2)
    """
    one_back = fib(n - 1)
    two_back = fib(n - 2)
    return one_back + two_back
39 | 


--------------------------------------------------------------------------------
/toolz/sandbox/tests/test_parallel.py:
--------------------------------------------------------------------------------
 1 | from toolz.sandbox.parallel import fold
 2 | from toolz import reduce
 3 | from operator import add
 4 | from pickle import dumps, loads
 5 | from multiprocessing import Pool
 6 | 
 7 | 
 8 | # An `is` comparison between this and no_default will fail
 9 | no_default2 = loads(dumps('__no__default__'))
10 | 
11 | 
def test_fold():
    """``fold`` agrees with ``reduce`` across map, chunksize, combine and default options."""
    expected = reduce(add, range(10), 0)

    # Default (sequential) map.
    assert fold(add, range(10), 0) == expected

    # A multiprocessing pool's ``map`` can be supplied instead.
    with Pool() as pool:
        assert fold(add, range(10), 0, map=pool.map) == expected

    assert fold(add, range(10), 0, chunksize=2) == expected
    assert fold(add, range(10)) == fold(add, range(10), 0)

    def setadd(s, item):
        # Leave the input set untouched and return a new one.
        return s | {item}

    assert fold(setadd, [1, 2, 3], set()) == {1, 2, 3}
    chunked = fold(setadd, [1, 2, 3], set(), chunksize=2, combine=set.union)
    assert chunked == {1, 2, 3}

    # An equal-but-not-identical copy of the sentinel is treated like the sentinel.
    assert fold(add, range(10), default=no_default2) == fold(add, range(10))
31 | 


--------------------------------------------------------------------------------
/toolz/compatibility.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | warnings.warn("The toolz.compatibility module is no longer "
 3 |               "needed in Python 3 and has been deprecated. Please "
 4 |               "import these utilities directly from the standard library. "
 5 |               "This module will be removed in a future release.",
 6 |               category=DeprecationWarning, stacklevel=2)
 7 | 
 8 | import operator
 9 | import sys
10 | 
11 | PY3 = sys.version_info[0] > 2
12 | PY34 = sys.version_info[0] == 3 and sys.version_info[1] == 4
13 | PYPY = hasattr(sys, 'pypy_version_info') and PY3
14 | 
15 | __all__ = ('map', 'filter', 'range', 'zip', 'reduce', 'zip_longest',
16 |            'iteritems', 'iterkeys', 'itervalues', 'filterfalse',
17 |            'PY3', 'PY34', 'PYPY')
18 | 
19 | 
20 | map = map
21 | filter = filter
22 | range = range
23 | zip = zip
24 | from functools import reduce
25 | from itertools import zip_longest
26 | from itertools import filterfalse
27 | iteritems = operator.methodcaller('items')
28 | iterkeys = operator.methodcaller('keys')
29 | itervalues = operator.methodcaller('values')
30 | from collections.abc import Sequence
31 | 


--------------------------------------------------------------------------------
/doc/source/references.rst:
--------------------------------------------------------------------------------
 1 | References
 2 | ==========
 3 | 
 4 | -  `Underscore.js <http://underscorejs.org>`__: A similar library for
 5 |    JavaScript
 6 | -  `Enumerable <http://ruby-doc.org/core-2.0.0/Enumerable.html>`__: A
 7 |    similar library for Ruby
 8 | -  `Clojure <http://clojure.org>`__: A functional language whose
 9 |    standard library has several counterparts in ``toolz``
10 | -  `itertools <http://docs.python.org/3/library/itertools.html>`__: The
11 |    Python standard library for iterator tools
12 | -  `functools <http://docs.python.org/3/library/functools.html>`__: The
13 |    Python standard library for function tools
14 | -  `Functional Programming HOWTO <http://docs.python.org/dev/howto/functional.html>`__:
15 |    The description of functional programming features from the official
16 |    Python docs.
17 | 
18 | Contemporary Projects
19 | ---------------------
20 | 
21 | These projects also provide iterator and functional utilities within
22 | Python. Their functionality overlaps substantially with that of PyToolz.
23 | 
24 | -  `funcy <https://github.com/suor/funcy/>`__
25 | -  `fn.py <https://github.com/kachayev/fn.py>`__
26 | -  `more\_itertools <https://github.com/erikrose/more-itertools>`__
27 | 


--------------------------------------------------------------------------------
/toolz/recipes.py:
--------------------------------------------------------------------------------
 1 | import itertools
 2 | from .itertoolz import frequencies, pluck, getter
 3 | 
 4 | 
 5 | __all__ = ('countby', 'partitionby')
 6 | 
 7 | 
 8 | def countby(key, seq):
 9 |     """ Count elements of a collection by a key function
10 | 
11 |     >>> countby(len, ['cat', 'mouse', 'dog'])
12 |     {3: 2, 5: 1}
13 | 
14 |     >>> def iseven(x): return x % 2 == 0
15 |     >>> countby(iseven, [1, 2, 3])  # doctest:+SKIP
16 |     {True: 1, False: 2}
17 | 
18 |     See Also:
19 |         groupby
20 |     """
21 |     if not callable(key):
22 |         key = getter(key)
23 |     return frequencies(map(key, seq))
24 | 
25 | 
def partitionby(func, seq):
    """ Partition a sequence according to a function

    Partition `seq` into a sequence of tuples such that, when traversing
    `seq`, every time the output of `func` changes a new tuple is started
    and that and subsequent items are collected into that tuple.

    >>> is_space = lambda c: c == " "
    >>> list(partitionby(is_space, "I have space"))
    [('I',), (' ',), ('h', 'a', 'v', 'e'), (' ',), ('s', 'p', 'a', 'c', 'e')]

    >>> is_large = lambda x: x > 10
    >>> list(partitionby(is_large, [1, 2, 1, 99, 88, 33, 99, -1, 5]))
    [(1, 2, 1), (99, 88, 33, 99), (-1, 5)]

    See also:
        partition
        groupby
        itertools.groupby
    """
    # ``itertools.groupby`` yields (key, run) pairs for consecutive equal keys.
    keyed_runs = itertools.groupby(seq, key=func)
    # Keep only the run from each pair and turn it into a tuple.
    runs = pluck(1, keyed_runs)
    return map(tuple, runs)
47 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2013 Matthew Rocklin
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 |   a. Redistributions of source code must retain the above copyright notice,
 9 |      this list of conditions and the following disclaimer.
10 |   b. Redistributions in binary form must reproduce the above copyright
11 |      notice, this list of conditions and the following disclaimer in the
12 |      documentation and/or other materials provided with the distribution.
13 |   c. Neither the name of toolz nor the names of its contributors
14 |      may be used to endorse or promote products derived from this software
15 |      without specific prior written permission.
16 | 
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
28 | DAMAGE.
29 | 


--------------------------------------------------------------------------------
/AUTHORS.md:
--------------------------------------------------------------------------------
 1 | [Matthew Rocklin](http://matthewrocklin.com)    [@mrocklin](http://github.com/mrocklin/)
 2 | 
 3 | [John Jacobsen](http://eigenhombre.com)         [@eigenhombre](http://github.com/eigenhombre/)
 4 | 
 5 | Erik Welch                                      [@eriknw](https://github.com/eriknw/)
 6 | 
 7 | John Crichton                                   [@jcrichton](https://github.com/jcrichton/)
 8 | 
 9 | Han Semaj                                       [@microamp](https://github.com/microamp/)
10 | 
11 | [Graeme Coupar](https://twitter.com/obmarg)     [@obmarg](https://github.com/obmarg/)
12 | 
13 | [Leonid Shvechikov](http://brainstorage.me/shvechikov)  [@shvechikov](https://github.com/shvechikov)
14 | 
15 | Lars Buitinck                                   [@larsmans](http://github.com/larsmans)
16 | 
17 | José Ricardo                                    [@josericardo](https://github.com/josericardo)
18 | 
19 | Tom Prince                                      [@tomprince](https://github.com/tomprince)
20 | 
21 | Bart van Merriënboer                            [@bartvm](https://github.com/bartvm)
22 | 
23 | Nikolaos-Digenis Karagiannis                    [@digenis](https://github.com/digenis/)
24 | 
25 | [Antonio Lima](https://twitter.com/themiurgo)   [@themiurgo](https://github.com/themiurgo/)
26 | 
27 | Joe Jevnik                                      [@llllllllll](https://github.com/llllllllll)
28 | 
29 | Rory Kirchner                                      [@roryk](https://github.com/roryk)
30 | 
31 | [Steven Cutting](http://steven-cutting.github.io) [@steven_cutting](https://github.com/steven-cutting)
32 | 
33 | Aric Coady                                      [@coady](https://github.com/coady)
34 | 


--------------------------------------------------------------------------------
/release-notes:
--------------------------------------------------------------------------------
 1 | New in 0.4.2
 2 | 
 3 | Removed intersection
 4 | 
 5 | 
 6 | New in 0.5.3
 7 | 
 8 | *   get_in function
 9 | *   add itervalues, iterkeys, iteritems to compatibility
10 | *   Add do function, remove side_effects from sandbox
11 | *   Add juxt, partner to map
12 | *   Performance improvements to merge_with
13 | *   Errors from curried functions propagate upwards
14 | *   keyfilter, valfilter
15 | *   do
16 | 
17 | New Authors:
18 | 
19 | Graeme Coupar, @obmarg
20 | 
21 | 
22 | New in 0.6.0
23 | 
24 | *   memoize is curried by default
25 | *   memoize supports the `key` keyword argument
26 | *   Cleaned up issues in curried namespace
27 | *   Unary functions memoize with just the single argument, not a tuple
28 | *   Flattened directory structure
29 | *   Add `pluck` function from underscore.js
30 | *   Remove `sandbox.jackknife`
31 | 
32 | 
33 | New in 0.6.1
34 | 
35 | 
36 | *   Python 3.4 support
37 | *   New `join` operation
38 | *   `join`, `groupby`, ... accept non-callable key functions.
39 | *   Many speed improvements:
40 |     *   Cache method lookup
41 |     *   Faster `merge_sorted` without key
42 |     *   An additional round of tuning on `groupby`
43 | *   Toolz builds on binstar build under mrocklin channel
44 | *   Avoid generators, favor map.  Assists in debugging.
45 | *   Cleaner `curry` implementation
46 | *   Fix serialization issues for `juxt`, `complement`
47 | *   `reduceby` no longer requires `default` keyword argument
48 | *   Fix bug in `get` where `get([1], coll)` used to return element rather than
49 |     length-one tuple
50 | *   `EqualityHashKey` added to sandbox
51 | *   `juxt` returns a tuple, not a generator
52 | 
53 | 
54 | New Authors:
55 | 
56 | Leonid Shvechikov,  José Ricardo, Lars Buitinck, Tom Prince
57 | 


--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
 1 | name: Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [master]
 6 |   pull_request:
 7 | 
 8 | permissions:
 9 |   contents: read
10 | 
11 | jobs:
12 |   test:
13 |     runs-on: ${{ matrix.os }}
14 |     strategy:
15 |       fail-fast: false
16 |       matrix:
17 |         os: ["ubuntu-latest"]
18 |         python-version:
19 |           - "3.9"
20 |           - "3.10"
21 |           - "3.11"
22 |           - "3.12"
23 |           - "3.13"
24 |           - "3.13t"
25 |           - "3.14"
26 |           - "3.14t"
27 |           - "pypy-3.9"
28 |           - "pypy-3.10"
29 |           - "pypy-3.11"
30 |     steps:
31 |       - name: Checkout
32 |         uses: actions/checkout@v5
33 |         with:
34 |           fetch-depth: 0
35 |           persist-credentials: false
36 |       - name: Set up Python
37 |         uses: actions/setup-python@v6
38 |         with:
39 |           python-version: ${{ matrix.python-version }}
40 |       - name: Install dependencies
41 |         run: |
42 |           python -m pip install --upgrade pip setuptools wheel
43 |           pip install coverage pycodestyle pytest
44 |           pip install -e .
45 |       - name: Pytest
46 |         run: |
47 |           coverage run -m pytest --doctest-modules toolz/
48 |           pytest bench/
49 |           pycodestyle --ignore="E731,W503,W504,E402" --exclude=conf.py,tests,examples,bench -r --show-source .
50 |       - name: Coverage
51 |         if: (! contains(matrix.python-version, 'pypy'))
52 |         run: |
53 |           coverage xml
54 |       - name: codecov
55 |         if: (! contains(matrix.python-version, 'pypy'))
56 |         uses: codecov/codecov-action@5a1091511ad55cbe89839c7260b706298ca349f7  # v5.5.1
57 |         with:
58 |           token: ${{ secrets.CODECOV_TOKEN }}
59 | 


--------------------------------------------------------------------------------
/toolz/tests/test_tlz.py:
--------------------------------------------------------------------------------
 1 | import toolz
 2 | 
 3 | 
 4 | def test_tlz():  # checks tlz module names, import failures, and identity with toolz
 5 |     import tlz
 6 |     tlz.curry  # bare attribute access: raises AttributeError if the name is missing
 7 |     tlz.functoolz.curry
 8 |     assert tlz.__package__ == 'tlz'
 9 |     assert tlz.__name__ == 'tlz'
10 |     import tlz.curried
11 |     assert tlz.curried.__package__ == 'tlz.curried'
12 |     assert tlz.curried.__name__ == 'tlz.curried'
13 |     tlz.curried.curry
14 |     import tlz.curried.operator
15 |     assert tlz.curried.operator.__package__ in (None, 'tlz.curried')
16 |     assert tlz.curried.operator.__name__ == 'tlz.curried.operator'
17 |     assert tlz.functoolz.__name__ == 'tlz.functoolz'
18 |     m1 = tlz.functoolz
19 |     import tlz.functoolz as m2
20 |     assert m1 is m2  # attribute access and import statement yield the same module object
21 |     import tlz.sandbox
22 |     try:
23 |         import tlzthisisabadname.curried
24 |         1/0  # only reached if the import wrongly succeeded; ZeroDivisionError fails the test
25 |     except ImportError:
26 |         pass
27 |     try:
28 |         import tlz.curry  # curry is an attribute of tlz, so importing it as a module must fail
29 |         1/0
30 |     except ImportError:
31 |         pass
32 |     try:
33 |         import tlz.badsubmodulename
34 |         1/0
35 |     except ImportError:
36 |         pass
37 | 
38 |     assert toolz.__package__ == 'toolz'
39 |     assert toolz.curried.__package__ == 'toolz.curried'
40 |     assert toolz.functoolz.__name__ == 'toolz.functoolz'
41 |     try:
42 |         import cytoolz  # optional: these checks are skipped when cytoolz is not installed
43 |         assert cytoolz.__package__ == 'cytoolz'
44 |         assert cytoolz.curried.__package__ == 'cytoolz.curried'
45 |         assert cytoolz.functoolz.__name__ == 'cytoolz.functoolz'
46 |     except ImportError:
47 |         pass
48 | 
49 |     if hasattr(tlz, '__file__'):  # __file__ is compared only when the module defines it
50 |         assert tlz.__file__ == toolz.__file__
51 |     if hasattr(tlz.functoolz, '__file__'):
52 |         assert tlz.functoolz.__file__ == toolz.functoolz.__file__
53 | 
54 |     assert tlz.pipe is toolz.pipe
55 | 
56 |     assert 'tlz' in tlz.__doc__
57 |     assert tlz.curried.__doc__ is not None
58 | 


--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | PyToolz API Documentation
 3 | =========================
 4 | 
 5 | Toolz provides a set of utility functions for iterators, functions,
 6 | and dictionaries.  These functions interoperate well and form
 7 | the building blocks of common data analytic operations.  They extend the
 8 | standard libraries `itertools` and `functools` and borrow heavily from the
 9 | standard libraries of contemporary functional languages.
10 | 
11 | Toolz provides a suite of functions which have the following functional virtues:
12 | 
13 | -   **Composable:** They interoperate due to their use of core data structures.
14 | -   **Pure:**  They don't change their inputs or rely on external state.
15 | -   **Lazy:**  They don't run until absolutely necessary, allowing them to support large streaming data sets.
16 | 
17 | Toolz functions are *pragmatic*.  They understand that most programmers
18 | have deadlines.
19 | 
20 | -   **Low Tech:** They're just functions, no syntax or magic tricks to learn
21 | -   **Tuned:** They're profiled and optimized
22 | -   **Serializable:** They support common solutions for parallel computing
23 | 
24 | This gives developers the power to write *powerful* programs to solve *complex
25 | problems* with relatively *simple code*.  This code can be *easy to understand*
26 | without sacrificing *performance*.  Toolz enables this approach, commonly
27 | associated with functional programming, within a natural Pythonic style
28 | suitable for most developers.
29 | 
30 | BSD licensed source code is available at http://github.com/pytoolz/toolz/ .
31 | 
32 | 
33 | Contents
34 | ^^^^^^^^
35 | 
36 | .. toctree::
37 |    :maxdepth: 2
38 | 
39 |    heritage.rst
40 |    install.rst
41 |    composition.rst
42 |    purity.rst
43 |    laziness.rst
44 |    control.rst
45 |    curry.rst
46 |    streaming-analytics.rst
47 |    parallelism.rst
48 |    api.rst
49 |    tips-and-tricks.rst
50 |    references.rst
51 | 


--------------------------------------------------------------------------------
/doc/source/api.rst:
--------------------------------------------------------------------------------
  1 | API
  2 | ===
  3 | 
  4 | This page contains a comprehensive list of all functions within ``toolz``.
  5 | Docstrings should provide sufficient understanding for any individual function.
  6 | 
  7 | Itertoolz
  8 | ---------
  9 | 
 10 | .. currentmodule:: toolz.itertoolz
 11 | 
 12 | .. autosummary::
 13 |    accumulate
 14 |    concat
 15 |    concatv
 16 |    cons
 17 |    count
 18 |    diff
 19 |    drop
 20 |    first
 21 |    frequencies
 22 |    get
 23 |    groupby
 24 |    interleave
 25 |    interpose
 26 |    isdistinct
 27 |    isiterable
 28 |    iterate
 29 |    join
 30 |    last
 31 |    mapcat
 32 |    merge_sorted
 33 |    nth
 34 |    partition
 35 |    partition_all
 36 |    peek
 37 |    peekn
 38 |    pluck
 39 |    random_sample
 40 |    reduceby
 41 |    remove
 42 |    second
 43 |    sliding_window
 44 |    tail
 45 |    take
 46 |    take_nth
 47 |    topk
 48 |    unique
 49 | 
 50 | Functoolz
 51 | ---------
 52 | 
 53 | .. currentmodule:: toolz.functoolz
 54 | 
 55 | .. autosummary::
 56 |    apply
 57 |    complement
 58 |    compose
 59 |    compose_left
 60 |    curry
 61 |    do
 62 |    excepts
 63 |    flip
 64 |    identity
 65 |    juxt
 66 |    memoize
 67 |    pipe
 68 |    thread_first
 69 |    thread_last
 70 | 
 71 | Dicttoolz
 72 | ---------
 73 | 
 74 | .. currentmodule:: toolz.dicttoolz
 75 | 
 76 | .. autosummary::
 77 |    assoc
 78 |    assoc_in
 79 |    dissoc
 80 |    get_in
 81 |    itemfilter
 82 |    itemmap
 83 |    keyfilter
 84 |    keymap
 85 |    merge
 86 |    merge_with
 87 |    update_in
 88 |    valfilter
 89 |    valmap
 90 | 
 91 | Recipes
 92 | ---------
 93 | 
 94 | .. currentmodule:: toolz.recipes
 95 | 
 96 | .. autosummary::
 97 |    countby
 98 |    partitionby
 99 | 
100 | Sandbox
101 | -------
102 | 
103 | .. currentmodule:: toolz.sandbox
104 | 
105 | .. autosummary::
106 |    parallel.fold
107 |    core.EqualityHashKey
108 |    core.unzip
109 | 
110 | 
111 | Definitions
112 | -----------
113 | 
114 | .. automodule:: toolz.itertoolz
115 |    :members:
116 | 
117 | .. automodule:: toolz.recipes
118 |    :members:
119 | 
120 | .. automodule:: toolz.functoolz
121 |    :members:
122 | 
123 | .. automodule:: toolz.dicttoolz
124 |    :members:
125 | 
126 | .. automodule:: toolz.sandbox.core
127 |    :members:
128 | 
129 | .. automodule:: toolz.sandbox.parallel
130 |    :members:
131 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | build-backend = "setuptools.build_meta"
 3 | requires = [
 4 |   "setuptools >=77",
 5 |   "setuptools-git-versioning >=2.0",
 6 | ]
 7 | 
 8 | [project]
 9 | name = "toolz"
10 | dynamic = ["version"]
11 | description = "List processing tools and functional utilities"
12 | readme = "README.rst"
13 | requires-python = ">=3.9"
14 | license = "BSD-3-Clause"
15 | license-files = ["LICENSE.txt"]
16 | authors = [
17 |   { name = "PyToolz Contributors" },
18 | ]
19 | maintainers = [
20 |   { name = "Erik Welch", email = "erik.n.welch@gmail.com" },
21 | ]
22 | keywords = [
23 |   "functional",
24 |   "utility",
25 |   "itertools",
26 |   "functools",
27 | ]
28 | classifiers = [
29 |   "Development Status :: 5 - Production/Stable",
30 |   "Programming Language :: Python",
31 |   "Programming Language :: Python :: 3",
32 |   "Programming Language :: Python :: 3.9",
33 |   "Programming Language :: Python :: 3.10",
34 |   "Programming Language :: Python :: 3.11",
35 |   "Programming Language :: Python :: 3.12",
36 |   "Programming Language :: Python :: 3.13",
37 |   "Programming Language :: Python :: 3.14",
38 |   "Programming Language :: Python :: Implementation :: CPython",
39 |   "Programming Language :: Python :: Implementation :: PyPy",
40 | ]
41 | 
42 | [project.urls]
43 | homepage = "https://github.com/pytoolz/toolz"
44 | repository = "https://github.com/pytoolz/toolz"
45 | documentation = "https://toolz.readthedocs.io/en/latest/"
46 | changelog = "https://github.com/pytoolz/toolz/releases"
47 | 
48 | [tool.setuptools-git-versioning]
49 | enabled = true
50 | dev_template = "{tag}+{ccount}.g{sha}"
51 | dirty_template = "{tag}+{ccount}.g{sha}.dirty"
52 | 
53 | [tool.setuptools]
54 | packages = [
55 |   "toolz",
56 |   "toolz.curried",
57 |   "toolz.sandbox",
58 |   "toolz.sandbox.tests",
59 |   "toolz.tests",
60 |   "tlz",
61 | ]
62 | 
63 | [tool.coverage.run]
64 | source = ["toolz"]
65 | omit = [
66 |   "toolz/tests/test*",
67 |   "toolz/*/tests/test*",
68 |   "toolz/compatibility.py",
69 | ]
70 | 
71 | [tool.pytest.ini_options]
72 | minversion = "6.0"
73 | testpaths = ["toolz"]
74 | xfail_strict = true
75 | addopts = [
76 |   "--strict-config",  # Force error if config is misspelled
77 |   "--strict-markers", # Force error if marker is misspelled (must be defined in config)
78 |   "-ra",              # Print summary of all fails/errors
79 | ]
80 | log_cli_level = "info"
81 | filterwarnings = [
82 |   "error",
83 |   "ignore:The toolz.compatibility module is no longer needed:DeprecationWarning:",
84 | ]
85 | 
86 | [tool.coverage.report]
87 | exclude_lines = [
88 |     "pragma: no cover",
89 | ]
90 | 
91 | [tool.codespell]
92 | ignore-words-list = "juxt,lamda"
93 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | ci:
 2 |   autofix_prs: false
 3 |   skip: [no-commit-to-branch]
 4 | fail_fast: false
 5 | default_language_version:
 6 |   python: python3
 7 | repos:
 8 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 9 |     rev: v6.0.0
10 |     hooks:
11 |       # Sanity checks
12 |       - id: check-added-large-files
13 |       - id: check-case-conflict
14 |       - id: check-illegal-windows-names
15 |       - id: check-merge-conflict
16 |       # Checks based on file type
17 |       - id: check-ast
18 |       - id: check-toml
19 |       - id: check-yaml
20 |       # Detect mistakes
21 |       - id: check-vcs-permalinks
22 |       - id: debug-statements
23 |       - id: destroyed-symlinks
24 |       - id: detect-private-key
25 |       - id: forbid-submodules
26 |       # Automatic fixes
27 |       - id: end-of-file-fixer
28 |       - id: mixed-line-ending
29 |         args: [--fix=lf]
30 |       - id: trailing-whitespace
31 |       - id: name-tests-test
32 |         args: ["--pytest-test-first"]
33 |   - repo: https://github.com/abravalheri/validate-pyproject
34 |     rev: v0.24.1
35 |     hooks:
36 |       - id: validate-pyproject
37 |         name: Validate pyproject.toml
38 |   - repo: https://github.com/asottile/pyupgrade
39 |     rev: v3.21.0
40 |     hooks:
41 |       - id: pyupgrade
42 |         args: [--py39-plus]
43 |   - repo: https://github.com/codespell-project/codespell
44 |     rev: v2.4.1
45 |     hooks:
46 |       - id: codespell
47 |         types_or: [python, markdown, rst, toml, yaml]
48 |         additional_dependencies:
49 |           - tomli; python_version<'3.11'
50 |         files: ^(toolz|tlz|docs)/
51 |   - repo: https://github.com/rhysd/actionlint
52 |     rev: v1.7.8
53 |     hooks:
54 |       - id: actionlint
55 |   - repo: https://github.com/adrienverge/yamllint
56 |     rev: v1.37.1
57 |     hooks:
58 |       - id: yamllint
59 |         args: [-d, "{extends: default, rules: {line-length: disable}}"]
60 |   - repo: https://github.com/woodruffw/zizmor-pre-commit
61 |     rev: v1.15.2
62 |     hooks:
63 |       - id: zizmor
64 |   - repo: https://github.com/pre-commit/pygrep-hooks
65 |     rev: v1.10.0
66 |     hooks:
67 |       - id: rst-directive-colons
68 |       - id: rst-inline-touching-normal
69 |       - id: python-check-blanket-noqa
70 |       - id: python-check-blanket-type-ignore
71 |       - id: python-no-eval
72 |       - id: python-no-log-warn
73 |       - id: text-unicode-replacement-char
74 |   - repo: https://github.com/python-jsonschema/check-jsonschema
75 |     rev: 0.34.1
76 |     hooks:
77 |       - id: check-dependabot
78 |       - id: check-github-workflows
79 |       - id: check-readthedocs
80 |   - repo: meta
81 |     hooks:
82 |       - id: check-hooks-apply
83 |       - id: check-useless-excludes
84 |   - repo: https://github.com/pre-commit/pre-commit-hooks
85 |     rev: v6.0.0
86 |     hooks:
87 |       - id: no-commit-to-branch
88 |         args: [--branch, master]
89 | 


--------------------------------------------------------------------------------
/doc/source/heritage.rst:
--------------------------------------------------------------------------------
 1 | Heritage
 2 | ========
 3 | 
 4 | While Python was originally intended as an imperative language
 5 | [`Guido`_], it contains all elements necessary to support a rich set of features
 6 | from the functional paradigm.  In particular its core data structures, lazy
 7 | iterators, and functions as first class objects can be combined to implement a
 8 | common standard library of functions shared among many functional languages.
 9 | 
10 | This was first recognized and supported through the standard libraries
11 | itertools_ and `functools`_ which contain functions like ``permutations``,
12 | ``chain`` and ``partial`` to complement the standard ``map``, ``filter``,
13 | ``reduce`` already found in the core language.  While these libraries contain
14 | substantial functionality they do not achieve the same level of adoption found
15 | in similar projects in other languages.  This may be because they are
16 | incomplete and lack a number of commonly related functions like ``compose`` and
17 | ``groupby`` which often complement these core operations.
18 | 
19 | A completion of this set of functions was first attempted in the projects
20 | `itertoolz`_ and `functoolz`_ (note the z).  These libraries contained
21 | several functions that were absent in the standard itertools_ / `functools`_
22 | libraries.  The ``itertoolz``/``functoolz`` libraries were eventually merged
23 | into the monolithic ``toolz`` project described here.
24 | 
25 | Most contemporary functional languages (Haskell, Scala, Clojure, ...) contain
26 | some variation of the functions found in ``toolz``.  The ``toolz`` project
27 | generally adheres closely to the API found in the Clojure standard library (see
28 | `cheatsheet`_) and where disagreements occur that API usually dominates.  The
29 | ``toolz`` API is also strongly affected by the principles of the Python
30 | language itself, and often makes deviations in order to be more approachable to
31 | that community.
32 | 
33 | The development of a functional standard library within a popular imperative
34 | language is not unique.  Similar projects have arisen in other
35 | imperative-by-design languages that contain the necessary elements to support a
36 | functional standard library.  `Underscore.js <https://underscorejs.org>`_ in JavaScript has attained
37 | notable popularity in the web community.  ``LINQ`` in C# follows a similar
38 | philosophy but mimics declarative database languages rather than functional
39 | ones. `Enumerable <https://ruby-doc.org/core-2.0.0/Enumerable.html>`_ is the closest project in Ruby.  Other excellent projects
40 | also exist within the Python ecosystem, most notably `Fn.py <https://github.com/kachayev/fn.py>`_ and `Funcy <https://github.com/suor/funcy/>`_.
41 | 
42 | .. _itertools: https://docs.python.org/library/itertools.html
43 | .. _functools: https://docs.python.org/library/functools.html
44 | .. _itertoolz: https://github.com/mrocklin/itertoolz
45 | .. _functoolz: https://github.com/mrocklin/functoolz
46 | .. _cheatsheet: https://clojure.org/cheatsheet
47 | .. _Guido: https://python-history.blogspot.com/2009/04/origins-of-pythons-functional-features.html
48 | 


--------------------------------------------------------------------------------
/toolz/tests/test_signatures.py:
--------------------------------------------------------------------------------
 1 | import functools
 2 | import toolz._signatures as _sigs
 3 | from toolz._signatures import builtins, _is_valid_args, _is_partial_args
 4 | 
 5 | 
 6 | def test_is_valid(check_valid=_is_valid_args, incomplete=False):  # shared table; test_is_partial reruns it with incomplete=True
 7 |     orig_check_valid = check_valid
 8 |     check_valid = lambda func, *args, **kwargs: orig_check_valid(func, args, kwargs)  # adapt call style to the (func, args, kwargs) checker
 9 | 
10 |     assert check_valid(lambda x: None) is None  # for this plain lambda the checker returns None
11 | 
12 |     f = builtins.abs
13 |     assert check_valid(f) is incomplete  # `incomplete` is the expected result when required arguments are still missing
14 |     assert check_valid(f, 1)
15 |     assert check_valid(f, x=1) is False
16 |     assert check_valid(f, 1, 2) is False
17 | 
18 |     f = builtins.complex
19 |     assert check_valid(f)
20 |     assert check_valid(f, 1)
21 |     assert check_valid(f, real=1)
22 |     assert check_valid(f, 1, 2)
23 |     assert check_valid(f, 1, imag=2)
24 |     assert check_valid(f, 1, real=2) is False
25 |     assert check_valid(f, 1, 2, 3) is False
26 |     assert check_valid(f, 1, 2, imag=3) is False
27 | 
28 |     f = builtins.int
29 |     assert check_valid(f)
30 |     assert check_valid(f, 1)
31 |     assert check_valid(f, x=1)
32 |     assert check_valid(f, 1, 2)
33 |     assert check_valid(f, 1, base=2)
34 |     assert check_valid(f, x=1, base=2)
35 |     assert check_valid(f, base=2) is incomplete
36 |     assert check_valid(f, 1, 2, 3) is False
37 | 
38 |     f = builtins.map
39 |     assert check_valid(f) is incomplete
40 |     assert check_valid(f, 1) is incomplete
41 |     assert check_valid(f, 1, 2)
42 |     assert check_valid(f, 1, 2, 3)
43 |     assert check_valid(f, 1, 2, 3, 4)
44 | 
45 |     f = builtins.min
46 |     assert check_valid(f) is incomplete
47 |     assert check_valid(f, 1)
48 |     assert check_valid(f, iterable=1) is False
49 |     assert check_valid(f, 1, 2)
50 |     assert check_valid(f, 1, 2, 3)
51 |     assert check_valid(f, key=None) is incomplete
52 |     assert check_valid(f, 1, key=None)
53 |     assert check_valid(f, 1, 2, key=None)
54 |     assert check_valid(f, 1, 2, 3, key=None)
55 |     assert check_valid(f, key=None, default=None) is incomplete
56 |     assert check_valid(f, 1, key=None, default=None)
57 |     assert check_valid(f, 1, 2, key=None, default=None) is False
58 |     assert check_valid(f, 1, 2, 3, key=None, default=None) is False
59 | 
60 |     f = builtins.range
61 |     assert check_valid(f) is incomplete
62 |     assert check_valid(f, 1)
63 |     assert check_valid(f, 1, 2)
64 |     assert check_valid(f, 1, 2, 3)
65 |     assert check_valid(f, 1, 2, step=3) is False
66 |     assert check_valid(f, 1, 2, 3, 4) is False
67 | 
68 |     f = functools.partial
69 |     assert orig_check_valid(f, (), {}) is incomplete  # called directly: a 'func' kwarg would clash with the lambda's own `func` parameter
70 |     assert orig_check_valid(f, (), {'func': 1}) is incomplete
71 |     assert orig_check_valid(f, (1,), {})
72 |     assert orig_check_valid(f, (1,), {'func': 1})
73 |     assert orig_check_valid(f, (1, 2), {})
74 | 
75 | 
76 | def test_is_partial():  # reuses the test_is_valid table against _is_partial_args
77 |     test_is_valid(check_valid=_is_partial_args, incomplete=True)  # argument sets still missing required values are expected to give True here
78 | 
79 | 
80 | def test_for_coverage():  # :) calls private _signatures helpers with unusual inputs
81 |     assert _sigs._is_arity(1, 1) is None  # func argument is the int 1; the helper returns None
82 |     assert _sigs._is_arity(1, all)  # builtin `all` is reported as having arity 1
83 |     assert _sigs._has_varargs(None) is None  # each helper below returns None when given None
84 |     assert _sigs._has_keywords(None) is None
85 |     assert _sigs._num_required_args(None) is None
86 | 


--------------------------------------------------------------------------------
/toolz/curried/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Alternate namespace for toolz such that all functions are curried
  3 | 
  4 | Currying provides implicit partial evaluation of all functions
  5 | 
  6 | Example:
  7 | 
  8 |     Get usually requires two arguments, an index and a collection
  9 |     >>> from toolz.curried import get
 10 |     >>> get(0, ('a', 'b'))
 11 |     'a'
 12 | 
 13 |     When we use it in higher order functions we often want to pass a partially
 14 |     evaluated form
 15 |     >>> data = [(1, 2), (11, 22), (111, 222)]
 16 |     >>> list(map(lambda seq: get(0, seq), data))
 17 |     [1, 11, 111]
 18 | 
 19 |     The curried version allows simple expression of partial evaluation
 20 |     >>> list(map(get(0), data))
 21 |     [1, 11, 111]
 22 | 
 23 | See Also:
 24 |     toolz.functoolz.curry
 25 | """
 26 | import toolz
 27 | from . import operator
 28 | from toolz import (
 29 |     apply,
 30 |     comp,
 31 |     complement,
 32 |     compose,
 33 |     compose_left,
 34 |     concat,
 35 |     concatv,
 36 |     count,
 37 |     curry,
 38 |     diff,
 39 |     first,
 40 |     flip,
 41 |     frequencies,
 42 |     identity,
 43 |     interleave,
 44 |     isdistinct,
 45 |     isiterable,
 46 |     juxt,
 47 |     last,
 48 |     memoize,
 49 |     merge_sorted,
 50 |     peek,
 51 |     pipe,
 52 |     second,
 53 |     thread_first,
 54 |     thread_last,
 55 | )
 56 | from .exceptions import merge, merge_with
 57 | 
 58 | accumulate = toolz.curry(toolz.accumulate)  # curried wrappers, A-Z
 59 | assoc = toolz.curry(toolz.assoc)
 60 | assoc_in = toolz.curry(toolz.assoc_in)
 61 | cons = toolz.curry(toolz.cons)
 62 | countby = toolz.curry(toolz.countby)
 63 | dissoc = toolz.curry(toolz.dissoc)
 64 | do = toolz.curry(toolz.do)
 65 | drop = toolz.curry(toolz.drop)
 66 | excepts = toolz.curry(toolz.excepts)
 67 | filter = toolz.curry(toolz.filter)
 68 | get = toolz.curry(toolz.get)
 69 | get_in = toolz.curry(toolz.get_in)
 70 | groupby = toolz.curry(toolz.groupby)
 71 | interpose = toolz.curry(toolz.interpose)
 72 | itemfilter = toolz.curry(toolz.itemfilter)
 73 | itemmap = toolz.curry(toolz.itemmap)
 74 | iterate = toolz.curry(toolz.iterate)
 75 | join = toolz.curry(toolz.join)
 76 | keyfilter = toolz.curry(toolz.keyfilter)
 77 | keymap = toolz.curry(toolz.keymap)
 78 | map = toolz.curry(toolz.map)
 79 | mapcat = toolz.curry(toolz.mapcat)
 80 | nth = toolz.curry(toolz.nth)
 81 | partial = toolz.curry(toolz.partial)
 82 | partition = toolz.curry(toolz.partition)
 83 | partition_all = toolz.curry(toolz.partition_all)
 84 | partitionby = toolz.curry(toolz.partitionby)
 85 | peekn = toolz.curry(toolz.peekn)
 86 | pluck = toolz.curry(toolz.pluck)
 87 | random_sample = toolz.curry(toolz.random_sample)
 88 | reduce = toolz.curry(toolz.reduce)
 89 | reduceby = toolz.curry(toolz.reduceby)
 90 | remove = toolz.curry(toolz.remove)
 91 | sliding_window = toolz.curry(toolz.sliding_window)
 92 | sorted = toolz.curry(toolz.sorted)
 93 | tail = toolz.curry(toolz.tail)
 94 | take = toolz.curry(toolz.take)
 95 | take_nth = toolz.curry(toolz.take_nth)
 96 | topk = toolz.curry(toolz.topk)
 97 | unique = toolz.curry(toolz.unique)
 98 | update_in = toolz.curry(toolz.update_in)
 99 | valfilter = toolz.curry(toolz.valfilter)
100 | valmap = toolz.curry(toolz.valmap)
101 | 
102 | del exceptions  # remove the submodule name from this namespace
103 | del toolz  # only needed above to build the wrappers
104 | 


--------------------------------------------------------------------------------
/doc/source/purity.rst:
--------------------------------------------------------------------------------
 1 | Function Purity
 2 | ===============
 3 | 
 4 | We call a function *pure* if it meets the following criteria:
 5 | 
 6 | 1.  It does not depend on hidden state, or equivalently it only depends on its
 7 |     inputs.
 8 | 2.  Evaluation of the function does not cause side effects.
 9 | 
10 | In short the internal work of a pure function is isolated from the rest of the
11 | program.
12 | 
13 | Examples
14 | --------
15 | 
16 | This is made clear by two examples:
17 | 
18 | .. code::
19 | 
20 |     # A pure function
21 |     def min(x, y):
22 |         if x < y:
23 |             return x
24 |         else:
25 |             return y
26 | 
27 | 
28 |     # An impure function
29 |     exponent = 2
30 | 
31 |     def powers(L):
32 |         for i in range(len(L)):
33 |             L[i] = L[i]**exponent
34 |         return L
35 | 
36 | The function ``min`` is pure.  It always produces the same result given the
37 | same inputs and it doesn't affect any external variable.
38 | 
39 | The function ``powers`` is impure for two reasons.  First, it depends on a
40 | global variable, ``exponent``, which can change [*]_.  Second, it changes the
41 | input ``L`` which may have external state.  Consider the following execution:
42 | 
43 | .. code::
44 | 
45 |     >>> data = [1, 2, 3]
46 |     >>> result = powers(data)
47 | 
48 |     >>> print(result)
49 |     [1, 4, 9]
50 |     >>> print(data)
51 |     [1, 4, 9]
52 | 
53 | We see that ``powers`` affected the variable ``data``.  Users of our function
54 | might be surprised by this.  Usually we expect our inputs to be unchanged.
55 | 
56 | Another problem occurs when we run this code in a different context:
57 | 
58 | .. code::
59 | 
60 |     >>> data = [1, 2, 3]
61 |     >>> result = powers(data)
62 |     >>> print(result)
63 |     [1, 8, 27]
64 | 
65 | When we give ``powers`` the same inputs we receive different outputs; how could
66 | this be?  Someone must have changed the value of ``exponent`` to be ``3``,
67 | producing cubes rather than squares.  At first this flexibility may seem like a
68 | feature and indeed in many cases it may be.  The cost for this flexibility is
69 | that we need to keep track of the ``exponent`` variable separately whenever we
70 | use ``powers``.  As we use more functions these extra variables become a
71 | burden.
72 | 
73 | .. [*] A function depending on a global value can be pure if the value never
74 |        changes, i.e. is immutable.
75 | 
76 | State
77 | -----
78 | 
79 | Impure functions are often more efficient but also require that the programmer
80 | "keep track" of the state of several variables.  Keeping track of this state
81 | becomes increasingly difficult as programs grow in size.  By eschewing state,
82 | programmers are able to conceptually scale out to solve much larger problems.
83 | The loss of performance is often negligible compared to the freedom to trust
84 | that your functions work as expected on your inputs.
85 | 
86 | Maintaining state provides efficiency at the cost of surprises.  Pure
87 | functions produce no surprises and so lighten the mental load of the
88 | programmer.
89 | 
90 | 
91 | Testing
92 | -------
93 | 
94 | As an added bonus, testing pure functions is substantially simpler than testing
95 | impure ones.  A programmer who has tried to test functions that include
96 | randomness will know this first-hand.
97 | 


--------------------------------------------------------------------------------
/toolz/sandbox/parallel.py:
--------------------------------------------------------------------------------
 1 | import functools
 2 | from toolz.itertoolz import partition_all
 3 | from toolz.utils import no_default
 4 | 
 5 | 
 6 | def _reduce(func, seq, initial=None):  # reduce; initial is optional
 7 |     if initial is None:  # None means no initial value was given
 8 |         return functools.reduce(func, seq)
 9 |     else:
10 |         return functools.reduce(func, seq, initial)
11 | 
12 | 
def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None):
    """
    Reduce without guarantee of ordered reduction.

    Parameters
    ----------
    binop
        Associative operator. The associative property allows us to
        leverage a parallel map to perform reductions in parallel.
    seq
        A sequence to be aggregated.
    default
        An identity element like 0 for ``add`` or 1 for ``mul``.  It is used
        as the initial value of *every* chunk, so it must be a true identity
        for ``binop``.
    map
        An implementation of ``map``. This may be parallel and determines
        how work is distributed.
    chunksize
        Number of elements of ``seq`` that should be handled within a single
        function call.  Must be greater than 1.
    combine
        Binary operator to combine two intermediate results.
        If ``binop`` is of type (total, item) -> total
        then ``combine`` is of type (total, total) -> total.
        Defaults to ``binop`` for the common case of operators like add.

    Returns
    -------
    The reduction of ``seq``.  If ``seq`` is empty, ``default`` is returned.

    Raises
    ------
    TypeError
        If ``seq`` is empty and no ``default`` was supplied (the same
        condition under which ``functools.reduce`` raises).

    Notes
    -----
    Fold chunks up the collection into blocks of size ``chunksize`` and then
    feeds each of these to calls to ``reduce``. This work is distributed
    with a call to ``map``, gathered back and then refolded to finish the
    computation. In this way ``fold`` specifies only how to chunk up data but
    leaves the distribution of this work to an externally provided ``map``
    function. This function can be sequential or rely on multithreading,
    multiprocessing, or even distributed solutions.

    If ``map`` intends to serialize functions it should be prepared to accept
    and serialize lambdas. Note that the standard ``pickle`` module fails
    here.

    Example
    -------

    >>> # Provide a parallel map to accomplish a parallel sum
    >>> from operator import add
    >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map)
    10
    """
    # A chunksize of 1 would never shrink the list of intermediate results,
    # so the re-aggregation below would recurse forever.
    assert chunksize > 1

    if combine is None:
        combine = binop

    chunks = partition_all(chunksize, seq)

    # Identity check on the sentinel: ``==`` would invoke the default's own
    # ``__eq__``, which misbehaves for objects with element-wise equality.
    if default is no_default:
        reduce_chunk = functools.partial(_reduce, binop)
    else:
        reduce_chunk = functools.partial(_reduce, binop, initial=default)

    # Evaluate sequence in chunks via map
    results = list(map(reduce_chunk, chunks))  # TODO: Support complete laziness

    if not results:
        # Empty input produces no chunks; recursing on an empty list would
        # never terminate, so resolve it here the way ``reduce`` does.
        if default is no_default:
            raise TypeError('fold() of empty sequence with no default value')
        return default

    if len(results) == 1:    # Return completed result
        return results[0]

    # Recurse to reaggregate intermediate results
    return fold(combine, results, map=map, chunksize=chunksize)
84 | 


--------------------------------------------------------------------------------
/doc/source/tips-and-tricks.rst:
--------------------------------------------------------------------------------
  1 | Tips and Tricks
  2 | ===============
  3 | 
  4 | Toolz functions can be combined to make functions that, while common, aren't
  5 | a part of toolz's standard offerings. This section presents
  6 | a few of these recipes.
  7 | 
  8 | 
  9 | * .. function:: pick(allowlist, dictionary)
 10 | 
 11 |   Return a subset of the provided dictionary with keys contained in the
 12 |   allowlist.
 13 | 
 14 |   ::
 15 | 
 16 |     from toolz import keyfilter
 17 | 
 18 |     def pick(allowlist, d):
 19 |         return keyfilter(lambda k: k in allowlist, d)
 20 | 
 21 | 
 22 |   Example:
 23 | 
 24 |     >>> alphabet = {'a': 1, 'b': 2, 'c': 3, 'd': 4}
 25 |     >>> pick(['a', 'b'], alphabet)
 26 |     {'a': 1, 'b': 2}
 27 | 
 28 | 
 29 | * .. function:: omit(denylist, dictionary)
 30 | 
 31 |   Return a subset of the provided dictionary with keys *not* contained in the
 32 |   denylist.
 33 | 
 34 |   ::
 35 | 
 36 |     from toolz import keyfilter
 37 | 
 38 |     def omit(denylist, d):
 39 |         return keyfilter(lambda k: k not in denylist, d)
 40 | 
 41 | 
 42 |   Example:
 43 | 
 44 |     >>> alphabet = {'a': 1, 'b': 2, 'c': 3, 'd': 4}
 45 |     >>> omit(['a', 'b'], alphabet)
 46 |     {'c': 3, 'd': 4}
 47 | 
 48 | 
 49 | * .. function:: compact(iterable)
 50 | 
 51 |   Filter an iterable on "truthy" values.
 52 | 
 53 |   ::
 54 | 
 55 |     from toolz import filter
 56 | 
 57 |     def compact(iter):
 58 |         return filter(None, iter)
 59 | 
 60 | 
 61 |   Example:
 62 | 
 63 |     >>> results = [0, 1, 2, None, 3, False]
 64 |     >>> list(compact(results))
 65 |     [1, 2, 3]
 66 | 
 67 | * .. function:: keyjoin(leftkey, leftseq, rightkey, rightseq)
 68 | 
 69 |   Inner join two sequences of dictionaries on specified keys, merging matches with right value
 70 |   precedence.
 71 | 
 72 |   ::
 73 | 
 74 |     from itertools import starmap
 75 |     from toolz import join, merge
 76 | 
 77 |     def keyjoin(leftkey, leftseq, rightkey, rightseq):
 78 |         return starmap(merge, join(leftkey, leftseq, rightkey, rightseq))
 79 | 
 80 | 
 81 |   Example:
 82 | 
 83 |    >>> people = [{'id': 0, 'name': 'Anonymous Guy', 'location': 'Unknown'},
 84 |    ...           {'id': 1, 'name': 'Karan', 'location': 'San Francisco'},
 85 |    ...           {'id': 2, 'name': 'Matthew', 'location': 'Oakland'}]
 86 |    >>> hobbies = [{'person_id': 1, 'hobby': 'Tennis'},
 87 |    ...            {'person_id': 1, 'hobby': 'Acting'},
 88 |    ...            {'person_id': 2, 'hobby': 'Biking'}]
 89 |    >>> list(keyjoin('id', people, 'person_id', hobbies))
 90 |    [{'hobby': 'Tennis',
 91 |      'id': 1,
 92 |      'location': 'San Francisco',
 93 |      'name': 'Karan',
 94 |      'person_id': 1},
 95 |     {'hobby': 'Acting',
 96 |      'id': 1,
 97 |      'location': 'San Francisco',
 98 |      'name': 'Karan',
 99 |      'person_id': 1},
100 |     {'hobby': 'Biking',
101 |      'id': 2,
102 |      'location': 'Oakland',
103 |      'name': 'Matthew',
104 |      'person_id': 2}]
105 | 
106 | * .. function:: areidentical(\*seqs)
107 | 
108 |   Determine if sequences are identical element-wise.
109 |   This lazily evaluates the sequences and stops as soon as the result
110 |   is determined.
111 | 
112 |   ::
113 | 
114 |     from toolz import diff
115 | 
116 |     def areidentical(*seqs):
117 |         return not any(diff(*seqs, default=object()))
118 | 
119 | 
120 |   Example:
121 | 
122 |    >>> areidentical([1, 2, 3], (1, 2, 3))
123 |    True
124 | 
125 |    >>> areidentical([1, 2, 3], [1, 2])
126 |    False
127 | 


--------------------------------------------------------------------------------
/tlz/_build_tlz.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import types
 3 | import toolz
 4 | from importlib import import_module
 5 | from importlib.machinery import ModuleSpec
 6 | 
 7 | 
class TlzLoader:
    """ Finds and loads ``tlz`` modules when added to sys.meta_path

    The same object acts as finder (``find_spec`` / ``find_module``) and
    loader (``create_module`` / ``exec_module`` / ``load_module``).  A
    ``tlz`` module is filled from the matching ``cytoolz`` module when that
    can be imported, and from the ``toolz`` module otherwise.
    """

    def __init__(self):
        # Objects that are always taken from ``toolz``, even when the
        # ``cytoolz`` counterpart is available (see ``exec_module``).
        self.always_from_toolz = {
            toolz.pipe,
        }

    def _load_toolz(self, fullname):
        """Import the ``cytoolz`` and ``toolz`` counterparts of ``fullname``.

        The first dotted component of ``fullname`` is replaced by
        ``cytoolz`` and by ``toolz`` in turn.  Returns a dict holding the
        modules that imported successfully under the keys ``'cytoolz'``
        and/or ``'toolz'``; raises ``ImportError`` if neither could be
        imported.
        """
        rv = {}
        package, dot, submodules = fullname.partition('.')
        try:
            module_name = ''.join(['cytoolz', dot, submodules])
            rv['cytoolz'] = import_module(module_name)
        except ImportError:
            pass
        try:
            module_name = ''.join(['toolz', dot, submodules])
            rv['toolz'] = import_module(module_name)
        except ImportError:
            pass
        if not rv:
            raise ImportError(fullname)
        return rv

    def find_module(self, fullname, path=None):  # pragma: py3 no cover
        """Return this loader for names under ``tlz``; otherwise None."""
        package, dot, submodules = fullname.partition('.')
        if package == 'tlz':
            return self

    def load_module(self, fullname):  # pragma: py3 no cover
        """Return the module for ``fullname``, creating it on first use.

        An existing ``sys.modules`` entry is returned as is.  Otherwise a
        new module is created, registered in ``sys.modules`` and then
        populated by ``exec_module``.
        """
        if fullname in sys.modules:  # pragma: no cover
            return sys.modules[fullname]
        spec = ModuleSpec(fullname, self)
        module = self.create_module(spec)
        # Registered before exec_module runs, which may itself import
        # further ``tlz`` submodules.
        sys.modules[fullname] = module
        self.exec_module(module)
        return module

    def find_spec(self, fullname, path, target=None):  # pragma: no cover
        """Return a ``ModuleSpec`` for names under ``tlz``; otherwise None."""
        package, dot, submodules = fullname.partition('.')
        if package == 'tlz':
            return ModuleSpec(fullname, self)

    def create_module(self, spec):
        """Create an empty module object named ``spec.name``."""
        return types.ModuleType(spec.name)

    def exec_module(self, module):
        """Populate ``module`` from its ``cytoolz`` / ``toolz`` counterpart.

        ``fast_mod`` is the ``cytoolz`` module when available and the
        ``toolz`` module otherwise; ``slow_mod`` is the reverse preference.
        """
        toolz_mods = self._load_toolz(module.__name__)
        fast_mod = toolz_mods.get('cytoolz') or toolz_mods['toolz']
        slow_mod = toolz_mods.get('toolz') or toolz_mods['cytoolz']
        # ``module.__dict__`` is passed last to ``merge`` so that attributes
        # already set on ``module`` are kept rather than overwritten
        # (``toolz.merge`` gives later dicts precedence).
        module.__dict__.update(toolz.merge(fast_mod.__dict__, module.__dict__))
        package = fast_mod.__package__
        if package is not None:
            # Swap the leading package name for ``tlz``.
            package, dot, submodules = package.partition('.')
            module.__package__ = ''.join(['tlz', dot, submodules])
        if not module.__doc__:
            module.__doc__ = fast_mod.__doc__

        # show file from toolz during introspection
        try:
            module.__file__ = slow_mod.__file__
        except AttributeError:
            pass

        for k, v in fast_mod.__dict__.items():
            tv = slow_mod.__dict__.get(k)
            # The set membership test below hashes ``tv``; unhashable values
            # are replaced by None so that the test cannot raise.
            try:
                hash(tv)
            except TypeError:
                tv = None
            if tv in self.always_from_toolz:
                module.__dict__[k] = tv
            elif (
                isinstance(v, types.ModuleType)
                and v.__package__ == fast_mod.__name__
            ):
                # ``v`` is a module whose package is ``fast_mod``: bind the
                # corresponding ``tlz`` module under this name instead.
                package, dot, submodules = v.__name__.partition('.')
                module_name = ''.join(['tlz', dot, submodules])
                submodule = import_module(module_name)
                module.__dict__[k] = submodule
89 | 
90 | 
# Register the loader so later ``tlz.*`` imports are handled by it, then
# populate the ``tlz`` package module that is already in ``sys.modules``.
tlz_loader = TlzLoader()
sys.meta_path.append(tlz_loader)
tlz_loader.exec_module(sys.modules['tlz'])
94 | 


--------------------------------------------------------------------------------
/.github/workflows/publish_pypi.yml:
--------------------------------------------------------------------------------
  1 | name: Build Wheel and Release
  2 | 
  3 | on:
  4 |   pull_request:
  5 |   workflow_dispatch:
  6 |     inputs:
  7 |       upload_dest:
  8 |         type: choice
  9 |         description: Upload wheels to
 10 |         options:
 11 |           - No Upload
 12 |           - PyPI
 13 |           - Test PyPI
 14 |   push:
 15 |     branches:
 16 |       - master
 17 |     tags:
 18 |       - '[0-9]+.[0-9]+.[0-9]+*'
 19 | 
 20 | permissions:
 21 |   contents: read
 22 | 
 23 | jobs:
 24 |   build-artifacts:
 25 |     runs-on: ubuntu-latest
 26 |     defaults:
 27 |       run:
 28 |         shell: bash -l {0}
 29 |     steps:
 30 |       - name: Checkout
 31 |         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8  # v5.0.0
 32 |         with:
 33 |           fetch-depth: 0
 34 |           persist-credentials: false
 35 |       - name: Set up Python
 36 |         uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c  # v6.0.0
 37 |         with:
 38 |           python-version: "3.9"
 39 |       - name: Install build dependencies
 40 |         run: |
 41 |           python -m pip install --upgrade pip
 42 |           python -m pip install build twine
 43 |       - name: Build wheel and sdist
 44 |         run: python -m build
 45 |       - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02  # v4.6.2
 46 |         with:
 47 |           name: releases
 48 |           path: dist
 49 |           if-no-files-found: error
 50 |       - name: Check with twine
 51 |         run: python -m twine check --strict dist/*
 52 | 
 53 |   upload-to-test-pypi:
 54 |     needs: build-artifacts
 55 |     runs-on: ubuntu-latest
 56 |     if: github.repository == 'pytoolz/toolz' && (startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' && github.event.inputs.upload_dest == 'Test PyPI')
 57 | 
 58 |     environment:
 59 |       name: test-pypi
 60 |       url: https://test.pypi.org/p/toolz
 61 |     permissions:
 62 |       id-token: write
 63 | 
 64 |     steps:
 65 |       - uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0  # v5.0.0
 66 |         with:
 67 |           name: releases
 68 |           path: dist
 69 |       - name: Publish to Test-PyPI
 70 |         uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e  # v1.13.0
 71 |         with:
 72 |           repository-url: https://test.pypi.org/legacy/
 73 |           print-hash: true
 74 |           verbose: true
 75 | 
 76 |   upload-to-pypi:
 77 |     needs: build-artifacts
 78 |     runs-on: ubuntu-latest
 79 |     if: github.repository == 'pytoolz/toolz' && startsWith(github.ref, 'refs/tags/') && (github.event_name != 'workflow_dispatch' || github.event.inputs.upload_dest == 'PyPI')
 80 | 
 81 |     environment:
 82 |       name: pypi
 83 |       url: https://pypi.org/p/toolz
 84 |     permissions:
 85 |       id-token: write
 86 |       attestations: write
 87 | 
 88 |     steps:
 89 |       - uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0  # v5.0.0
 90 |         with:
 91 |           name: releases
 92 |           path: dist
 93 | 
 94 |       - name: Generate artifact attestation for sdist and wheel
 95 |         uses: actions/attest-build-provenance@977bb373ede98d70efdf65b84cb5f73e068dcc2a  # v3.0.0
 96 |         with:
 97 |           subject-path: "dist/toolz-*"
 98 | 
 99 |       - name: Publish to PyPI
100 |         uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e  # v1.13.0
101 |         with:
102 |           attestations: true
103 |           print-hash: true
104 |           verbose: true
105 | 


--------------------------------------------------------------------------------
/doc/source/laziness.rst:
--------------------------------------------------------------------------------
  1 | Laziness
  2 | ========
  3 | 
  4 | Lazy iterators evaluate only when necessary.  They allow us to semantically
  5 | manipulate large amounts of data while keeping very little of it actually in
  6 | memory.  They act like lists but don't take up space.
  7 | 
  8 | 
  9 | Example - A Tale of Two Cities
 10 | ------------------------------
 11 | 
 12 | We open `a file <http://www.gutenberg.org/cache/epub/98/pg98.txt>`_ containing
 13 | the text of the classic novel "A Tale of Two Cities"
 14 | by Charles Dickens.
 15 | 
 16 | .. code::
 17 | 
 18 |     >>> book = open('tale-of-two-cities.txt')
 19 | 
 20 | Much like a secondary school student, Python owns and opens the book without
 21 | reading a single line of the text.  The object ``book`` is a lazy iterator!
 22 | Python will give us a line of the text only when we explicitly ask it to do so:
 23 | 
 24 | .. code::
 25 | 
 26 |     >>> next(book)
 27 |     "It was the best of times,"
 28 | 
 29 |     >>> next(book)
 30 |     "it was the worst of times,"
 31 | 
 32 | and so on.  Each time we call ``next`` on ``book`` we burn through another line
 33 | of the text and the ``book`` iterator marches slowly onwards through the text.
 34 | 
 35 | 
 36 | Computation
 37 | -----------
 38 | 
 39 | We can lazily operate on lazy iterators without doing any actual computation.
 40 | For example, let's read the book in upper case:
 41 | 
 42 | .. code::
 43 | 
 44 |     >>> from toolz import map  # toolz' map is lazy by default
 45 | 
 46 |     >>> loud_book = map(str.upper, book)
 47 | 
 48 |     >>> next(loud_book)
 49 |     "IT WAS THE AGE OF WISDOM,"
 50 |     >>> next(loud_book)
 51 |     "IT WAS THE AGE OF FOOLISHNESS,"
 52 | 
 53 | It is as if we applied the function ``str.upper`` onto every line of the book;
 54 | yet the first line completes instantaneously.  Instead Python does the
 55 | uppercasing work only when it becomes necessary, i.e.  when you call ``next``
 56 | to ask for another line.
 57 | 
 58 | 
 59 | Reductions
 60 | ----------
 61 | 
 62 | You can operate on lazy iterators just as you would with lists, tuples, or
 63 | sets.  You can use them in for loops as in
 64 | 
 65 | 
 66 | .. code::
 67 | 
 68 |     for line in loud_book:
 69 |         ...
 70 | 
 71 | You can instantiate them all into memory by calling them with the constructors
 72 | ``list``, or ``tuple``.
 73 | 
 74 | .. code::
 75 | 
 76 |     loud_book = list(loud_book)
 77 | 
 78 | Of course if they are very large then this might be unwise.  Often we use
 79 | laziness to avoid loading large datasets into memory at once.  Many
 80 | computations on large datasets don't require access to all of the data at a
 81 | single time.  In particular *reductions* (like sum) often take large amounts of
 82 | sequential data (like [1, 2, 3, 4]) and produce much more manageable results
 83 | (like 10) and can do so just by viewing the data a little bit at a time.  For
 84 | example we can count all of the letters in the Tale of Two Cities trivially
 85 | using functions from ``toolz``
 86 | 
 87 | .. code::
 88 | 
 89 |     >>> from toolz import concat, frequencies
 90 |     >>> letters = frequencies(concat(loud_book))
 91 |     { 'A': 48036,
 92 |       'B': 8402,
 93 |       'C': 13812,
 94 |       'D': 28000,
 95 |       'E': 74624,
 96 |       ...
 97 | 
 98 | In this case ``frequencies`` is a sort of reduction.  At no time were more than
 99 | a few hundred bytes of Tale of Two Cities necessarily in memory.  We could just
100 | as easily have done this computation on the entire Gutenberg collection or on
101 | Wikipedia.  In this case we are limited by the size and speed of our hard drive
102 | and not by the capacity of our memory.
103 | 


--------------------------------------------------------------------------------
/doc/source/curry.rst:
--------------------------------------------------------------------------------
  1 | 
  2 | Curry
  3 | =====
  4 | 
  5 | Traditionally partial evaluation of functions is handled with the ``partial``
  6 | higher order function from ``functools``.  Currying provides syntactic sugar.
  7 | 
  8 | .. code::
  9 | 
 10 |     >>> double = partial(mul, 2)    # Partial evaluation
 11 |     >>> double = mul(2)             # Currying
 12 | 
 13 | This syntactic sugar is valuable when developers chain several higher order
 14 | functions together.
 15 | 
 16 | Partial Evaluation
 17 | ------------------
 18 | 
 19 | Often when composing smaller functions to form big ones we need partial
 20 | evaluation.  We do this in the word counting example:
 21 | 
 22 | .. code::
 23 | 
 24 |     >>> def stem(word):
 25 |     ...     """ Stem word to primitive form """
 26 |     ...     return word.lower().rstrip(",.!:;'-\"").lstrip("'\"")
 27 | 
 28 |     >>> wordcount = compose(frequencies, partial(map, stem), str.split)
 29 | 
 30 | Here we want to map the ``stem`` function onto each of the words produced by
 31 | ``str.split``.  We want a ``stem_many`` function that takes a list of words,
 32 | stems them, and returns a list back.  In full form this would look like the
 33 | following:
 34 | 
 35 | .. code::
 36 | 
 37 |     >>> def stem_many(words):
 38 |     ...     return map(stem, words)
 39 | 
 40 | The ``partial`` function lets us create this function more naturally.
 41 | 
 42 | .. code::
 43 | 
 44 |     >>> stem_many = partial(map, stem)
 45 | 
 46 | In general
 47 | 
 48 | .. code::
 49 | 
 50 |     >>> def f(x, y, z):
 51 |     ...     # Do stuff with x, y, and z
 52 | 
 53 |     >>> # partially evaluate f with known values a and b
 54 |     >>> def g(z):
 55 |     ...     return f(a, b, z)
 56 | 
 57 |     >>> # alternatively we could use `partial`
 58 |     >>> g = partial(f, a, b)
 59 | 
 60 | Curry
 61 | -----
 62 | 
 63 | In this context currying is just syntactic sugar for partial evaluation.  A
 64 | curried function partially evaluates if it does not receive enough arguments to
 65 | compute a result.
 66 | 
 67 | .. code::
 68 | 
 69 |     >>> from toolz import curry
 70 | 
 71 |     >>> @curry              # We can use curry as a decorator
 72 |     ... def mul(x, y):
 73 |     ...     return x * y
 74 | 
 75 |     >>> double = mul(2)     # mul didn't receive enough arguments to evaluate
 76 |     ...                     # so it holds onto the 2 and waits, returning a
 77 |     ...                     # partially evaluated function `double`
 78 | 
 79 |     >>> double(5)
 80 |     10
 81 | 
 82 | So if ``map`` was curried...
 83 | 
 84 | .. code::
 85 | 
 86 |     >>> map = curry(map)
 87 | 
 88 | Then we could replace the ``partial`` with a function evaluation
 89 | 
 90 | .. code::
 91 | 
 92 |     >>> # wordcount = compose(frequencies, partial(map, stem), str.split)
 93 |     >>> wordcount = compose(frequencies, map(stem), str.split)
 94 | 
 95 | In this particular example it's probably simpler to stick with ``partial``.
 96 | Once ``partial`` starts occurring several times in your code it may be time to
 97 | switch to the ``curried`` namespace.
 98 | 
 99 | The Curried Namespace
100 | ---------------------
101 | 
102 | All functions present in the ``toolz`` namespace are curried in the
103 | ``toolz.curried`` namespace.
104 | 
105 | So you can exchange an import line like the following
106 | 
107 | .. code::
108 | 
109 |     >>> from toolz import *
110 | 
111 | For the following
112 | 
113 | .. code::
114 | 
115 |     >>> from toolz.curried import *
116 | 
117 | And all of your favorite ``toolz`` functions will curry automatically.  We've
118 | also included curried versions of the standard Python higher order functions
119 | like ``map``, ``filter``, ``reduce`` so you'll get them too (whether you like
120 | it or not.)
121 | 


--------------------------------------------------------------------------------
/doc/source/parallelism.rst:
--------------------------------------------------------------------------------
 1 | Parallelism
 2 | ===========
 3 | 
 4 | PyToolz tries to support other parallel processing libraries.  It does this
 5 | by ensuring easy serialization of ``toolz`` functions and providing
 6 | architecture-agnostic parallel algorithms.
 7 | 
 8 | In practice ``toolz`` is developed against ``multiprocessing`` and
 9 | ``ipyparallel``.
10 | 
11 | 
12 | Serialization
13 | -------------
14 | 
15 | Multiprocessing or distributed computing requires the transmission of functions
16 | between different processes or computers.  This is done through serializing the
17 | function into text, sending that text over a wire, and deserializing the text
18 | back into a function.  To the extent possible PyToolz functions are compatible
19 | with the standard serialization library ``pickle``.
20 | 
21 | The ``pickle`` library often fails for complex functions including lambdas,
22 | closures, and class methods.  When this occurs we recommend the alternative
23 | serialization library ``dill``.
24 | 
25 | 
26 | Example with parallel map
27 | -------------------------
28 | 
29 | Most parallel processing tasks may be significantly accelerated using only a
30 | parallel map operation.  A number of high quality parallel map operations exist
31 | in other libraries, notably ``multiprocessing``, ``ipyparallel``, and
32 | ``threading`` (if your operation is not processor bound).
33 | 
34 | In the example below we extend our wordcounting solution with a parallel map.
35 | We show how one can progress in development from sequential, to
36 | multiprocessing, to distributed computation all with the same domain code.
37 | 
38 | 
39 | .. code::
40 | 
41 |     from toolz.curried import map
42 |     from toolz import frequencies, compose, concat, merge_with
43 | 
44 |     def stem(word):
45 |         """ Stem word to primitive form
46 | 
47 |         >>> stem("Hello!")
48 |         'hello'
49 |         """
50 |         return word.lower().rstrip(",.!)-*_?:;$'-\"").lstrip("-*'\"(_$'")
51 | 
52 | 
53 |     wordcount = compose(frequencies, map(stem), concat, map(str.split), open)
54 | 
55 |     if __name__ == '__main__':
56 |         # Filenames for thousands of books from which we'd like to count words
57 |         filenames = ['Book_%d.txt'%i for i in range(10000)]
58 | 
59 |         # Start with sequential map for development
60 |         # pmap = map
61 | 
62 |         # Advance to Multiprocessing map for heavy computation on single machine
63 |         # from multiprocessing import Pool
64 |         # p = Pool(8)
65 |         # pmap = p.map
66 | 
67 |         # Finish with distributed parallel map for big data
68 |         from ipyparallel import Client
69 |         p = Client()[:]
70 |         pmap = p.map_sync
71 | 
72 |         total = merge_with(sum, pmap(wordcount, filenames))
73 | 
74 | This smooth transition is possible because
75 | 
76 | 1.  The ``map`` abstraction is a simple function call and so can be replaced.
77 |     By contrast, this transformation would be difficult if we had written our code with a
78 |     for loop or list comprehension.
79 | 2.  The operation ``wordcount`` is separate from the parallel solution.
80 | 3.  The task is embarrassingly parallel, needing only a very simple parallel
81 |     strategy.  Fortunately this is the common case.
82 | 
83 | 
84 | Parallel Algorithms
85 | -------------------
86 | 
87 | PyToolz does not implement parallel processing systems.  It does however
88 | provide parallel algorithms that can extend existing parallel systems.  Our
89 | general solution is to build algorithms that operate around a user-supplied
90 | parallel map function.
91 | 
92 | In particular we provide a parallel ``fold`` in ``toolz.sandbox.parallel.fold``.
93 | This fold can work equally well with ``multiprocessing.Pool.map``,
94 | ``multiprocessing.pool.ThreadPool.map``, or ``ipyparallel``'s ``map_async``.
95 | 


--------------------------------------------------------------------------------
/doc/source/composition.rst:
--------------------------------------------------------------------------------
 1 | Composability
 2 | =============
 3 | 
 4 | Toolz functions interoperate because they consume and produce only a small
 5 | set of common, core data structures.  Each ``toolz`` function consumes
 6 | just iterables, dictionaries, and functions and each ``toolz`` function produces
 7 | just iterables, dictionaries, and functions.  This standardized interface
 8 | enables us to compose several general purpose functions to solve custom
 9 | problems.
10 | 
11 | Standard interfaces enable us to use many tools together, even if those tools
12 | were not designed with each other in mind.  We call this "using together"
13 | composition.
14 | 
15 | 
16 | Standard Interface
17 | ------------------
18 | 
19 | This is best explained by two examples: the automobile industry and LEGOs.
20 | 
21 | Autos
22 | ^^^^^
23 | 
24 | Automobile pieces are not widely composable because they do not adhere to a
25 | standard interface.  You can't connect a Porsche engine to the body of a
26 | Volkswagen Beetle but include the safety features of your favorite luxury car.
27 | As a result when something breaks you need to find a specialist who understands
28 | exactly your collection of components and, depending on the popularity of your
29 | model, replacement parts may be difficult to find.  While the customization
30 | provides a number of efficiencies important for automobiles, it limits the
31 | ability of downstream tinkerers.  This ability for future developers to tinker
32 | is paramount in good software design.
33 | 
34 | Lego
35 | ^^^^
36 | 
37 | Contrast this with Lego toys.  With Lego you *can* connect a rocket engine and
38 | skis to a rowboat.  This is a perfectly natural thing to do because every piece
39 | adheres to a simple interface - those simple and regular 5mm circular bumps.
40 | This freedom to connect pieces at will lets children unleash their imagination
41 | in such varied ways (like going arctic shark hunting with a rocket-ski-boat).
42 | 
43 | The abstractions in programming make it far more like Lego than like building
44 | cars.  This breaks down a little when we start to be constrained by performance
45 | or memory issues but this affects only a very small fraction of applications.
46 | Most of the time we have the freedom to operate in the Lego model if we choose
47 | to give up customization and embrace simple core standards.
48 | 
49 | 
50 | Other Standard Interfaces
51 | -------------------------
52 | 
53 | The Toolz project builds off of a standard interface -- this choice is not
54 | unique.  Other standard interfaces exist and provide immeasurable benefit to
55 | their application areas.
56 | 
57 | The NumPy array serves as a foundational object for numeric and scientific
58 | computing within Python.  The ability of any project to consume and produce
59 | NumPy arrays is largely responsible for the broad success of the
60 | various SciPy projects.  We see similar development today with the Pandas
61 | DataFrame.
62 | 
63 | The UNIX toolset relies on files and streams of text.
64 | 
65 | JSON emerged as the standard interface for communication over the web.  The
66 | virtues of standardization become glaringly apparent when we contrast JSON with
67 | its predecessor, XML.  XML was designed to be extensible/customizable, allowing
68 | each application to design its own interface.  This resulted in a sea of
69 | difficult to understand custom data languages that failed to develop a common
70 | analytic and data processing infrastructure.  In contrast JSON is very
71 | restrictive and allows only a fixed set of data structures, namely lists,
72 | dictionaries, numbers, strings.  Fortunately this set is common to most modern
73 | languages and so JSON is extremely widely supported, perhaps falling second
74 | only to CSV.
75 | 
76 | Standard interfaces permeate physical reality as well.  Examples range
77 | from supra-national currencies to drill bits and electrical circuitry.  In all
78 | cases the interoperation that results becomes a defining and invaluable feature
79 | of each solution.
80 | 


--------------------------------------------------------------------------------
/toolz/tests/test_curried.py:
--------------------------------------------------------------------------------
  1 | import toolz
  2 | import toolz.curried
  3 | from toolz.curried import (take, first, second, sorted, merge_with, reduce,
  4 |                            merge, operator as cop)
  5 | from collections import defaultdict
  6 | from importlib import import_module
  7 | from operator import add
  8 | 
  9 | 
 10 | def test_take():
 11 |     assert list(take(2)([1, 2, 3])) == [1, 2]
 12 | 
 13 | 
 14 | def test_first():
 15 |     assert first is toolz.itertoolz.first
 16 | 
 17 | 
 18 | def test_merge():
 19 |     assert merge(factory=lambda: defaultdict(int))({1: 1}) == {1: 1}
 20 |     assert merge({1: 1}) == {1: 1}
 21 |     assert merge({1: 1}, factory=lambda: defaultdict(int)) == {1: 1}
 22 | 
 23 | 
 24 | def test_merge_with():
 25 |     assert merge_with(sum)({1: 1}, {1: 2}) == {1: 3}
 26 | 
 27 | 
 28 | def test_merge_with_list():
 29 |     assert merge_with(sum, [{'a': 1}, {'a': 2}]) == {'a': 3}
 30 | 
 31 | 
 32 | def test_sorted():
 33 |     assert sorted(key=second)([(1, 2), (2, 1)]) == [(2, 1), (1, 2)]
 34 | 
 35 | 
 36 | def test_reduce():
 37 |     assert reduce(add)((1, 2, 3)) == 6
 38 | 
 39 | 
 40 | def test_module_name():
 41 |     assert toolz.curried.__name__ == 'toolz.curried'
 42 | 
 43 | 
 44 | def should_curry(func):
 45 |     if not callable(func) or isinstance(func, toolz.curry):
 46 |         return False
 47 |     nargs = toolz.functoolz.num_required_args(func)
 48 |     if nargs is None or nargs > 1:
 49 |         return True
 50 |     return nargs == 1 and toolz.functoolz.has_keywords(func)
 51 | 
 52 | 
def test_curried_operator():
    """Check which callables in ``toolz.curried.operator`` are curried.

    Every callable there must agree with ``should_curry`` applied to the
    same-named attribute of the standard ``operator`` module.
    """
    import operator

    for k, v in vars(cop).items():
        if not callable(v):
            continue

        if not isinstance(v, toolz.curry):
            try:
                # Make sure it is unary
                v(1)
            except TypeError:
                # Calling with an int failed; retry with a string before
                # concluding that one argument is not enough.
                try:
                    v('x')
                except TypeError:
                    pass
                else:
                    # Accepts one string argument: skip the final check
                    # for this name.
                    continue
                # Both single-argument calls raised TypeError.
                raise AssertionError(
                    'toolz.curried.operator.%s is not curried!' % k,
                )
        # The assertion message is the attribute name being checked.
        assert should_curry(getattr(operator, k)) == isinstance(v, toolz.curry), k

    # Make sure this isn't totally empty.
    assert len(set(vars(cop)) & {'add', 'sub', 'mul'}) == 3
 78 | 
 79 | 
 80 | def test_curried_namespace():
 81 |     exceptions = import_module('toolz.curried.exceptions')
 82 |     namespace = {}
 83 | 
 84 | 
 85 |     def curry_namespace(ns):
 86 |         return {
 87 |             name: toolz.curry(f) if should_curry(f) else f
 88 |             for name, f in ns.items() if '__' not in name
 89 |         }
 90 | 
 91 |     from_toolz = curry_namespace(vars(toolz))
 92 |     from_exceptions = curry_namespace(vars(exceptions))
 93 |     namespace.update(toolz.merge(from_toolz, from_exceptions))
 94 | 
 95 |     namespace = toolz.valfilter(callable, namespace)
 96 |     curried_namespace = toolz.valfilter(callable, toolz.curried.__dict__)
 97 | 
 98 |     if namespace != curried_namespace:
 99 |         missing = set(namespace) - set(curried_namespace)
100 |         if missing:
101 |             raise AssertionError('There are missing functions in toolz.curried:\n    %s'
102 |                                  % '    \n'.join(sorted(missing)))
103 |         extra = set(curried_namespace) - set(namespace)
104 |         if extra:
105 |             raise AssertionError('There are extra functions in toolz.curried:\n    %s'
106 |                                  % '    \n'.join(sorted(extra)))
107 |         unequal = toolz.merge_with(list, namespace, curried_namespace)
108 |         unequal = toolz.valfilter(lambda x: x[0] != x[1], unequal)
109 |         messages = []
110 |         for name, (orig_func, auto_func) in sorted(unequal.items()):
111 |             if name in from_exceptions:
112 |                 messages.append('%s should come from toolz.curried.exceptions' % name)
113 |             elif should_curry(getattr(toolz, name)):
114 |                 messages.append('%s should be curried from toolz' % name)
115 |             else:
116 |                 messages.append('%s should come from toolz and NOT be curried' % name)
117 |         raise AssertionError('\n'.join(messages))
118 | 


--------------------------------------------------------------------------------
/toolz/sandbox/tests/test_core.py:
--------------------------------------------------------------------------------
  1 | from toolz import curry, unique, first, take
  2 | from toolz.sandbox.core import EqualityHashKey, unzip
  3 | from itertools import count, repeat
  4 | 
  5 | def test_EqualityHashKey_default_key():
  6 |     EqualityHashDefault = curry(EqualityHashKey, None)
  7 |     L1 = [1]
  8 |     L2 = [2]
  9 |     data1 = [L1, L1, L2, [], [], [1], [2], {}, ()]
 10 |     set1 = set(map(EqualityHashDefault, data1))
 11 |     set2 = set(map(EqualityHashDefault, [[], [1], [2], {}, ()]))
 12 |     assert set1 == set2
 13 |     assert len(set1) == 5
 14 | 
 15 |     # Test that ``EqualityHashDefault(item)`` is distinct from ``item``
 16 |     T0 = ()
 17 |     T1 = (1,)
 18 |     data2 = list(map(EqualityHashDefault, [T0, T0, T1, T1, (), (1,)]))
 19 |     data2.extend([T0, T1, (), (1,)])
 20 |     set3 = set(data2)
 21 |     assert set3 == {(), (1,), EqualityHashDefault(()),
 22 |                         EqualityHashDefault((1,))}
 23 |     assert len(set3) == 4
 24 |     assert EqualityHashDefault(()) in set3
 25 |     assert EqualityHashDefault((1,)) in set3
 26 | 
 27 |     # Miscellaneous
 28 |     E1 = EqualityHashDefault(L1)
 29 |     E2 = EqualityHashDefault(L2)
 30 |     assert str(E1) == '=[1]='
 31 |     assert repr(E1) == '=[1]='
 32 |     assert E1 != E2
 33 |     assert not (E1 == E2)
 34 |     assert E1 == EqualityHashDefault(L1)
 35 |     assert not (E1 != EqualityHashDefault(L1))
 36 |     assert E1 != L1
 37 |     assert not (E1 == L1)
 38 | 
 39 | 
 40 | def test_EqualityHashKey_callable_key():
 41 |     # Common simple hash key functions.
 42 |     EqualityHashLen = curry(EqualityHashKey, len)
 43 |     EqualityHashType = curry(EqualityHashKey, type)
 44 |     EqualityHashId = curry(EqualityHashKey, id)
 45 |     EqualityHashFirst = curry(EqualityHashKey, first)
 46 |     data1 = [[], [1], (), (1,), {}, {1: 2}]
 47 |     data2 = [[1, 2], (1, 2), (1, 3), [1, 3], [2, 1], {1: 2}]
 48 |     assert list(unique(data1*3, key=EqualityHashLen)) == data1
 49 |     assert list(unique(data2*3, key=EqualityHashLen)) == data2
 50 |     assert list(unique(data1*3, key=EqualityHashType)) == data1
 51 |     assert list(unique(data2*3, key=EqualityHashType)) == data2
 52 |     assert list(unique(data1*3, key=EqualityHashId)) == data1
 53 |     assert list(unique(data2*3, key=EqualityHashId)) == data2
 54 |     assert list(unique(data2*3, key=EqualityHashFirst)) == data2
 55 | 
 56 | 
 57 | def test_EqualityHashKey_index_key():
 58 |     d1 = {'firstname': 'Alice', 'age': 21, 'data': {}}
 59 |     d2 = {'firstname': 'Alice', 'age': 34, 'data': {}}
 60 |     d3a = {'firstname': 'Bob', 'age': 56, 'data': {}}
 61 |     d3b = {'firstname': 'Bob', 'age': 56, 'data': {}}
 62 |     EqualityHashFirstname = curry(EqualityHashKey, 'firstname')
 63 |     assert list(unique(3*[d1, d2, d3a, d3b],
 64 |                        key=EqualityHashFirstname)) == [d1, d2, d3a]
 65 |     EqualityHashFirstnameAge = curry(EqualityHashKey, ['firstname', 'age'])
 66 |     assert list(unique(3*[d1, d2, d3a, d3b],
 67 |                        key=EqualityHashFirstnameAge)) == [d1, d2, d3a]
 68 |     list1 = [0] * 10
 69 |     list2 = [0] * 100
 70 |     list3a = [1] * 10
 71 |     list3b = [1] * 10
 72 |     EqualityHash0 = curry(EqualityHashKey, 0)
 73 |     assert list(unique(3*[list1, list2, list3a, list3b],
 74 |                        key=EqualityHash0)) == [list1, list2, list3a]
 75 | 
 76 | 
 77 | def test_unzip():
 78 |     def _to_lists(seq, n=10):
 79 |         """iter of iters -> finite list of finite lists
 80 |         """
 81 |         def initial(s):
 82 |             return list(take(n, s))
 83 | 
 84 |         return initial(map(initial, seq))
 85 | 
 86 |     def _assert_initial_matches(a, b, n=10):
 87 |         assert list(take(n, a)) == list(take(n, b))
 88 | 
 89 |     # Unzips a simple list correctly
 90 |     assert _to_lists(unzip([('a', 1), ('b', 2), ('c', 3)])) \
 91 |         == [['a', 'b', 'c'], [1, 2, 3]]
 92 | 
 93 |     # Can handle a finite number of infinite iterators (the naive unzip
 94 |     # implementation `zip(*args)` implementation fails on this example).
 95 |     a, b, c = unzip(zip(count(1), repeat(0), repeat(1)))
 96 |     _assert_initial_matches(a, count(1))
 97 |     _assert_initial_matches(b, repeat(0))
 98 |     _assert_initial_matches(c, repeat(1))
 99 | 
100 |     # Sensibly handles empty input
101 |     assert list(unzip(zip([]))) == []
102 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | Toolz
  2 | =====
  3 | 
  4 | |Build Status| |Coverage Status| |Version Status|
  5 | 
  6 | A set of utility functions for iterators, functions, and dictionaries.
  7 | 
  8 | See the PyToolz documentation at https://toolz.readthedocs.io
  9 | 
 10 | LICENSE
 11 | -------
 12 | 
 13 | New BSD. See `License File <https://github.com/pytoolz/toolz/blob/master/LICENSE.txt>`__.
 14 | 
 15 | Install
 16 | -------
 17 | 
 18 | ``toolz`` is on the Python Package Index (PyPI):
 19 | 
 20 | ::
 21 | 
 22 |     pip install toolz
 23 | 
 24 | Structure and Heritage
 25 | ----------------------
 26 | 
 27 | ``toolz`` is implemented in three parts:
 28 | 
 29 | |literal itertoolz|_, for operations on iterables. Examples: ``groupby``,
 30 | ``unique``, ``interpose``,
 31 | 
 32 | |literal functoolz|_, for higher-order functions. Examples: ``memoize``,
 33 | ``curry``, ``compose``,
 34 | 
 35 | |literal dicttoolz|_, for operations on dictionaries. Examples: ``assoc``,
 36 | ``update_in``, ``merge``.
 37 | 
 38 | .. |literal itertoolz| replace:: ``itertoolz``
 39 | .. _literal itertoolz: https://github.com/pytoolz/toolz/blob/master/toolz/itertoolz.py
 40 | 
 41 | .. |literal functoolz| replace:: ``functoolz``
 42 | .. _literal functoolz: https://github.com/pytoolz/toolz/blob/master/toolz/functoolz.py
 43 | 
 44 | .. |literal dicttoolz| replace:: ``dicttoolz``
 45 | .. _literal dicttoolz: https://github.com/pytoolz/toolz/blob/master/toolz/dicttoolz.py
 46 | 
 47 | These functions come from the legacy of functional languages for list
 48 | processing. They interoperate well to accomplish common complex tasks.
 49 | 
 50 | Read our `API
 51 | Documentation <https://toolz.readthedocs.io/en/latest/api.html>`__ for
 52 | more details.
 53 | 
 54 | Example
 55 | -------
 56 | 
 57 | This builds a standard wordcount function from pieces within ``toolz``:
 58 | 
 59 | .. code:: python
 60 | 
 61 |     >>> def stem(word):
 62 |     ...     """ Stem word to primitive form """
 63 |     ...     return word.lower().rstrip(",.!:;'-\"").lstrip("'\"")
 64 | 
 65 |     >>> from toolz import compose, frequencies
 66 |     >>> from toolz.curried import map
 67 |     >>> wordcount = compose(frequencies, map(stem), str.split)
 68 | 
 69 |     >>> sentence = "This cat jumped over this other cat!"
 70 |     >>> wordcount(sentence)
 71 |     {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1}
 72 | 
 73 | Dependencies
 74 | ------------
 75 | 
 76 | ``toolz`` supports Python 3.9+ with a common codebase.
 77 | It is pure Python and requires no dependencies beyond the standard
 78 | library.
 79 | 
 80 | It is, in short, a lightweight dependency.
 81 | 
 82 | 
 83 | CyToolz
 84 | -------
 85 | 
 86 | The ``toolz`` project has been reimplemented in `Cython <https://cython.org>`__.
 87 | The ``cytoolz`` project is a drop-in replacement for the pure Python
 88 | implementation.
 89 | See `CyToolz GitHub Page <https://github.com/pytoolz/cytoolz/>`__ for more
 90 | details.
 91 | 
 92 | See Also
 93 | --------
 94 | 
 95 | -  `Underscore.js <https://underscorejs.org/>`__: A similar library for
 96 |    JavaScript
 97 | -  `Enumerable <https://ruby-doc.org/core-2.0.0/Enumerable.html>`__: A
 98 |    similar library for Ruby
 99 | -  `Clojure <https://clojure.org/>`__: A functional language whose
100 |    standard library has several counterparts in ``toolz``
101 | -  `itertools <https://docs.python.org/3/library/itertools.html>`__: The
102 |    Python standard library for iterator tools
103 | -  `functools <https://docs.python.org/3/library/functools.html>`__: The
104 |    Python standard library for function tools
105 | 
106 | Project Status
107 | --------------
108 | 
109 | **This project is alive but inactive.**
110 | 
111 | The original maintainers have mostly moved on to other endeavors.  We're still
112 | around for critical bug fixes, Python version bumps, and security issues and
113 | will commit to keeping the project alive (it's highly depended upon).
114 | However, beyond that we don't plan to spend much time reviewing contributions.
115 | We view Toolz as mostly complete.
116 | 
117 | We encourage enthusiasts to innovate in new and wonderful places 🚀
118 | 
119 | .. |Build Status| image:: https://github.com/pytoolz/toolz/actions/workflows/test.yml/badge.svg?branch=master
120 |    :target: https://github.com/pytoolz/toolz/actions
121 | .. |Coverage Status| image:: https://codecov.io/gh/pytoolz/toolz/graph/badge.svg?token=4ZFc9dwKqY
122 |    :target: https://codecov.io/gh/pytoolz/toolz
123 | .. |Version Status| image:: https://badge.fury.io/py/toolz.svg
124 |    :target: https://badge.fury.io/py/toolz
125 | 


--------------------------------------------------------------------------------
/toolz/sandbox/core.py:
--------------------------------------------------------------------------------
  1 | from toolz.itertoolz import getter, cons, pluck
  2 | from itertools import tee, starmap
  3 | 
  4 | 
  5 | # See #166: https://github.com/pytoolz/toolz/issues/166
  6 | # See #173: https://github.com/pytoolz/toolz/pull/173
  7 | class EqualityHashKey:
  8 |     """ Create a hash key that uses equality comparisons between items.
  9 | 
 10 |     This may be used to create hash keys for otherwise unhashable types:
 11 | 
 12 |     >>> from toolz import curry
 13 |     >>> EqualityHashDefault = curry(EqualityHashKey, None)
 14 |     >>> set(map(EqualityHashDefault, [[], (), [1], [1]]))  # doctest: +SKIP
 15 |     {=[]=, =()=, =[1]=}
 16 | 
 17 |     **Caution:** adding N ``EqualityHashKey`` items to a hash container
 18 |     may require O(N**2) operations, not O(N) as for typical hashable types.
 19 |     Therefore, a suitable key function such as ``tuple`` or ``frozenset``
 20 |     is usually preferred over using ``EqualityHashKey`` if possible.
 21 | 
 22 |     The ``key`` argument to ``EqualityHashKey`` should be a function or
 23 |     index that returns a hashable object that effectively distinguishes
 24 |     unequal items.  This helps avoid the poor scaling that occurs when
 25 |     using the default key.  For example, the above example can be improved
 26 |     by using a key function that distinguishes items by length or type:
 27 | 
 28 |     >>> EqualityHashLen = curry(EqualityHashKey, len)
 29 |     >>> EqualityHashType = curry(EqualityHashKey, type)  # this works too
 30 |     >>> set(map(EqualityHashLen, [[], (), [1], [1]]))  # doctest: +SKIP
 31 |     {=[]=, =()=, =[1]=}
 32 | 
 33 |     ``EqualityHashKey`` is convenient to use when a suitable key function
 34 |     is complicated or unavailable.  For example, the following returns all
 35 |     unique values based on equality:
 36 | 
 37 |     >>> from toolz import unique
 38 |     >>> vals = [[], [], (), [1], [1], [2], {}, {}, {}]
 39 |     >>> list(unique(vals, key=EqualityHashDefault))
 40 |     [[], (), [1], [2], {}]
 41 | 
 42 |     **Warning:** don't change the equality value of an item already in a hash
 43 |     container.  Unhashable types are unhashable for a reason.  For example:
 44 | 
 45 |     >>> L1 = [1] ; L2 = [2]
 46 |     >>> s = set(map(EqualityHashDefault, [L1, L2]))
 47 |     >>> s  # doctest: +SKIP
 48 |     {=[1]=, =[2]=}
 49 | 
 50 |     >>> L1[0] = 2  # Don't do this!  ``s`` now has duplicate items!
 51 |     >>> s  # doctest: +SKIP
 52 |     {=[2]=, =[2]=}
 53 | 
 54 |     Although this may appear problematic, immutable data types are a common
 55 |     idiom in functional programming, and ``EqualityHashKey`` easily allows
 56 |     the same idiom to be used by convention rather than strict requirement.
 57 | 
 58 |     See Also:
 59 |         identity
 60 |     """
 61 |     __slots__ = ['item', 'key']
 62 |     _default_hashkey = '__default__hashkey__'  # stored as key when key is None
 63 | 
 64 |     def __init__(self, key, item):
 65 |         if key is None:
 66 |             self.key = self._default_hashkey
 67 |         elif not callable(key):
 68 |             self.key = getter(key)  # non-callable key: look it up on the item
 69 |         else:
 70 |             self.key = key
 71 |         self.item = item
 72 | 
 73 |     def __hash__(self):
 74 |         if self.key == self._default_hashkey:
 75 |             val = self.key  # constant: every default-key instance hashes alike
 76 |         else:
 77 |             val = self.key(self.item)
 78 |         return hash(val)
 79 | 
 80 |     def __eq__(self, other):
 81 |         try:
 82 |             return (self._default_hashkey == other._default_hashkey and
 83 |                     self.item == other.item)
 84 |         except AttributeError:  # e.g. ``other`` lacks the expected attributes
 85 |             return False
 86 | 
 87 |     def __ne__(self, other):
 88 |         return not self.__eq__(other)
 89 | 
 90 |     def __str__(self):
 91 |         return '=%s=' % str(self.item)
 92 | 
 93 |     def __repr__(self):
 94 |         return '=%s=' % repr(self.item)
 95 | 
 96 | 
 97 | # See issue #293: https://github.com/pytoolz/toolz/issues/239
 98 | def unzip(seq):
 99 |     """Inverse of ``zip``
100 | 
101 |     >>> a, b = unzip([('a', 1), ('b', 2)])
102 |     >>> list(a)
103 |     ['a', 'b']
104 |     >>> list(b)
105 |     [1, 2]
106 | 
107 |     Unlike the naive implementation ``def unzip(seq): zip(*seq)`` this
108 |     implementation can handle an infinite sequence ``seq``.
109 | 
110 |     Caveats:
111 | 
112 |     * The implementation uses ``tee``, and so can use a significant amount
113 |       of auxiliary storage if the resulting iterators are consumed at
114 |       different times.
115 | 
116 |     * The inner sequence cannot be infinite. In Python 3 ``zip(*seq)`` can be
117 |       used if ``seq`` is a finite sequence of infinite sequences.
118 | 
119 |     """
120 | 
121 |     seq = iter(seq)
122 | 
123 |     # Check how many iterators we need
124 |     try:
125 |         first = tuple(next(seq))
126 |     except StopIteration:
127 |         return tuple()
128 | 
129 |     # and create them
130 |     niters = len(first)
131 |     seqs = tee(cons(first, seq), niters)
132 | 
133 |     return tuple(starmap(pluck, enumerate(seqs)))
134 | 


--------------------------------------------------------------------------------
/doc/make.bat:
--------------------------------------------------------------------------------
  1 | @ECHO OFF
  2 | 
  3 | REM Command file for Sphinx documentation
  4 | 
  5 | if "%SPHINXBUILD%" == "" (
  6 | 	set SPHINXBUILD=sphinx-build
  7 | )
  8 | set BUILDDIR=build
  9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source
 11 | if NOT "%PAPER%" == "" (
 12 | 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 13 | 	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
 14 | )
 15 | 
 16 | if "%1" == "" goto help
 17 | 
 18 | if "%1" == "help" (
 19 | 	:help
 20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
 21 | 	echo.  html       to make standalone HTML files
 22 | 	echo.  dirhtml    to make HTML files named index.html in directories
 23 | 	echo.  singlehtml to make a single large HTML file
 24 | 	echo.  pickle     to make pickle files
 25 | 	echo.  json       to make JSON files
 26 | 	echo.  htmlhelp   to make HTML files and a HTML help project
 27 | 	echo.  qthelp     to make HTML files and a qthelp project
 28 | 	echo.  devhelp    to make HTML files and a Devhelp project
 29 | 	echo.  epub       to make an epub
 30 | 	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
 31 | 	echo.  text       to make text files
 32 | 	echo.  man        to make manual pages
 33 | 	echo.  texinfo    to make Texinfo files
 34 | 	echo.  gettext    to make PO message catalogs
 35 | 	echo.  changes    to make an overview over all changed/added/deprecated items
 36 | 	echo.  linkcheck  to check all external links for integrity
 37 | 	echo.  doctest    to run all doctests embedded in the documentation if enabled
 38 | 	goto end
 39 | )
 40 | 
 41 | if "%1" == "clean" (
 42 | 	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
 43 | 	del /q /s %BUILDDIR%\*
 44 | 	goto end
 45 | )
 46 | 
 47 | if "%1" == "html" (
 48 | 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
 49 | 	if errorlevel 1 exit /b 1
 50 | 	echo.
 51 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
 52 | 	goto end
 53 | )
 54 | 
 55 | if "%1" == "dirhtml" (
 56 | 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
 57 | 	if errorlevel 1 exit /b 1
 58 | 	echo.
 59 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
 60 | 	goto end
 61 | )
 62 | 
 63 | if "%1" == "singlehtml" (
 64 | 	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
 65 | 	if errorlevel 1 exit /b 1
 66 | 	echo.
 67 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
 68 | 	goto end
 69 | )
 70 | 
 71 | if "%1" == "pickle" (
 72 | 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
 73 | 	if errorlevel 1 exit /b 1
 74 | 	echo.
 75 | 	echo.Build finished; now you can process the pickle files.
 76 | 	goto end
 77 | )
 78 | 
 79 | if "%1" == "json" (
 80 | 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
 81 | 	if errorlevel 1 exit /b 1
 82 | 	echo.
 83 | 	echo.Build finished; now you can process the JSON files.
 84 | 	goto end
 85 | )
 86 | 
 87 | if "%1" == "htmlhelp" (
 88 | 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
 89 | 	if errorlevel 1 exit /b 1
 90 | 	echo.
 91 | 	echo.Build finished; now you can run HTML Help Workshop with the ^
 92 | .hhp project file in %BUILDDIR%/htmlhelp.
 93 | 	goto end
 94 | )
 95 | 
 96 | if "%1" == "qthelp" (
 97 | 	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
 98 | 	if errorlevel 1 exit /b 1
 99 | 	echo.
100 | 	echo.Build finished; now you can run "qcollectiongenerator" with the ^
101 | .qhcp project file in %BUILDDIR%/qthelp, like this:
102 | 	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Toolz.qhcp
103 | 	echo.To view the help file:
104 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Toolz.qhc
105 | 	goto end
106 | )
107 | 
108 | if "%1" == "devhelp" (
109 | 	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
110 | 	if errorlevel 1 exit /b 1
111 | 	echo.
112 | 	echo.Build finished.
113 | 	goto end
114 | )
115 | 
116 | if "%1" == "epub" (
117 | 	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
118 | 	if errorlevel 1 exit /b 1
119 | 	echo.
120 | 	echo.Build finished. The epub file is in %BUILDDIR%/epub.
121 | 	goto end
122 | )
123 | 
124 | if "%1" == "latex" (
125 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
126 | 	if errorlevel 1 exit /b 1
127 | 	echo.
128 | 	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
129 | 	goto end
130 | )
131 | 
132 | if "%1" == "text" (
133 | 	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
134 | 	if errorlevel 1 exit /b 1
135 | 	echo.
136 | 	echo.Build finished. The text files are in %BUILDDIR%/text.
137 | 	goto end
138 | )
139 | 
140 | if "%1" == "man" (
141 | 	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
142 | 	if errorlevel 1 exit /b 1
143 | 	echo.
144 | 	echo.Build finished. The manual pages are in %BUILDDIR%/man.
145 | 	goto end
146 | )
147 | 
148 | if "%1" == "texinfo" (
149 | 	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
150 | 	if errorlevel 1 exit /b 1
151 | 	echo.
152 | 	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
153 | 	goto end
154 | )
155 | 
156 | if "%1" == "gettext" (
157 | 	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
158 | 	if errorlevel 1 exit /b 1
159 | 	echo.
160 | 	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
161 | 	goto end
162 | )
163 | 
164 | if "%1" == "changes" (
165 | 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
166 | 	if errorlevel 1 exit /b 1
167 | 	echo.
168 | 	echo.The overview file is in %BUILDDIR%/changes.
169 | 	goto end
170 | )
171 | 
172 | if "%1" == "linkcheck" (
173 | 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
174 | 	if errorlevel 1 exit /b 1
175 | 	echo.
176 | 	echo.Link check complete; look for any errors in the above output ^
177 | or in %BUILDDIR%/linkcheck/output.txt.
178 | 	goto end
179 | )
180 | 
181 | if "%1" == "doctest" (
182 | 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
183 | 	if errorlevel 1 exit /b 1
184 | 	echo.
185 | 	echo.Testing of doctests in the sources finished, look at the ^
186 | results in %BUILDDIR%/doctest/output.txt.
187 | 	goto end
188 | )
189 | 
190 | :end
191 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = build
  9 | 
 10 | # Internal variables.
 11 | PAPEROPT_a4     = -D latex_paper_size=a4
 12 | PAPEROPT_letter = -D latex_paper_size=letter
 13 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 14 | # the i18n builder cannot share the environment and doctrees with the others
 15 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
 16 | 
 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext
 18 | 
 19 | help:
 20 | 	@echo "Please use \`make <target>' where <target> is one of"
 21 | 	@echo "  html       to make standalone HTML files"
 22 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 23 | 	@echo "  singlehtml to make a single large HTML file"
 24 | 	@echo "  pickle     to make pickle files"
 25 | 	@echo "  json       to make JSON files"
 26 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 27 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 28 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 29 | 	@echo "  epub       to make an epub"
 30 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 31 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 32 | 	@echo "  text       to make text files"
 33 | 	@echo "  man        to make manual pages"
 34 | 	@echo "  texinfo    to make Texinfo files"
 35 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 36 | 	@echo "  gettext    to make PO message catalogs"
 37 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 38 | 	@echo "  linkcheck  to check all external links for integrity"
 39 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 40 | 
 41 | clean:
 42 | 	-rm -rf $(BUILDDIR)/*
 43 | 
 44 | html:
 45 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 46 | 	@echo
 47 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 48 | 
 49 | dirhtml:
 50 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 51 | 	@echo
 52 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 53 | 
 54 | singlehtml:
 55 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 56 | 	@echo
 57 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 58 | 
 59 | pickle:
 60 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 61 | 	@echo
 62 | 	@echo "Build finished; now you can process the pickle files."
 63 | 
 64 | json:
 65 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 66 | 	@echo
 67 | 	@echo "Build finished; now you can process the JSON files."
 68 | 
 69 | htmlhelp:
 70 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 71 | 	@echo
 72 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 73 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 74 | 
 75 | qthelp:
 76 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 77 | 	@echo
 78 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 79 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 80 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Toolz.qhcp"
 81 | 	@echo "To view the help file:"
 82 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Toolz.qhc"
 83 | 
 84 | devhelp:
 85 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 86 | 	@echo
 87 | 	@echo "Build finished."
 88 | 	@echo "To view the help file:"
 89 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/Toolz"
 90 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Toolz"
 91 | 	@echo "# devhelp"
 92 | 
 93 | epub:
 94 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
 95 | 	@echo
 96 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
 97 | 
 98 | latex:
 99 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
100 | 	@echo
101 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
102 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
103 | 	      "(use \`make latexpdf' here to do that automatically)."
104 | 
105 | latexpdf:
106 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
107 | 	@echo "Running LaTeX files through pdflatex..."
108 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
109 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
110 | 
111 | text:
112 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
113 | 	@echo
114 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
115 | 
116 | man:
117 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
118 | 	@echo
119 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
120 | 
121 | texinfo:
122 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
123 | 	@echo
124 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
125 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
126 | 	      "(use \`make info' here to do that automatically)."
127 | 
128 | info:
129 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
130 | 	@echo "Running Texinfo files through makeinfo..."
131 | 	$(MAKE) -C $(BUILDDIR)/texinfo info
132 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
133 | 
134 | gettext:
135 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
136 | 	@echo
137 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
138 | 
139 | changes:
140 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
141 | 	@echo
142 | 	@echo "The overview file is in $(BUILDDIR)/changes."
143 | 
144 | linkcheck:
145 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
146 | 	@echo
147 | 	@echo "Link check complete; look for any errors in the above output " \
148 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
149 | 
150 | doctest:
151 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
152 | 	@echo "Testing of doctests in the sources finished, look at the " \
153 | 	      "results in $(BUILDDIR)/doctest/output.txt."
154 | 


--------------------------------------------------------------------------------
/toolz/tests/test_serialization.py:
--------------------------------------------------------------------------------
  1 | from toolz import *
  2 | import toolz
  3 | import toolz.curried
  4 | import pickle
  5 | from toolz.utils import raises
  6 | 
  7 | 
  8 | def test_compose():
  9 |     f = compose(str, sum)
 10 |     g = pickle.loads(pickle.dumps(f))
 11 |     assert f((1, 2)) == g((1, 2))
 12 | 
 13 | 
 14 | def test_curry():
 15 |     f = curry(map)(str)
 16 |     g = pickle.loads(pickle.dumps(f))
 17 |     assert list(f((1, 2, 3))) == list(g((1, 2, 3)))
 18 | 
 19 | 
 20 | def test_juxt():
 21 |     f = juxt(str, int, bool)
 22 |     g = pickle.loads(pickle.dumps(f))
 23 |     assert f(1) == g(1)
 24 |     assert f.funcs == g.funcs
 25 | 
 26 | 
 27 | def test_complement():
 28 |     f = complement(bool)
 29 |     assert f(True) is False
 30 |     assert f(False) is True
 31 |     g = pickle.loads(pickle.dumps(f))
 32 |     assert f(True) == g(True)
 33 |     assert f(False) == g(False)
 34 | 
 35 | 
 36 | def test_instanceproperty():
 37 |     p = toolz.functoolz.InstanceProperty(bool)
 38 |     assert p.__get__(None) is None
 39 |     assert p.__get__(0) is False
 40 |     assert p.__get__(1) is True
 41 |     p2 = pickle.loads(pickle.dumps(p))
 42 |     assert p2.__get__(None) is None
 43 |     assert p2.__get__(0) is False
 44 |     assert p2.__get__(1) is True
 45 | 
 46 | 
 47 | def f(x, y):  # two-argument helper that test_flip wraps with flip and pickles
 48 |     return x, y
 49 | 
 50 | 
 51 | def test_flip():
 52 |     flip = pickle.loads(pickle.dumps(toolz.functoolz.flip))
 53 |     assert flip is toolz.functoolz.flip
 54 |     g1 = flip(f)
 55 |     g2 = pickle.loads(pickle.dumps(g1))
 56 |     assert g1(1, 2) == g2(1, 2) == f(2, 1)
 57 |     g1 = flip(f)(1)
 58 |     g2 = pickle.loads(pickle.dumps(g1))
 59 |     assert g1(2) == g2(2) == f(2, 1)
 60 | 
 61 | 
 62 | def test_curried_exceptions():
 63 |     # This tests a global curried object that isn't defined in toolz.functoolz
 64 |     merge = pickle.loads(pickle.dumps(toolz.curried.merge))
 65 |     assert merge is toolz.curried.merge
 66 | 
 67 | 
 68 | @toolz.curry
 69 | class GlobalCurried:  # module-level curried class used as a fixture by test_curried_qualname
 70 |     def __init__(self, x, y):
 71 |         self.x = x
 72 |         self.y = y
 73 | 
 74 |     @toolz.curry
 75 |     def f1(self, a, b):  # curried method; test_curried_qualname reaches it via GlobalCurried.func.f1
 76 |         return self.x + self.y + a + b
 77 | 
 78 |     def g1(self):  # plain (un-curried) method; only referenced by the disabled assertions
 79 |         pass
 80 | 
 81 |     def __reduce__(self):
 82 |         """Allow us to serialize instances of GlobalCurried"""
 83 |         return GlobalCurried, (self.x, self.y)
 84 | 
 85 |     @toolz.curry
 86 |     class NestedCurried:  # curried class nested inside a curried class
 87 |         def __init__(self, x, y):
 88 |             self.x = x
 89 |             self.y = y
 90 | 
 91 |         @toolz.curry
 92 |         def f2(self, a, b):
 93 |             return self.x + self.y + a + b
 94 | 
 95 |         def g2(self):
 96 |             pass
 97 | 
 98 |         def __reduce__(self):
 99 |             """Allow us to serialize instances of NestedCurried"""
100 |             return GlobalCurried.NestedCurried, (self.x, self.y)
101 | 
102 |     class Nested:  # un-curried nested class that still has a curried method (f3)
103 |         def __init__(self, x, y):
104 |             self.x = x
105 |             self.y = y
106 | 
107 |         @toolz.curry
108 |         def f3(self, a, b):
109 |             return self.x + self.y + a + b
110 | 
111 |         def g3(self):
112 |             pass
113 | 
114 | 
115 | def test_curried_qualname():
116 |     # Curried objects reachable by qualified name should unpickle to the very same object.
117 |     def preserves_identity(obj):
118 |         return pickle.loads(pickle.dumps(obj)) is obj
119 | 
120 |     assert preserves_identity(GlobalCurried)
121 |     assert preserves_identity(GlobalCurried.func.f1)
122 |     assert preserves_identity(GlobalCurried.func.NestedCurried)
123 |     assert preserves_identity(GlobalCurried.func.NestedCurried.func.f2)
124 |     assert preserves_identity(GlobalCurried.func.Nested.f3)
125 | 
126 |     global_curried1 = GlobalCurried(1)  # partially applied: still waiting for y
127 |     global_curried2 = pickle.loads(pickle.dumps(global_curried1))
128 |     assert global_curried1 is not global_curried2
129 |     assert global_curried1(2).f1(3, 4) == global_curried2(2).f1(3, 4) == 10
130 | 
131 |     global_curried3 = global_curried1(2)  # fully applied: a GlobalCurried instance
132 |     global_curried4 = pickle.loads(pickle.dumps(global_curried3))
133 |     assert global_curried3 is not global_curried4
134 |     assert global_curried3.f1(3, 4) == global_curried4.f1(3, 4) == 10
135 | 
136 |     func1 = global_curried1(2).f1(3)  # partially applied curried method bound to an instance
137 |     func2 = pickle.loads(pickle.dumps(func1))
138 |     assert func1 is not func2
139 |     assert func1(4) == func2(4) == 10
140 | 
141 |     nested_curried1 = GlobalCurried.func.NestedCurried(1)
142 |     nested_curried2 = pickle.loads(pickle.dumps(nested_curried1))
143 |     assert nested_curried1 is not nested_curried2
144 |     assert nested_curried1(2).f2(3, 4) == nested_curried2(2).f2(3, 4) == 10
145 | 
146 |     # If we add `curry.__getattr__` forwarding, the following tests will pass
147 |     # NOTE(review): PY34 below is not provided by any import visible in this file; confirm before enabling.
148 |     # if not PY34:
149 |     #     assert preserves_identity(GlobalCurried.func.g1)
150 |     #     assert preserves_identity(GlobalCurried.func.NestedCurried.func.g2)
151 |     #     assert preserves_identity(GlobalCurried.func.Nested)
152 |     #     assert preserves_identity(GlobalCurried.func.Nested.g3)
153 |     #
154 |     # # Rely on curry.__getattr__
155 |     # assert preserves_identity(GlobalCurried.f1)
156 |     # assert preserves_identity(GlobalCurried.NestedCurried)
157 |     # assert preserves_identity(GlobalCurried.NestedCurried.f2)
158 |     # assert preserves_identity(GlobalCurried.Nested.f3)
159 |     # if not PY34:
160 |     #     assert preserves_identity(GlobalCurried.g1)
161 |     #     assert preserves_identity(GlobalCurried.NestedCurried.g2)
162 |     #     assert preserves_identity(GlobalCurried.Nested)
163 |     #     assert preserves_identity(GlobalCurried.Nested.g3)
164 |     #
165 |     # nested_curried3 = nested_curried1(2)
166 |     # nested_curried4 = pickle.loads(pickle.dumps(nested_curried3))
167 |     # assert nested_curried3 is not nested_curried4
168 |     # assert nested_curried3.f2(3, 4) == nested_curried4.f2(3, 4) == 10
169 |     #
170 |     # func1 = nested_curried1(2).f2(3)
171 |     # func2 = pickle.loads(pickle.dumps(func1))
172 |     # assert func1 is not func2
173 |     # assert func1(4) == func2(4) == 10
174 |     #
175 |     # if not PY34:
176 |     #     nested3 = GlobalCurried.func.Nested(1, 2)
177 |     #     nested4 = pickle.loads(pickle.dumps(nested3))
178 |     #     assert nested3 is not nested4
179 |     #     assert nested3.f3(3, 4) == nested4.f3(3, 4) == 10
180 |     #
181 |     #     func1 = nested3.f3(3)
182 |     #     func2 = pickle.loads(pickle.dumps(func1))
183 |     #     assert func1 is not func2
184 |     #     assert func1(4) == func2(4) == 10
185 | 
186 | 
187 | def test_curried_bad_qualname():  # pickling must raise PicklingError for this bogus __qualname__
188 |     @toolz.curry
189 |     class Bad:
190 |         __qualname__ = 'toolz.functoolz.not.a.valid.path'
191 | 
192 |     assert raises(pickle.PicklingError, lambda: pickle.dumps(Bad))
193 | 


--------------------------------------------------------------------------------
/doc/source/control.rst:
--------------------------------------------------------------------------------
  1 | Control Flow
  2 | ============
  3 | 
  4 | Programming is hard when we think simultaneously about several concepts.  Good
  5 | programming breaks down big problems into small problems and
  6 | builds up small solutions into big solutions.  By this practice the
  7 | need for simultaneous thought is restricted to only a few elements at a time.
  8 | 
  9 | All modern languages provide mechanisms to build data into data structures and
 10 | to build functions out of other functions.  The third element of programming,
 11 | besides data and functions, is control flow.  Building complex control flow
 12 | out of simple control flow presents deeper challenges.
 13 | 
 14 | 
 15 | What?
 16 | -----
 17 | 
 18 | Each element in a computer program is either
 19 | 
 20 | -   A variable or value literal like ``x``, ``total``, or ``5``
 21 | -   A function or computation like the ``+`` in ``x + 1``, the function ``fib``
 22 |     in ``fib(3)``, the method ``split`` in ``line.split(',')``, or the ``=`` in
 23 |     ``x = 0``
 24 | -   Control flow like ``if``, ``for``, or ``return``
 25 | 
 26 | Here is a piece of code; see if you can label each term as either
 27 | variable/value, function/computation, or control flow
 28 | 
 29 | .. code::
 30 | 
 31 |     def fib(n):
 32 |         a, b = 0, 1
 33 |         for i in range(n):
 34 |             a, b = b, a + b
 35 |         return b
 36 | 
 37 | Programming is hard when we have to juggle many code elements of each type at
 38 | the same time.  Good programming is about managing these three elements so that
 39 | the developer is only required to think about a handful of them at a time.  For
 40 | example we might collect many integer variables into a list of integers or
 41 | build a big function out of smaller ones.
 42 | 
 43 | We organize our data into **data structures** like lists, dictionaries, or objects
 44 | in order to group related data together -- this allows us to manipulate large
 45 | collections of related data as if we were only manipulating a single entity.
 46 | 
 47 | We **build large functions out of smaller ones**, enabling us to break up a
 48 | complex task like doing laundry into a sequence of simpler tasks.
 49 | 
 50 | .. code::
 51 | 
 52 |     def do_laundry(clothes):
 53 |         wet_clothes = wash(clothes)
 54 |         dry_clothes = dry(wet_clothes)
 55 |         return fold(dry_clothes)
 56 | 
 57 | While we have natural ways to manage data and functions, **control flow presents more of a challenge**.
 58 | How do we break down complex control flow into simpler pieces that fit in our brain?
 59 | How do we encapsulate commonly recurring patterns?
 60 | 
 61 | Let's motivate this with an example of a common control structure, applying a
 62 | function to each element in a list.  Imagine we want to download the HTML
 63 | source for a number of webpages.
 64 | 
 65 | .. code::
 66 | 
 67 |     from urllib.request import urlopen
 68 | 
 69 |     urls = ['http://www.google.com', 'http://www.wikipedia.com', 'http://www.apple.com']
 70 |     html_texts = []
 71 |     for item in urls:
 72 |         html_texts.append(urlopen(item))
 73 | 
 74 | Or maybe we want to compute the Fibonacci numbers on a particular set of
 75 | integers
 76 | 
 77 | .. code::
 78 | 
 79 |     integers = [1, 2, 3, 4, 5]
 80 |     fib_integers = []
 81 |     for item in integers:
 82 |         fib_integers.append(fib(item))
 83 | 
 84 | These two unrelated applications share an identical control flow pattern.  They
 85 | apply a function (``urlopen`` or ``fib``) onto each element of an input list
 86 | (``urls``, or ``integers``), appending the result onto an output list.  Because
 87 | this control flow pattern is so common we give it a name, ``map``, and say that
 88 | we map a function (like ``urlopen``) onto a list (like ``urls``).
 89 | 
 90 | Because Python can treat functions like variables we can encode this control
 91 | pattern into a higher-order function as follows:
 92 | 
 93 | .. code::
 94 | 
 95 |     def map(function, sequence):
 96 |         output = []
 97 |         for item in sequence:
 98 |             output.append(function(item))
 99 |         return output
100 | 
101 | This allows us to simplify our code above to the following pithy solutions
102 | 
103 | .. code::
104 | 
105 |     html_texts = map(urlopen, urls)
106 |     fib_integers = map(fib, integers)
107 | 
108 | Experienced Python programmers know that this control pattern is so popular
109 | that it has been elevated to the status of **syntax** with the popular list
110 | comprehension
111 | 
112 | .. code::
113 | 
114 |     html_texts = [urlopen(url) for url in urls]
115 | 
116 | 
117 | Why?
118 | ----
119 | 
120 | So maybe you already knew about ``map`` and don't use it or maybe you just
121 | prefer list comprehensions.  Why should you keep reading?
122 | 
123 | Managing Complexity
124 | ^^^^^^^^^^^^^^^^^^^
125 | 
126 | The higher order function ``map`` gives us a name to call a particular control
127 | pattern.  Regardless of whether or not you use a for loop, a list
128 | comprehension, or ``map`` itself, it is useful to recognize the operation
129 | and to give it a name.  Naming control patterns lets us tackle
130 | complex problems at larger scale without burdening our mind with rote details.
131 | It is just as important as bundling data into data structures or building
132 | complex functions out of simple ones.
133 | 
134 | *Naming control flow patterns enables programmers to manipulate increasingly
135 | complex operations.*
136 | 
137 | Other Patterns
138 | ^^^^^^^^^^^^^^
139 | 
140 | The function ``map`` has friends.  Advanced programmers may know about
141 | ``map``'s siblings, ``filter`` and ``reduce``.  The ``filter`` control pattern
142 | is also handled by list comprehension syntax and ``reduce`` is often replaced
143 | by straight for loops, so if you don't want to use them there is no immediately
144 | practical reason why you would care.
145 | 
146 | Most programmers however don't know about the many cousins of
147 | ``map``/``filter``/``reduce``.  Consider for example the unsung heroine,
148 | ``groupby``.  A brief example grouping names by their length follows:
149 | 
150 | .. code::
151 | 
152 |     >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
153 |     >>> groupby(len, names)
154 |     {5: ['Alice', 'Edith', 'Frank'], 3: ['Bob', 'Dan'], 7: ['Charlie']}
155 | 
156 | ``groupby`` collects each element of a list into sublists determined by the value
157 | of a function.  Let's see ``groupby`` in action again, grouping numbers by
158 | evenness.
159 | 
160 | .. code::
161 | 
162 |     >>> def iseven(n):
163 |     ...     return n % 2 == 0
164 | 
165 |     >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7])
166 |     {False: [1, 3, 5, 7], True: [2, 4, 6]}
167 | 
168 | If we were to write this second operation out by hand it might look something
169 | like the following:
170 | 
171 | .. code::
172 | 
173 |     evens = []
174 |     odds = []
175 |     for item in numbers:
176 |         if iseven(item):
177 |             evens.append(item)
178 |         else:
179 |             odds.append(item)
180 | 
181 | Most programmers have written code exactly like this over and over again, just
182 | like they may have repeated the ``map`` control pattern.  When we identify code
183 | as a ``groupby`` operation we mentally collapse the detailed manipulation into
184 | a single concept.
185 | 
186 | Additional Considerations
187 | ^^^^^^^^^^^^^^^^^^^^^^^^^
188 | 
189 | The Toolz library contains dozens of patterns like ``map`` and ``groupby``.
190 | Learning a core set (maybe a dozen) covers the vast majority of common
191 | programming tasks often done by hand.
192 | 
193 | *A rich vocabulary of core control functions conveys the following benefits:*
194 | 
195 | -   You identify new patterns
196 | -   You make fewer errors in rote coding
197 | -   You can depend on well tested and benchmarked implementations
198 | 
199 | But this does not come for free.  As in spoken language the use of a rich
200 | vocabulary can alienate new practitioners.  Most functional languages have
201 | fallen into this trap and are seen as unapproachable and smug.  Python
202 | maintains a low-brow reputation and benefits from it.  Just as with spoken
203 | language the value of using just-the-right-word must be moderated with the
204 | comprehension of the intended audience.
205 | 


--------------------------------------------------------------------------------
/toolz/tests/test_dicttoolz.py:
--------------------------------------------------------------------------------
  1 | from collections import defaultdict as _defaultdict
  2 | from collections.abc import Mapping
  3 | import os
  4 | from toolz.dicttoolz import (merge, merge_with, valmap, keymap, update_in,
  5 |                              assoc, dissoc, keyfilter, valfilter, itemmap,
  6 |                              itemfilter, assoc_in)
  7 | from toolz.functoolz import identity
  8 | from toolz.utils import raises
  9 | 
 10 | 
 11 | def inc(x):
 12 |     return x + 1
 13 | 
 14 | 
 15 | def iseven(i):
 16 |     return i % 2 == 0
 17 | 
 18 | 
 19 | class TestDict:
 20 |     """Test typical usage: dict inputs, no factory keyword.
 21 | 
 22 |     Class attributes:
 23 |         D: callable that inputs a dict and creates or returns a MutableMapping
 24 |         kw: kwargs dict to specify "factory" keyword (if applicable)
 25 |     """
 26 |     D = dict
 27 |     kw = {}
 28 | 
 29 |     def test_merge(self):
 30 |         D, kw = self.D, self.kw
 31 |         assert merge(D({1: 1, 2: 2}), D({3: 4}), **kw) == D({1: 1, 2: 2, 3: 4})
 32 | 
 33 |     def test_merge_iterable_arg(self):
 34 |         D, kw = self.D, self.kw
 35 |         assert merge([D({1: 1, 2: 2}), D({3: 4})], **kw) == D({1: 1, 2: 2, 3: 4})
 36 | 
 37 |     def test_merge_with(self):
 38 |         D, kw = self.D, self.kw
 39 |         dicts = D({1: 1, 2: 2}), D({1: 10, 2: 20})
 40 |         assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22})
 41 |         assert merge_with(tuple, *dicts, **kw) == D({1: (1, 10), 2: (2, 20)})
 42 | 
 43 |         dicts = D({1: 1, 2: 2, 3: 3}), D({1: 10, 2: 20})
 44 |         assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22, 3: 3})
 45 |         assert merge_with(tuple, *dicts, **kw) == D({1: (1, 10), 2: (2, 20), 3: (3,)})
 46 | 
 47 |         assert not merge_with(sum)
 48 | 
 49 |     def test_merge_with_iterable_arg(self):
 50 |         D, kw = self.D, self.kw
 51 |         dicts = D({1: 1, 2: 2}), D({1: 10, 2: 20})
 52 |         assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22})
 53 |         assert merge_with(sum, dicts, **kw) == D({1: 11, 2: 22})
 54 |         assert merge_with(sum, iter(dicts), **kw) == D({1: 11, 2: 22})
 55 | 
 56 |     def test_valmap(self):
 57 |         D, kw = self.D, self.kw
 58 |         assert valmap(inc, D({1: 1, 2: 2}), **kw) == D({1: 2, 2: 3})
 59 | 
 60 |     def test_keymap(self):
 61 |         D, kw = self.D, self.kw
 62 |         assert keymap(inc, D({1: 1, 2: 2}), **kw) == D({2: 1, 3: 2})
 63 | 
 64 |     def test_itemmap(self):
 65 |         D, kw = self.D, self.kw
 66 |         assert itemmap(reversed, D({1: 2, 2: 4}), **kw) == D({2: 1, 4: 2})
 67 | 
 68 |     def test_valfilter(self):
 69 |         D, kw = self.D, self.kw
 70 |         assert valfilter(iseven, D({1: 2, 2: 3}), **kw) == D({1: 2})
 71 | 
 72 |     def test_keyfilter(self):
 73 |         D, kw = self.D, self.kw
 74 |         assert keyfilter(iseven, D({1: 2, 2: 3}), **kw) == D({2: 3})
 75 | 
 76 |     def test_itemfilter(self):
 77 |         D, kw = self.D, self.kw
 78 |         assert itemfilter(lambda item: iseven(item[0]), D({1: 2, 2: 3}), **kw) == D({2: 3})
 79 |         assert itemfilter(lambda item: iseven(item[1]), D({1: 2, 2: 3}), **kw) == D({1: 2})
 80 | 
 81 |     def test_assoc(self):
 82 |         D, kw = self.D, self.kw
 83 |         assert assoc(D({}), "a", 1, **kw) == D({"a": 1})
 84 |         assert assoc(D({"a": 1}), "a", 3, **kw) == D({"a": 3})
 85 |         assert assoc(D({"a": 1}), "b", 3, **kw) == D({"a": 1, "b": 3})
 86 | 
 87 |         # Verify immutability:
 88 |         d = D({'x': 1})
 89 |         oldd = d
 90 |         assoc(d, 'x', 2, **kw)
 91 |         assert d is oldd
 92 | 
 93 |     def test_dissoc(self):
 94 |         D, kw = self.D, self.kw
 95 |         assert dissoc(D({"a": 1}), "a", **kw) == D({})
 96 |         assert dissoc(D({"a": 1, "b": 2}), "a", **kw) == D({"b": 2})
 97 |         assert dissoc(D({"a": 1, "b": 2}), "b", **kw) == D({"a": 1})
 98 |         assert dissoc(D({"a": 1, "b": 2}), "a", "b", **kw) == D({})
 99 |         assert dissoc(D({"a": 1}), "a", **kw) == dissoc(dissoc(D({"a": 1}), "a", **kw), "a", **kw)
100 | 
101 |         # Verify immutability:
102 |         d = D({'x': 1})
103 |         oldd = d
104 |         d2 = dissoc(d, 'x', **kw)
105 |         assert d is oldd
106 |         assert d2 is not oldd
107 | 
108 |     def test_assoc_in(self):
109 |         D, kw = self.D, self.kw
110 |         assert assoc_in(D({"a": 1}), ["a"], 2, **kw) == D({"a": 2})
111 |         assert (assoc_in(D({"a": D({"b": 1})}), ["a", "b"], 2, **kw) ==
112 |                 D({"a": D({"b": 2})}))
113 |         assert assoc_in(D({}), ["a", "b"], 1, **kw) == D({"a": D({"b": 1})})
114 | 
115 |         # Verify immutability:
116 |         d = D({'x': 1})
117 |         oldd = d
118 |         d2 = assoc_in(d, ['x'], 2, **kw)
119 |         assert d is oldd
120 |         assert d2 is not oldd
121 | 
122 |     def test_update_in(self):
123 |         D, kw = self.D, self.kw
124 |         assert update_in(D({"a": 0}), ["a"], inc, **kw) == D({"a": 1})
125 |         assert update_in(D({"a": 0, "b": 1}), ["b"], str, **kw) == D({"a": 0, "b": "1"})
126 |         assert (update_in(D({"t": 1, "v": D({"a": 0})}), ["v", "a"], inc, **kw) ==
127 |                 D({"t": 1, "v": D({"a": 1})}))
128 |         # Handle one missing key.
129 |         assert update_in(D({}), ["z"], str, None, **kw) == D({"z": "None"})
130 |         assert update_in(D({}), ["z"], inc, 0, **kw) == D({"z": 1})
131 |         assert update_in(D({}), ["z"], lambda x: x+"ar", default="b", **kw) == D({"z": "bar"})
132 |         # Same semantics as Clojure for multiple missing keys, ie. recursively
133 |         # create nested empty dictionaries to the depth specified by the
134 |         # keys with the innermost value set to f(default).
135 |         assert update_in(D({}), [0, 1], inc, default=-1, **kw) == D({0: D({1: 0})})
136 |         assert update_in(D({}), [0, 1], str, default=100, **kw) == D({0: D({1: "100"})})
137 |         assert (update_in(D({"foo": "bar", 1: 50}), ["d", 1, 0], str, 20, **kw) ==
138 |                 D({"foo": "bar", 1: 50, "d": D({1: D({0: "20"})})}))
139 |         # Verify immutability:
140 |         d = D({'x': 1})
141 |         oldd = d
142 |         update_in(d, ['x'], inc, **kw)
143 |         assert d is oldd
144 | 
145 |     def test_factory(self):
146 |         D, kw = self.D, self.kw
147 |         assert merge(defaultdict(int, D({1: 2})), D({2: 3})) == {1: 2, 2: 3}
148 |         assert (merge(defaultdict(int, D({1: 2})), D({2: 3}),
149 |                       factory=lambda: defaultdict(int)) ==
150 |                 defaultdict(int, D({1: 2, 2: 3})))
151 |         assert not (merge(defaultdict(int, D({1: 2})), D({2: 3}),
152 |                           factory=lambda: defaultdict(int)) == {1: 2, 2: 3})
153 |         assert raises(TypeError, lambda: merge(D({1: 2}), D({2: 3}), factoryy=dict))
154 | 
155 | 
156 | class defaultdict(_defaultdict):
157 |     def __eq__(self, other):
158 |         return (super().__eq__(other) and
159 |                 isinstance(other, _defaultdict) and
160 |                 self.default_factory == other.default_factory)
161 | 
162 | 
163 | class TestDefaultDict(TestDict):
164 |     """Test defaultdict as input and factory
165 | 
166 |     Class attributes:
167 |         D: static method that wraps a dict as ``defaultdict(int, dict_)``
168 |         kw: passes a ``factory`` keyword that builds an empty ``defaultdict(int)``
169 |     """
170 |     @staticmethod
171 |     def D(dict_):
172 |         return defaultdict(int, dict_)
173 | 
174 |     kw = {'factory': lambda: defaultdict(int)}
175 | 
176 | 
177 | class CustomMapping:
178 |     """Define methods of the MutableMapping protocol required by dicttoolz"""
179 |     def __init__(self, *args, **kwargs):
180 |         self._d = dict(*args, **kwargs)  # all operations delegate to this wrapped dict
181 | 
182 |     def __getitem__(self, key):
183 |         return self._d[key]
184 | 
185 |     def __setitem__(self, key, val):
186 |         self._d[key] = val
187 | 
188 |     def __delitem__(self, key):
189 |         del self._d[key]
190 | 
191 |     def __iter__(self):
192 |         return iter(self._d)
193 | 
194 |     def __len__(self):
195 |         return len(self._d)
196 | 
197 |     def __contains__(self, key):
198 |         return key in self._d
199 | 
200 |     def __eq__(self, other):  # only equal to another CustomMapping, never to a plain dict
201 |         return isinstance(other, CustomMapping) and self._d == other._d
202 | 
203 |     def __ne__(self, other):
204 |         return not isinstance(other, CustomMapping) or self._d != other._d
205 | 
206 |     def keys(self):
207 |         return self._d.keys()
208 | 
209 |     def values(self):
210 |         return self._d.values()
211 | 
212 |     def items(self):
213 |         return self._d.items()
214 | 
215 |     def update(self, *args, **kwargs):
216 |         self._d.update(*args, **kwargs)
217 | 
218 |     # Unused methods that are part of the MutableMapping protocol
219 |     #def get(self, key, *args):
220 |     #    return self._d.get(key, *args)
221 | 
222 |     #def pop(self, key, *args):
223 |     #    return self._d.pop(key, *args)
224 | 
225 |     #def popitem(self):
226 |     #    return self._d.popitem()
227 | 
228 |     #def clear(self):
229 |     #    self._d.clear()
230 | 
231 |     #def setdefault(self, key, *args):
232 |     #    return self._d.setdefault(key, *args)
233 | 
234 | 
235 | class TestCustomMapping(TestDict):
236 |     """Test CustomMapping as input and factory
237 | 
238 |     Class attributes:
239 |         D: the ``CustomMapping`` class, called with a dict to wrap it
240 |         kw: passes a ``factory`` keyword that builds an empty ``CustomMapping``
241 |     """
242 |     D = CustomMapping
243 |     kw = {'factory': lambda: CustomMapping()}
244 | 
245 | 
246 | def test_environ():
247 |     # See: https://github.com/pytoolz/cytoolz/issues/127
248 |     assert keymap(identity, os.environ) == os.environ
249 |     assert valmap(identity, os.environ) == os.environ
250 |     assert itemmap(identity, os.environ) == os.environ
251 | 
252 | 
253 | def test_merge_with_non_dict_mappings():  # merge/merge_with accept a Mapping that is not a dict
254 |     class Foo(Mapping):  # minimal read-only Mapping that delegates to the dict it is given
255 |         def __init__(self, d):
256 |             self.d = d
257 | 
258 |         def __iter__(self):
259 |             return iter(self.d)
260 | 
261 |         def __getitem__(self, key):
262 |             return self.d[key]
263 | 
264 |         def __len__(self):
265 |             return len(self.d)
266 | 
267 |     d = Foo({1: 1})
268 | 
269 |     assert merge(d) is d or merge(d) == {1: 1}  # either the same object or an equal dict is accepted
270 |     assert merge_with(sum, d) == {1: 1}
271 | 


--------------------------------------------------------------------------------
/doc/source/conf.py:
--------------------------------------------------------------------------------
  1 | # Toolz documentation build configuration file, created by
  2 | # sphinx-quickstart on Sun Sep 22 18:06:00 2013.
  3 | #
  4 | # This file is execfile()d with the current directory set to its containing dir.
  5 | #
  6 | # Note that not all possible configuration values are present in this
  7 | # autogenerated file.
  8 | #
  9 | # All configuration values have a default; values that are commented out
 10 | # serve to show the default.
 11 | 
 12 | import sys, os
 13 | 
 14 | # If extensions (or modules to document with autodoc) are in another directory,
 15 | # add these directories to sys.path here. If the directory is relative to the
 16 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 17 | #sys.path.insert(0, os.path.abspath('.'))
 18 | sys.path.insert(0, os.path.abspath('.'))
 19 | 
 20 | # -- General configuration -----------------------------------------------------
 21 | 
 22 | # If your documentation needs a minimal Sphinx version, state it here.
 23 | #needs_sphinx = '1.0'
 24 | 
 25 | # Add any Sphinx extension module names here, as strings. They can be extensions
 26 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 27 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', 'sphinx.ext.autosummary']
 28 | 
 29 | # Add any paths that contain templates here, relative to this directory.
 30 | templates_path = ['_templates']
 31 | 
 32 | # The suffix of source filenames.
 33 | source_suffix = '.rst'
 34 | 
 35 | # The encoding of source files.
 36 | #source_encoding = 'utf-8-sig'
 37 | 
 38 | # The master toctree document.
 39 | master_doc = 'index'
 40 | 
 41 | # General information about the project.
 42 | project = 'Toolz'
 43 | copyright = '2013, Matthew Rocklin, John Jacobsen'
 44 | 
 45 | # The version info for the project you're documenting, acts as replacement for
 46 | # |version| and |release|, also used in various other places throughout the
 47 | # built documents.
 48 | #
 49 | # The short X.Y version.
 50 | import toolz
 51 | version = toolz.__version__
 52 | # The full version, including alpha/beta/rc tags.
 53 | release = toolz.__version__
 54 | 
 55 | # The language for content autogenerated by Sphinx. Refer to documentation
 56 | # for a list of supported languages.
 57 | #language = None
 58 | 
 59 | # There are two options for replacing |today|: either, you set today to some
 60 | # non-false value, then it is used:
 61 | #today = ''
 62 | # Else, today_fmt is used as the format for a strftime call.
 63 | #today_fmt = '%B %d, %Y'
 64 | 
 65 | # List of patterns, relative to source directory, that match files and
 66 | # directories to ignore when looking for source files.
 67 | exclude_patterns = []
 68 | 
 69 | # The reST default role (used for this markup: `text`) to use for all documents.
 70 | #default_role = None
 71 | 
 72 | # If true, '()' will be appended to :func: etc. cross-reference text.
 73 | #add_function_parentheses = True
 74 | 
 75 | # If true, the current module name will be prepended to all description
 76 | # unit titles (such as .. function::).
 77 | #add_module_names = True
 78 | 
 79 | # If true, sectionauthor and moduleauthor directives will be shown in the
 80 | # output. They are ignored by default.
 81 | #show_authors = False
 82 | 
 83 | # The name of the Pygments (syntax highlighting) style to use.
 84 | pygments_style = 'sphinx'
 85 | 
 86 | # A list of ignored prefixes for module index sorting.
 87 | #modindex_common_prefix = []
 88 | 
 89 | 
 90 | # -- Options for HTML output ---------------------------------------------------
 91 | 
 92 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 93 | # a list of builtin themes.
 94 | html_theme = 'furo'
 95 | 
 96 | # Theme options are theme-specific and customize the look and feel of a theme
 97 | # further.  For a list of options available for each theme, see the
 98 | # documentation.
 99 | #html_theme_options = {}
100 | 
101 | # Add any paths that contain custom themes here, relative to this directory.
102 | #html_theme_path = []
103 | 
104 | # The name for this set of Sphinx documents.  If None, it defaults to
105 | # "<project> v<release> documentation".
106 | html_title = "Toolz"
107 | 
108 | # A shorter title for the navigation bar.  Default is the same as html_title.
109 | #html_short_title = None
110 | 
111 | # The name of an image file (relative to this directory) to place at the top
112 | # of the sidebar.
113 | #html_logo = None
114 | 
115 | # The name of an image file (within the static path) to use as favicon of the
116 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
117 | # pixels large.
118 | #html_favicon = None
119 | 
120 | # Add any paths that contain custom static files (such as style sheets) here,
121 | # relative to this directory. They are copied after the builtin static files,
122 | # so a file named "default.css" will overwrite the builtin "default.css".
123 | html_static_path = ['_static']
124 | 
125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
126 | # using the given strftime format.
127 | #html_last_updated_fmt = '%b %d, %Y'
128 | 
129 | # If true, SmartyPants will be used to convert quotes and dashes to
130 | # typographically correct entities.
131 | #html_use_smartypants = True
132 | 
133 | # Custom sidebar templates, maps document names to template names.
134 | #html_sidebars = {}
135 | 
136 | # Additional templates that should be rendered to pages, maps page names to
137 | # template names.
138 | #html_additional_pages = {}
139 | 
140 | # If false, no module index is generated.
141 | #html_domain_indices = True
142 | 
143 | # If false, no index is generated.
144 | #html_use_index = True
145 | 
146 | # If true, the index is split into individual pages for each letter.
147 | #html_split_index = False
148 | 
149 | # If true, links to the reST sources are added to the pages.
150 | #html_show_sourcelink = True
151 | 
152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
153 | #html_show_sphinx = True
154 | 
155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
156 | #html_show_copyright = True
157 | 
158 | # If true, an OpenSearch description file will be output, and all pages will
159 | # contain a <link> tag referring to it.  The value of this option must be the
160 | # base URL from which the finished HTML is served.
161 | #html_use_opensearch = ''
162 | 
163 | # This is the file name suffix for HTML files (e.g. ".xhtml").
164 | #html_file_suffix = None
165 | 
# Output file base name for the HTML help builder.
htmlhelp_basename = 'Toolzdoc'
168 | 
169 | 
170 | # -- Options for LaTeX output --------------------------------------------------
171 | 
# Every entry below is commented out, so this dict is empty; uncomment an
# entry to set that option.
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#'preamble': '',
}
182 | 
183 | # Grouping the document tree into LaTeX files. List of tuples
184 | # (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [
    (
        'index',                           # source start file
        'Toolz.tex',                       # target name
        'Toolz Documentation',             # title
        'Matthew Rocklin, John Jacobsen',  # author
        'manual',                          # documentclass
    ),
]
189 | 
190 | # The name of an image file (relative to this directory) to place at the top of
191 | # the title page.
192 | #latex_logo = None
193 | 
194 | # For "manual" documents, if this is true, then toplevel headings are parts,
195 | # not chapters.
196 | #latex_use_parts = False
197 | 
198 | # If true, show page references after internal links.
199 | #latex_show_pagerefs = False
200 | 
201 | # If true, show URL addresses after external links.
202 | #latex_show_urls = False
203 | 
204 | # Documents to append as an appendix to all manuals.
205 | #latex_appendices = []
206 | 
207 | # If false, no module index is generated.
208 | #latex_domain_indices = True
209 | 
210 | 
211 | # -- Options for manual page output --------------------------------------------
212 | 
213 | # One entry per manual page. List of tuples
214 | # (source start file, name, description, authors, manual section).
man_pages = [
    (
        'index',                             # source start file
        'toolz',                             # name
        'Toolz Documentation',               # description
        ['Matthew Rocklin, John Jacobsen'],  # authors
        1,                                   # manual section
    ),
]
219 | 
220 | # If true, show URL addresses after external links.
221 | #man_show_urls = False
222 | 
223 | 
224 | # -- Options for Texinfo output ------------------------------------------------
225 | 
226 | # Grouping the document tree into Texinfo files. List of tuples
227 | # (source start file, target name, title, author,
228 | #  dir menu entry, description, category)
texinfo_documents = [
    (
        'index',                           # source start file
        'Toolz',                           # target name
        'Toolz Documentation',             # title
        'Matthew Rocklin, John Jacobsen',  # author
        'Toolz',                           # dir menu entry
        # Replaces the unfilled sphinx-quickstart placeholder
        # 'One line description of project.'
        'A general purpose functional standard library.',  # description
        'Miscellaneous',                   # category
    ),
]
234 | 
235 | # Documents to append as an appendix to all manuals.
236 | #texinfo_appendices = []
237 | 
238 | # If false, no module index is generated.
239 | #texinfo_domain_indices = True
240 | 
241 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
242 | #texinfo_show_urls = 'footnote'
243 | 
244 | 
245 | # -- Options for Epub output ---------------------------------------------------
246 | 
247 | # Bibliographic Dublin Core info.
epub_title = 'Toolz'
epub_author = 'Matthew Rocklin, John Jacobsen'
# Publisher and copyright holder are the same people as the authors.
epub_publisher = epub_author
epub_copyright = '2013, ' + epub_author
252 | 
253 | # The language of the text. It defaults to the language option
254 | # or en if the language is not set.
255 | #epub_language = ''
256 | 
257 | # The scheme of the identifier. Typical schemes are ISBN or URL.
258 | #epub_scheme = ''
259 | 
260 | # The unique identifier of the text. This can be a ISBN number
261 | # or the project homepage.
262 | #epub_identifier = ''
263 | 
264 | # A unique identification for the text.
265 | #epub_uid = ''
266 | 
267 | # A tuple containing the cover image and cover page html template filenames.
268 | #epub_cover = ()
269 | 
270 | # HTML files that should be inserted before the pages created by sphinx.
271 | # The format is a list of tuples containing the path and title.
272 | #epub_pre_files = []
273 | 
274 | # HTML files that should be inserted after the pages created by sphinx.
275 | # The format is a list of tuples containing the path and title.
276 | #epub_post_files = []
277 | 
278 | # A list of files that should not be packed into the epub file.
279 | #epub_exclude_files = []
280 | 
281 | # The depth of the table of contents in toc.ncx.
282 | #epub_tocdepth = 3
283 | 
284 | # Allow duplicate toc entries.
285 | #epub_tocdup = True
286 | 


--------------------------------------------------------------------------------
/toolz/dicttoolz.py:
--------------------------------------------------------------------------------
  1 | import operator
  2 | import collections
  3 | from functools import reduce
  4 | from collections.abc import Mapping
  5 | 
  6 | __all__ = ('merge', 'merge_with', 'valmap', 'keymap', 'itemmap',
  7 |            'valfilter', 'keyfilter', 'itemfilter',
  8 |            'assoc', 'dissoc', 'assoc_in', 'update_in', 'get_in')
  9 | 
 10 | 
 11 | def _get_factory(f, kwargs):
 12 |     factory = kwargs.pop('factory', dict)
 13 |     if kwargs:
 14 |         raise TypeError("{}() got an unexpected keyword argument "
 15 |                         "'{}'".format(f.__name__, kwargs.popitem()[0]))
 16 |     return factory
 17 | 
 18 | 
 19 | def merge(*dicts, **kwargs):
 20 |     """ Merge a collection of dictionaries
 21 | 
 22 |     >>> merge({1: 'one'}, {2: 'two'})
 23 |     {1: 'one', 2: 'two'}
 24 | 
 25 |     Later dictionaries have precedence
 26 | 
 27 |     >>> merge({1: 2, 3: 4}, {3: 3, 4: 4})
 28 |     {1: 2, 3: 3, 4: 4}
 29 | 
 30 |     See Also:
 31 |         merge_with
 32 |     """
 33 |     if len(dicts) == 1 and not isinstance(dicts[0], Mapping):
 34 |         dicts = dicts[0]
 35 |     factory = _get_factory(merge, kwargs)
 36 | 
 37 |     rv = factory()
 38 |     for d in dicts:
 39 |         rv.update(d)
 40 |     return rv
 41 | 
 42 | 
 43 | def merge_with(func, *dicts, **kwargs):
 44 |     """ Merge dictionaries and apply function to combined values
 45 | 
 46 |     A key may occur in more than one dict, and all values mapped from the key
 47 |     will be passed to the function as a list, such as func([val1, val2, ...]).
 48 | 
 49 |     >>> merge_with(sum, {1: 1, 2: 2}, {1: 10, 2: 20})
 50 |     {1: 11, 2: 22}
 51 | 
 52 |     >>> merge_with(first, {1: 1, 2: 2}, {2: 20, 3: 30})  # doctest: +SKIP
 53 |     {1: 1, 2: 2, 3: 30}
 54 | 
 55 |     See Also:
 56 |         merge
 57 |     """
 58 |     if len(dicts) == 1 and not isinstance(dicts[0], Mapping):
 59 |         dicts = dicts[0]
 60 |     factory = _get_factory(merge_with, kwargs)
 61 | 
 62 |     values = collections.defaultdict(lambda: [].append)
 63 |     for d in dicts:
 64 |         for k, v in d.items():
 65 |             values[k](v)
 66 | 
 67 |     result = factory()
 68 |     for k, v in values.items():
 69 |         result[k] = func(v.__self__)
 70 |     return result
 71 | 
 72 | 
 73 | def valmap(func, d, factory=dict):
 74 |     """ Apply function to values of dictionary
 75 | 
 76 |     >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
 77 |     >>> valmap(sum, bills)  # doctest: +SKIP
 78 |     {'Alice': 65, 'Bob': 45}
 79 | 
 80 |     See Also:
 81 |         keymap
 82 |         itemmap
 83 |     """
 84 |     rv = factory()
 85 |     rv.update(zip(d.keys(), map(func, d.values())))
 86 |     return rv
 87 | 
 88 | 
 89 | def keymap(func, d, factory=dict):
 90 |     """ Apply function to keys of dictionary
 91 | 
 92 |     >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
 93 |     >>> keymap(str.lower, bills)  # doctest: +SKIP
 94 |     {'alice': [20, 15, 30], 'bob': [10, 35]}
 95 | 
 96 |     See Also:
 97 |         valmap
 98 |         itemmap
 99 |     """
100 |     rv = factory()
101 |     rv.update(zip(map(func, d.keys()), d.values()))
102 |     return rv
103 | 
104 | 
def itemmap(func, d, factory=dict):
    """ Build a new dictionary by applying ``func`` to every item of ``d``

    ``func`` receives each ``(key, value)`` pair and must return a new
    key/value pair.  ``factory`` creates the empty result container.

    >>> accountids = {"Alice": 10, "Bob": 20}
    >>> itemmap(reversed, accountids)  # doctest: +SKIP
    {10: "Alice", 20: "Bob"}

    See Also:
        keymap
        valmap
    """
    result = factory()
    new_pairs = map(func, d.items())
    result.update(new_pairs)
    return result
119 | 
120 | 
def valfilter(predicate, d, factory=dict):
    """ Keep only the items of ``d`` whose value satisfies ``predicate``

    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> valfilter(iseven, d)
    {1: 2, 3: 4}

    ``factory`` creates the empty result container.

    See Also:
        keyfilter
        itemfilter
        valmap
    """
    kept = factory()
    for key, value in d.items():
        if not predicate(value):
            continue
        kept[key] = value
    return kept
139 | 
140 | 
def keyfilter(predicate, d, factory=dict):
    """ Keep only the items of ``d`` whose key satisfies ``predicate``

    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> keyfilter(iseven, d)
    {2: 3, 4: 5}

    ``factory`` creates the empty result container.

    See Also:
        valfilter
        itemfilter
        keymap
    """
    kept = factory()
    for key, value in d.items():
        if not predicate(key):
            continue
        kept[key] = value
    return kept
159 | 
160 | 
def itemfilter(predicate, d, factory=dict):
    """ Keep only the items of ``d`` for which ``predicate(item)`` is true

    ``predicate`` is called with each ``(key, value)`` pair.

    >>> def isvalid(item):
    ...     k, v = item
    ...     return k % 2 == 0 and v < 4

    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> itemfilter(isvalid, d)
    {2: 3}

    ``factory`` creates the empty result container.

    See Also:
        keyfilter
        valfilter
        itemmap
    """
    kept = factory()
    for pair in d.items():
        if not predicate(pair):
            continue
        key, value = pair
        kept[key] = value
    return kept
183 | 
184 | 
def assoc(d, key, value, factory=dict):
    """ Return a copy of ``d`` in which ``key`` maps to ``value``

    The input dictionary is not modified; the copy is built in a container
    created by ``factory``.

    >>> assoc({'x': 1}, 'x', 2)
    {'x': 2}
    >>> assoc({'x': 1}, 'y', 3)   # doctest: +SKIP
    {'x': 1, 'y': 3}
    """
    updated = factory()
    updated.update(d)
    updated[key] = value
    return updated
199 | 
200 | 
def dissoc(d, *keys, **kwargs):
    """ Return a new dict with the given key(s) removed.

    New dict has d[key] deleted for each supplied key.
    Does not modify the initial dictionary.  The remaining keys keep the
    order in which ``d`` iterates over them.

    The type of the result can be chosen with the ``factory`` keyword
    (default ``dict``); any other keyword raises TypeError.

    >>> dissoc({'x': 1, 'y': 2}, 'y')
    {'x': 1}
    >>> dissoc({'x': 1, 'y': 2}, 'y', 'x')
    {}
    >>> dissoc({'x': 1}, 'y') # Ignores missing keys
    {'x': 1}
    """
    factory = _get_factory(dissoc, kwargs)
    d2 = factory()

    if len(keys) < len(d) * .6:
        # Few removals relative to the size of d: copy everything, then
        # delete the unwanted keys.
        d2.update(d)
        for key in keys:
            if key in d2:
                del d2[key]
    else:
        # Many removals: copy only the survivors.  Iterate d itself rather
        # than a set of its keys so the result follows d's key order instead
        # of arbitrary set order, matching the branch above.
        dropped = set(keys)
        for k in d:
            if k not in dropped:
                d2[k] = d[k]
    return d2
228 | 
229 | 
def assoc_in(d, keys, value, factory=dict):
    """ Return a copy of ``d`` with ``value`` stored at a nested key path

    ``keys`` gives the path to the location to set.  The work is delegated to
    ``update_in`` with a function that ignores the existing value.

    >>> purchase = {'name': 'Alice',
    ...             'order': {'items': ['Apple', 'Orange'],
    ...                       'costs': [0.50, 1.25]},
    ...             'credit card': '5555-1234-1234-1234'}
    >>> assoc_in(purchase, ['order', 'costs'], [0.25, 1.00]) # doctest: +SKIP
    {'credit card': '5555-1234-1234-1234',
     'name': 'Alice',
     'order': {'costs': [0.25, 1.00], 'items': ['Apple', 'Orange']}}
    """
    def replace_with_value(_existing):
        # Whatever was stored before is discarded in favour of ``value``.
        return value

    return update_in(d, keys, replace_with_value, value, factory)
243 | 
244 | 
def update_in(d, keys, func, default=None, factory=dict):
    """ Apply a function to a value inside a (potentially) nested dictionary

    inputs:
    d - dictionary on which to operate
    keys - list or tuple giving the location of the value to be changed in d
    func - function to operate on that value
    default - value passed to func when the location does not exist yet
    factory - callable creating the empty containers used for the copies

    If keys == [k0,..,kX] and d[k0]..[kX] == v, update_in returns a copy of the
    original dictionary with v replaced by func(v), but does not mutate the
    original dictionary.

    If k0 is not a key in d, update_in creates nested dictionaries to the depth
    specified by the keys, with the innermost value set to func(default).

    >>> inc = lambda x: x + 1
    >>> update_in({'a': 0}, ['a'], inc)
    {'a': 1}

    >>> transaction = {'name': 'Alice',
    ...                'purchase': {'items': ['Apple', 'Orange'],
    ...                             'costs': [0.50, 1.25]},
    ...                'credit card': '5555-1234-1234-1234'}
    >>> update_in(transaction, ['purchase', 'costs'], sum) # doctest: +SKIP
    {'credit card': '5555-1234-1234-1234',
     'name': 'Alice',
     'purchase': {'costs': 1.75, 'items': ['Apple', 'Orange']}}

    >>> # updating a value when k0 is not in d
    >>> update_in({}, [1, 2, 3], str, default="bar")
    {1: {2: {3: 'bar'}}}
    >>> update_in({1: 'foo'}, [2, 3, 4], inc, 0)
    {1: 'foo', 2: {3: {4: 1}}}
    """
    key_iter = iter(keys)
    current_key = next(key_iter)

    # ``result`` is the top-level copy that is returned; ``cursor`` walks down
    # into the copies as the key path is followed.
    result = factory()
    result.update(d)
    cursor = result

    # On each pass ``current_key`` is a non-final key of the path, so the
    # level it names is copied (or created) before descending into it.
    for upcoming_key in key_iter:
        if current_key in d:
            d = d[current_key]
            child = factory()
            child.update(d)
        else:
            child = factory()
            d = child

        cursor[current_key] = child
        cursor = child
        current_key = upcoming_key

    # ``current_key`` is now the last key of the path.
    cursor[current_key] = func(d[current_key] if current_key in d else default)
    return result
301 | 
302 | 
def get_in(keys, coll, default=None, no_default=False):
    """ Look up ``coll[i0][i1]...[iX]`` where ``[i0, i1, ..., iX] == keys``.

    If the lookup fails at any level, ``default`` is returned.  When
    ``no_default`` is true the original exception (KeyError, IndexError or
    TypeError) is re-raised instead.

    ``get_in`` is a generalization of ``operator.getitem`` for nested data
    structures such as dictionaries and lists.

    >>> transaction = {'name': 'Alice',
    ...                'purchase': {'items': ['Apple', 'Orange'],
    ...                             'costs': [0.50, 1.25]},
    ...                'credit card': '5555-1234-1234-1234'}
    >>> get_in(['purchase', 'items', 0], transaction)
    'Apple'
    >>> get_in(['name'], transaction)
    'Alice'
    >>> get_in(['purchase', 'total'], transaction)
    >>> get_in(['purchase', 'items', 'apple'], transaction)
    >>> get_in(['purchase', 'items', 10], transaction)
    >>> get_in(['purchase', 'total'], transaction, 0)
    0
    >>> get_in(['y'], {}, no_default=True)
    Traceback (most recent call last):
        ...
    KeyError: 'y'

    See Also:
        itertoolz.get
        operator.getitem
    """
    try:
        found = coll
        # Descend one level per key; an empty ``keys`` yields ``coll`` itself.
        for key in keys:
            found = operator.getitem(found, key)
    except (KeyError, IndexError, TypeError):
        if not no_default:
            return default
        raise
    return found
340 | 


--------------------------------------------------------------------------------
/doc/source/streaming-analytics.rst:
--------------------------------------------------------------------------------
  1 | Streaming Analytics
  2 | ===================
  3 | 
  4 | The toolz functions can be composed to analyze large streaming datasets.
  5 | Toolz supports common analytics patterns like the selection, grouping,
  6 | reduction, and joining of data through pure composable functions.  These
  7 | functions often have analogs to familiar operations in other data analytics
  8 | platforms like SQL or Pandas.
  9 | 
 10 | Throughout this document we'll use this simple dataset of accounts
 11 | 
 12 | .. code::
 13 | 
 14 |    >>> accounts = [(1, 'Alice', 100, 'F'),  # id, name, balance, gender
 15 |    ...             (2, 'Bob', 200, 'M'),
 16 |    ...             (3, 'Charlie', 150, 'M'),
 17 |    ...             (4, 'Dennis', 50, 'M'),
 18 |    ...             (5, 'Edith', 300, 'F')]
 19 | 
 20 | Selecting with ``map`` and ``filter``
 21 | -------------------------------------
 22 | 
 23 | Simple projection and linear selection from a sequence is achieved through the
 24 | standard functions ``map`` and ``filter``.
 25 | 
 26 | .. code::
 27 | 
 28 |    SELECT name, balance
 29 |    FROM accounts
 30 |    WHERE balance > 150;
 31 | 
 32 | These functions correspond to the SQL commands ``SELECT`` and ``WHERE``.
 33 | 
 34 | .. code::
 35 | 
 36 |    >>> from toolz.curried import pipe, map, filter, get
 37 |    >>> pipe(accounts, filter(lambda acc: acc[2] > 150),
 38 |    ...                map(get([1, 2])),
 39 |    ...                list)
 40 | 
 41 | Note: this uses the `curried`_ versions of ``map`` and ``filter``.
 42 | 
 43 | Of course, these operations are also well supported with standard
 44 | list/generator comprehension syntax.  This syntax is more often used and
 45 | generally considered to be more Pythonic.
 46 | 
 47 | .. code::
 48 | 
 49 |    >>> [(name, balance) for (id, name, balance, gender) in accounts
 50 |    ...                  if balance > 150]
 51 | 
 52 | 
 53 | Split-apply-combine with ``groupby`` and ``reduceby``
 54 | -----------------------------------------------------
 55 | 
 56 | We separate split-apply-combine operations into the following two concepts
 57 | 
 58 | 1.  Split the dataset into groups by some property
 59 | 2.  Reduce each of the groups with some synopsis function
 60 | 
 61 | Toolz supports this common workflow with
 62 | 
 63 | 1.  a simple in-memory solution
 64 | 2.  a more sophisticated streaming solution.
 65 | 
 66 | 
 67 | In Memory Split-Apply-Combine
 68 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 69 | 
 70 | The in-memory solution depends on the functions `groupby`_ to split, and
 71 | `valmap`_ to apply/combine.
 72 | 
 73 | .. code::
 74 | 
 75 |    SELECT gender, SUM(balance)
 76 |    FROM accounts
 77 |    GROUP BY gender;
 78 | 
 79 | We first apply these two functions one at a time to show the intermediate
 80 | groups.
 81 | 
 82 | .. code::
 83 | 
 84 |    >>> from toolz import compose
 85 |    >>> from toolz.curried import get, pluck, groupby, valmap
 86 | 
 87 |    >>> groupby(get(3), accounts)
 88 |    {'F': [(1, 'Alice', 100, 'F'), (5, 'Edith', 300, 'F')],
 89 |     'M': [(2, 'Bob', 200, 'M'), (3, 'Charlie', 150, 'M'), (4, 'Dennis', 50, 'M')]}
 90 | 
 91 |    >>> valmap(compose(sum, pluck(2)),
 92 |    ...        _)  # The underscore captures results from the previous prompt
 93 |    {'F': 400, 'M': 400}
 94 | 
 95 | 
 96 | Then we chain them together into a single computation
 97 | 
 98 | .. code::
 99 | 
100 |    >>> pipe(accounts, groupby(get(3)),
101 |    ...                valmap(compose(sum, pluck(2))))
102 |    {'F': 400, 'M': 400}
103 | 
104 | 
105 | Streaming Split-Apply-Combine
106 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
107 | 
108 | The ``groupby`` function collects the entire dataset in memory into a
109 | dictionary.  While convenient, the ``groupby`` operation is *not streaming* and
110 | so this approach is limited to datasets that can fit comfortably into memory.
111 | 
112 | Toolz achieves streaming split-apply-combine with `reduceby`_, a function that
113 | performs a simultaneous reduction on each group as the elements stream in.  To
114 | understand this section you should first be familiar with the builtin function
115 | ``reduce``.
116 | 
117 | The ``reduceby`` operation takes a key function, like ``get(3)`` or ``lambda x:
118 | x[3]``, and a binary operator like ``add`` or ``lesser = lambda acc, x: acc if
119 | acc < x else x``.  It applies the key function to each item in succession,
120 | accumulating running totals for each key by combining each new
121 | value with the previous using the binary operator.  It can't accept full
122 | reduction operations like ``sum`` or ``min`` as these require access to the
123 | entire group at once.  Here is a simple example:
124 | 
125 | .. code::
126 | 
127 |    >>> from toolz import reduceby
128 | 
129 |    >>> def iseven(n):
130 |    ...     return n % 2 == 0
131 | 
132 |    >>> def add(x, y):
133 |    ...     return x + y
134 | 
135 |    >>> reduceby(iseven, add, [1, 2, 3, 4])
136 |    {True: 6, False: 4}
137 | 
138 | The even numbers are added together ``(2 + 4 = 6)`` into group ``True``, and
139 | the odd numbers are added together ``(1 + 3 = 4)`` into group ``False``.
140 | 
141 | 
142 | Note that we have to replace the reduction ``sum`` with the binary operator
143 | ``add``.  The incremental nature of ``add`` allows us to do the summation work as
144 | new data comes in.  The use of binary operators like ``add`` over full reductions
145 | like ``sum`` enables computation on very large streaming datasets.
146 | 
147 | The challenge to using ``reduceby`` often lies in the construction of a
148 | suitable binary operator. Here is the solution for our accounts example
149 | that adds up the balances for each group:
150 | 
151 | .. code::
152 | 
153 |    >>> binop = lambda total, account: total + account[2]
154 | 
155 |    >>> reduceby(get(3), binop, accounts, 0)
156 |    {'F': 400, 'M': 400}
157 | 
158 | 
159 | This construction supports datasets that are much larger than available memory.
160 | Only the output must be able to fit comfortably in memory and this is rarely an
161 | issue, even for very large split-apply-combine computations.
162 | 
163 | 
164 | Semi-Streaming ``join``
165 | -----------------------
166 | 
167 | We register multiple datasets together with `join`_.  Consider a second
168 | dataset storing addresses by ID
169 | 
170 | .. code::
171 | 
172 |    >>> addresses = [(1, '123 Main Street'),  # id, address
173 |    ...              (2, '5 Adams Way'),
174 |    ...              (5, '34 Rue St Michel')]
175 | 
176 | We can join this dataset against our accounts dataset by specifying attributes
177 | which register different elements with each other; in this case they share a
178 | common first column, id.
179 | 
180 | .. code::
181 | 
182 |    SELECT accounts.name, addresses.address
183 |    FROM accounts
184 |    JOIN addresses
185 |    ON accounts.id = addresses.id;
186 | 
187 | 
188 | .. code::
189 | 
190 |    >>> from toolz import join, first
191 | 
192 |    >>> result = join(first, accounts,
193 |    ...               first, addresses)
194 | 
195 |    >>> for ((id, name, bal, gender), (id, address)) in result:
196 |    ...     print((name, address))
197 |    ('Alice', '123 Main Street')
198 |    ('Bob', '5 Adams Way')
199 |    ('Edith', '34 Rue St Michel')
200 | 
201 | Join takes four main arguments, a left and right key function and a left
202 | and right sequence. It returns a sequence of pairs of matching items. In our
203 | case the return value of ``join`` is a sequence of pairs of tuples such that the
204 | first element of each tuple (the ID) is the same.  In the example above we
205 | unpack this pair of tuples to get the fields that we want (``name`` and
206 | ``address``) from the result.
207 | 
208 | 
209 | Join on arbitrary functions / data
210 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
211 | 
212 | Those familiar with SQL are accustomed to this kind of join on columns.
213 | However a functional join is more general than this; it doesn't need to operate
214 | on tuples, and key functions do not need to get particular columns.  In the
215 | example below we match numbers from two collections so that exactly one is even
216 | and one is odd.
217 | 
218 | .. code::
219 | 
220 |    >>> def iseven(x):
221 |    ...     return x % 2 == 0
222 |    >>> def isodd(x):
223 |    ...     return x % 2 == 1
224 | 
225 |    >>> list(join(iseven, [1, 2, 3, 4],
226 |    ...           isodd, [7, 8, 9]))
227 |    [(2, 7), (4, 7), (1, 8), (3, 8), (2, 9), (4, 9)]
228 | 
229 | 
230 | Semi-Streaming Join
231 | ^^^^^^^^^^^^^^^^^^^
232 | 
233 | The Toolz Join operation fully evaluates the *left* sequence and streams the
234 | *right* sequence through memory.  Thus, if streaming support is desired the
235 | larger of the two sequences should always occupy the right side of the join.
236 | 
237 | 
238 | Algorithmic Details
239 | ^^^^^^^^^^^^^^^^^^^
240 | 
241 | The semi-streaming join operation in ``toolz`` is asymptotically optimal.
242 | Computationally it is linear in the size of the input + output.  In terms of
243 | storage the left sequence must fit in memory but the right sequence is free to
244 | stream.
245 | 
246 | The results are not normalized, as in SQL, in that they permit repeated values.  If
247 | normalization is desired, consider composing with the function ``unique`` (note
248 | that ``unique`` is not fully streaming.)
249 | 
250 | 
251 | More Complex Example
252 | ^^^^^^^^^^^^^^^^^^^^
253 | 
254 | The accounts example above connects two one-to-one relationships, ``accounts``
255 | and ``addresses``; there was exactly one name per ID and one address per ID.
256 | This need not be the case.  The join abstraction is sufficiently flexible to
257 | join one-to-many or even many-to-many relationships.  The following example
258 | finds city/person pairs where that person has a friend who has a residence in
259 | that city.  This is an example of joining two many-to-many relationships,
260 | because a person may have many friends and because a friend may have many
261 | residences.
262 | 
263 | 
264 | .. code::
265 | 
266 |    >>> friends = [('Alice', 'Edith'),
267 |    ...            ('Alice', 'Zhao'),
268 |    ...            ('Edith', 'Alice'),
269 |    ...            ('Zhao', 'Alice'),
270 |    ...            ('Zhao', 'Edith')]
271 | 
272 |    >>> cities = [('Alice', 'NYC'),
273 |    ...           ('Alice', 'Chicago'),
274 |    ...           ('Dan', 'Sydney'),
275 |    ...           ('Edith', 'Paris'),
276 |    ...           ('Edith', 'Berlin'),
277 |    ...           ('Zhao', 'Shanghai')]
278 | 
279 |    >>> # Vacation opportunities
280 |    >>> # In what cities do people have friends?
281 |    >>> result = join(second, friends,
282 |    ...               first, cities)
283 |    >>> for ((name, friend), (friend, city)) in sorted(unique(result)):
284 |    ...     print((name, city))
285 |    ('Alice', 'Berlin')
286 |    ('Alice', 'Paris')
287 |    ('Alice', 'Shanghai')
288 |    ('Edith', 'Chicago')
289 |    ('Edith', 'NYC')
290 |    ('Zhao', 'Chicago')
291 |    ('Zhao', 'NYC')
292 |    ('Zhao', 'Berlin')
293 |    ('Zhao', 'Paris')
294 | 
295 | Join is computationally powerful:
296 | 
297 | *   It is expressive enough to cover a wide set of analytics operations
298 | *   It runs in linear time relative to the size of the input and output
299 | *   Only the left sequence must fit in memory
300 | 
301 | 
302 | Disclaimer
303 | ----------
304 | 
305 | Toolz is a general purpose functional standard library, not a library
306 | specifically for data analytics.  While there are obvious benefits (streaming,
307 | composition, ...) users interested in data analytics might be better served by
308 | using projects specific to data analytics like Pandas_ or SQLAlchemy.
309 | 
310 | 
311 | .. _groupby: https://toolz.readthedocs.io/en/latest/api.html#toolz.itertoolz.groupby
312 | .. _join: https://toolz.readthedocs.io/en/latest/api.html#toolz.itertoolz.join
313 | .. _reduceby: https://toolz.readthedocs.io/en/latest/api.html#toolz.itertoolz.reduceby
314 | .. _valmap: https://toolz.readthedocs.io/en/latest/api.html#toolz.dicttoolz.valmap
315 | .. _Pandas: http://pandas.pydata.org/pandas-docs/stable/groupby.html
316 | .. _curried: https://toolz.readthedocs.io/en/latest/curry.html
317 | 


--------------------------------------------------------------------------------
/toolz/tests/test_inspect_args.py:
--------------------------------------------------------------------------------
  1 | import functools
  2 | import inspect
  3 | import itertools
  4 | import operator
  5 | import sys
  6 | import toolz
  7 | from toolz.functoolz import (curry, is_valid_args, is_partial_args, is_arity,
  8 |                              num_required_args, has_varargs, has_keywords)
  9 | from toolz._signatures import builtins
 10 | import toolz._signatures as _sigs
 11 | from toolz.utils import raises
 12 | 
 13 | 
 14 | def make_func(param_string, raise_if_called=True):
 15 |     if not param_string.startswith('('):
 16 |         param_string = '(%s)' % param_string
 17 |     if raise_if_called:
 18 |         body = 'raise ValueError("function should not be called")'
 19 |     else:
 20 |         body = 'return True'
 21 |     d = {}
 22 |     exec(f'def func{param_string}:\n    {body}', globals(), d)
 23 |     return d['func']
 24 | 
 25 | 
 26 | def test_make_func():
 27 |     f = make_func('')
 28 |     assert raises(ValueError, lambda: f())
 29 |     assert raises(TypeError, lambda: f(1))
 30 | 
 31 |     f = make_func('', raise_if_called=False)
 32 |     assert f()
 33 |     assert raises(TypeError, lambda: f(1))
 34 | 
 35 |     f = make_func('x, y=1', raise_if_called=False)
 36 |     assert f(1)
 37 |     assert f(x=1)
 38 |     assert f(1, 2)
 39 |     assert f(x=1, y=2)
 40 |     assert raises(TypeError, lambda: f(1, 2, 3))
 41 | 
 42 |     f = make_func('(x, y=1)', raise_if_called=False)
 43 |     assert f(1)
 44 |     assert f(x=1)
 45 |     assert f(1, 2)
 46 |     assert f(x=1, y=2)
 47 |     assert raises(TypeError, lambda: f(1, 2, 3))
 48 | 
 49 | 
def test_is_valid(check_valid=is_valid_args, incomplete=False):
    """ Check ``check_valid`` against functions with a range of signatures

    ``check_valid`` is called as ``check_valid(func, args, kwargs)``.
    ``incomplete`` is the result expected when the supplied arguments are not
    rejected outright but required parameters are still missing (``False``
    for the default ``is_valid_args``).
    """
    orig_check_valid = check_valid
    # Adapter so the cases below can be written in ordinary call syntax.
    check_valid = lambda func, *args, **kwargs: orig_check_valid(func, args, kwargs)

    # no parameters
    f = make_func('')
    assert check_valid(f)
    assert check_valid(f, 1) is False
    assert check_valid(f, x=1) is False

    # one required parameter
    f = make_func('x')
    assert check_valid(f) is incomplete
    assert check_valid(f, 1)
    assert check_valid(f, x=1)
    assert check_valid(f, 1, x=2) is False
    assert check_valid(f, 1, y=2) is False
    assert check_valid(f, 1, 2) is False
    assert check_valid(f, x=1, y=2) is False

    # one optional parameter
    f = make_func('x=1')
    assert check_valid(f)
    assert check_valid(f, 1)
    assert check_valid(f, x=1)
    assert check_valid(f, 1, x=2) is False
    assert check_valid(f, 1, y=2) is False
    assert check_valid(f, 1, 2) is False
    assert check_valid(f, x=1, y=2) is False

    # variadic positional only
    f = make_func('*args')
    assert check_valid(f)
    assert check_valid(f, 1)
    assert check_valid(f, 1, 2)
    assert check_valid(f, x=1) is False

    # variadic keyword only
    f = make_func('**kwargs')
    assert check_valid(f)
    assert check_valid(f, x=1)
    assert check_valid(f, x=1, y=2)
    assert check_valid(f, 1) is False

    # required parameter followed by variadic positional
    f = make_func('x, *args')
    assert check_valid(f) is incomplete
    assert check_valid(f, 1)
    assert check_valid(f, 1, 2)
    assert check_valid(f, x=1)
    assert check_valid(f, 1, x=1) is False
    assert check_valid(f, 1, y=1) is False

    # required, optional and variadic keyword parameters together
    f = make_func('x, y=1, **kwargs')
    assert check_valid(f) is incomplete
    assert check_valid(f, 1)
    assert check_valid(f, x=1)
    assert check_valid(f, 1, 2)
    assert check_valid(f, x=1, y=2, z=3)
    assert check_valid(f, 1, 2, y=3) is False

    # two required and two optional parameters
    f = make_func('a, b, c=3, d=4')
    assert check_valid(f) is incomplete
    assert check_valid(f, 1) is incomplete
    assert check_valid(f, 1, 2)
    assert check_valid(f, 1, c=3) is incomplete
    assert check_valid(f, 1, e=3) is False
    assert check_valid(f, 1, 2, e=3) is False
    assert check_valid(f, 1, 2, b=3) is False

    # a non-callable is never valid
    assert check_valid(1) is False
115 | 
116 | 
def test_is_valid_py3(check_valid=is_valid_args, incomplete=False):
    """Check an argument validator on keyword-only and annotated signatures.

    ``check_valid`` is invoked as ``check_valid(func, args, kwargs)``.
    ``incomplete`` is the exact object the validator must return wherever
    an assertion below compares with ``is incomplete``.
    """
    def verdict(func, *args, **kwargs):
        # Adapter so each assertion reads like an ordinary call.
        return check_valid(func, args, kwargs)

    # Optional keyword-only parameter after a bare ``*``.
    fn = make_func('x, *, y=1')
    assert verdict(fn) is incomplete
    assert verdict(fn, 1)
    assert verdict(fn, x=1)
    assert verdict(fn, 1, y=2)
    assert verdict(fn, 1, 2) is False
    assert verdict(fn, 1, z=2) is False

    # Same, but with ``*args`` in place of the bare ``*``.
    fn = make_func('x, *args, y=1')
    assert verdict(fn) is incomplete
    assert verdict(fn, 1)
    assert verdict(fn, x=1)
    assert verdict(fn, 1, y=2)
    assert verdict(fn, 1, 2, y=2)
    assert verdict(fn, 1, 2)
    assert verdict(fn, 1, z=2) is False

    # Only an optional keyword-only parameter.
    fn = make_func('*, y=1')
    assert verdict(fn)
    assert verdict(fn, 1) is False
    assert verdict(fn, y=1)
    assert verdict(fn, z=1) is False

    # Required keyword-only parameter.
    fn = make_func('x, *, y')
    assert verdict(fn) is incomplete
    assert verdict(fn, 1) is incomplete
    assert verdict(fn, x=1) is incomplete
    assert verdict(fn, 1, y=2)
    assert verdict(fn, x=1, y=2)
    assert verdict(fn, 1, 2) is False
    assert verdict(fn, 1, z=2) is False
    assert verdict(fn, 1, y=1, z=2) is False

    # Required and optional keyword-only parameters together.
    fn = make_func('x=1, *, y, z=3')
    assert verdict(fn) is incomplete
    assert verdict(fn, 1, z=3) is incomplete
    assert verdict(fn, y=2)
    assert verdict(fn, 1, y=2)
    assert verdict(fn, x=1, y=2)
    assert verdict(fn, x=1, y=2, z=3)
    assert verdict(fn, 1, x=1, y=2) is False
    assert verdict(fn, 1, 3, y=2) is False

    fn = make_func('w, x=2, *args, y, z=4')
    assert verdict(fn) is incomplete
    assert verdict(fn, 1) is incomplete
    assert verdict(fn, 1, y=3)

    fn = make_func('a, b, c=3, d=4, *args, e=5, f=6, g, h')
    assert verdict(fn) is incomplete
    assert verdict(fn, 1) is incomplete
    assert verdict(fn, 1, 2) is incomplete
    assert verdict(fn, 1, 2, g=7) is incomplete
    assert verdict(fn, 1, 2, g=7, h=8)
    assert verdict(fn, 1, 2, 3, 4, 5, 6, 7, 8, 9) is incomplete

    # Annotated parameters.
    fn = make_func('a: int, b: float')
    assert verdict(fn) is incomplete
    assert verdict(fn, 1) is incomplete
    assert verdict(fn, b=1) is incomplete
    assert verdict(fn, 1, 2)

    # Annotated parameters plus a return annotation.
    fn = make_func('(a: int, b: float) -> float')
    assert verdict(fn) is incomplete
    assert verdict(fn, 1) is incomplete
    assert verdict(fn, b=1) is incomplete
    assert verdict(fn, 1, 2)

    # A ``__signature__`` that is not a signature object is reported as False.
    fn.__signature__ = 34
    assert verdict(fn) is False

    class RaisesValueError:
        def __call__(self):
            pass

        @property
        def __signature__(self):
            raise ValueError('Testing Python 3.4')

    # A ``__signature__`` property that raises ValueError yields None.
    fn = RaisesValueError()
    assert verdict(fn) is None
201 | 
202 | 
def test_is_partial():
    """Run both validity suites against ``is_partial_args``."""
    for suite in (test_is_valid, test_is_valid_py3):
        suite(check_valid=is_partial_args, incomplete=True)
206 | 
207 | 
def test_is_valid_curry():
    """Run both validity suites with ``curry`` itself acting as the checker."""
    function_type = type(lambda: None)

    def check_curry(func, args, kwargs, incomplete=True):
        # Supply the arguments to ``curry`` in every possible split.
        try:
            curry(func)(*args, **kwargs)
            curry(func, *args)(**kwargs)
            curry(func, **kwargs)(*args)
            curry(func, *args, **kwargs)()
        except ValueError:
            # NOTE(review): presumably raised by the body of a make_func
            # function once it is really called -- confirm against make_func.
            return True
        except TypeError:
            return False
        # No exception at all: plain functions report ``incomplete``,
        # any other kind of callable reports None.
        return incomplete if isinstance(func, function_type) else None

    for flag in (True, False):
        checker = functools.partial(check_curry, incomplete=flag)
        test_is_valid(check_valid=checker, incomplete=flag)
        test_is_valid_py3(check_valid=checker, incomplete=flag)
230 | 
231 | 
def test_func_keyword():
    """Both checkers must cope with a parameter that is itself named ``func``."""
    def f(func=None):
        pass

    for checker in (is_valid_args, is_partial_args):
        assert checker(f, (), {})
        assert checker(f, (None,), {})
        assert checker(f, (), {'func': None})
        # Supplying ``func`` both positionally and by keyword is invalid.
        assert checker(f, (None,), {'func': None}) is False
243 | 
244 | 
def test_has_unknown_args():
    """Check ``has_varargs`` on non-callables, builtins and generated functions."""
    assert has_varargs(1) is False
    assert has_varargs(map)

    # (parameter spec handed to make_func, whether ``*args`` is present)
    expectations = [
        ('', False),
        ('x, y, z', False),
        ('*args', True),
        ('**kwargs', False),
        ('x, y, *args, **kwargs', True),
        ('x, y, z=1', False),
        ('x, y, z=1, **kwargs', False),
    ]
    for spec, takes_star_args in expectations:
        outcome = has_varargs(make_func(spec))
        if takes_star_args:
            assert outcome
        else:
            assert outcome is False

    # A ``__signature__`` that is not a signature object is reported as False.
    broken = make_func('*args')
    broken.__signature__ = 34
    assert has_varargs(broken) is False

    class RaisesValueError:
        def __call__(self):
            pass

        @property
        def __signature__(self):
            raise ValueError('Testing Python 3.4')

    # A ``__signature__`` property that raises ValueError yields None.
    assert has_varargs(RaisesValueError()) is None
269 | 
270 | 
def test_num_required_args():
    """Check ``num_required_args`` on lambdas and two builtins."""
    cases = [
        (lambda: None, 0),
        (lambda x: None, 1),
        (lambda x, *args: None, 1),
        (lambda x, **kwargs: None, 1),
        (lambda x, y, *args, **kwargs: None, 2),
        (map, 2),
    ]
    for func, expected in cases:
        assert num_required_args(func) == expected

    # ``dict`` is expected to give None rather than a count.
    assert num_required_args(dict) is None
279 | 
280 | 
def test_has_keywords():
    """Check ``has_keywords`` on lambdas and on several builtins."""
    assert has_keywords(lambda: None) is False
    assert has_keywords(lambda x: None) is False
    assert has_keywords(lambda x=1: None)
    assert has_keywords(lambda **kwargs: None)
    assert has_keywords(int)
    assert has_keywords(sorted)
    assert has_keywords(max)
    # map gained `strict=False` keyword in Python 3.14.  Compare the whole
    # version tuple so the major version takes part in the check instead of
    # looking at the minor number alone.
    assert has_keywords(map) == (sys.version_info >= (3, 14))
    assert has_keywords(bytearray) is None
292 | 
293 | 
def test_has_varargs():
    """Check ``has_varargs`` on lambdas and two builtins."""
    exact_results = [
        (lambda: None, False),
        (lambda **kwargs: None, False),
        (max, None),
    ]
    for func, expected in exact_results:
        assert has_varargs(func) is expected

    # These only need to be truthy.
    for func in (lambda *args: None, map):
        assert has_varargs(func)
300 | 
301 | 
def test_is_arity():
    """Check ``is_arity`` for matching, non-matching and unknown arities."""
    matching = [
        (0, lambda: None),
        (1, lambda x: None),
        (3, lambda x, y, z: None),
        (1, all),
    ]
    for n, func in matching:
        assert is_arity(n, func)

    not_matching = [
        (1, lambda: None),
        (1, lambda x, *args: None),
        (1, lambda x, **kwargs: None),
        (2, map),
    ]
    for n, func in not_matching:
        assert is_arity(n, func) is False

    # ``range`` is expected to give None rather than True or False.
    assert is_arity(2, range) is None
312 | 
313 | 
def test_introspect_curry_valid_py3(check_valid=is_valid_args, incomplete=False):
    """Check an argument validator against ``toolz.curry`` objects.

    ``check_valid`` is invoked as ``check_valid(func, args, kwargs)``.
    ``incomplete`` is accepted for signature parity with the other suites
    and is not used in the body.
    """
    def verdict(_func, *args, **kwargs):
        # Adapter so each assertion reads like an ordinary call.
        return check_valid(_func, args, kwargs)

    curried = toolz.curry(make_func('x, y, z=0'))
    assert verdict(curried)
    assert verdict(curried, 1)
    assert verdict(curried, 1, 2)
    assert verdict(curried, 1, 2, 3)
    assert verdict(curried, 1, 2, 3, 4) is False
    assert verdict(curried, invalid_keyword=True) is False
    # Arguments already bound by a partial call count towards the total.
    assert verdict(curried(1))
    assert verdict(curried(1), 2)
    assert verdict(curried(1), 2, 3)
    assert verdict(curried(1), 2, 3, 4) is False
    assert verdict(curried(1), x=2) is False
    assert verdict(curried(1), y=2)
    assert verdict(curried(x=1), 2) is False
    assert verdict(curried(x=1), y=2)
    assert verdict(curried(y=2), 1)
    assert verdict(curried(y=2), 1, z=3)
    assert verdict(curried(y=2), 1, 3) is False

    # ``x`` bound both positionally and by keyword at construction time.
    curried = toolz.curry(make_func('x, y, z=0'), 1, x=1)
    assert verdict(curried) is False
    assert verdict(curried, z=3) is False

    curried = toolz.curry(make_func('x, y, *args, z'))
    assert verdict(curried)
    assert verdict(curried, 0)
    assert verdict(curried(1), 0)
    assert verdict(curried(1, 2), 0)
    assert verdict(curried(1, 2, 3), 0)
    assert verdict(curried(1, 2, 3, 4), 0)
    assert verdict(curried(1, 2, 3, 4), z=4)
    assert verdict(curried(x=1))
    assert verdict(curried(x=1), 1) is False
    assert verdict(curried(x=1), y=2)
353 | 
def test_introspect_curry_partial_py3():
    """Run the curry validity suite against ``is_partial_args``."""
    options = {'check_valid': is_partial_args, 'incomplete': True}
    test_introspect_curry_valid_py3(**options)
356 | 
357 | 
def test_introspect_curry_py3():
    """Check the introspection helpers on ``toolz.curry`` objects."""
    curried = toolz.curry(make_func(''))
    assert num_required_args(curried) == 0
    assert is_arity(0, curried)
    assert has_varargs(curried) is False
    assert has_keywords(curried) is False

    curried = toolz.curry(make_func('x'))
    assert num_required_args(curried) == 0
    for n in (0, 1):
        assert is_arity(n, curried) is False
    assert has_varargs(curried) is False
    assert has_keywords(curried)  # A side-effect of being curried

    curried = toolz.curry(make_func('x, y, z=0'))
    assert num_required_args(curried) == 0
    for n in (0, 1, 2, 3):
        assert is_arity(n, curried) is False
    assert has_varargs(curried) is False
    assert has_keywords(curried)

    curried = toolz.curry(make_func('*args, **kwargs'))
    assert num_required_args(curried) == 0
    assert has_varargs(curried)
    assert has_keywords(curried)
385 | 
386 | 
def test_introspect_builtin_modules():
    """Fail if a public callable in the listed modules lacks introspection.

    A callable counts as missing when ``is_partial_args(func, (), {})`` is
    not ``True``.  Every offender is reported, grouped by module name.
    """
    mods = [builtins, functools, itertools, operator, toolz,
            toolz.functoolz, toolz.itertoolz, toolz.dicttoolz, toolz.recipes]

    # Builtins exempt from the check, when this interpreter has them.
    exempt_names = ('basestring', 'NoneType', '__metaclass__',
                    'sequenceiterator')
    denylist = {getattr(builtins, attr)
                for attr in exempt_names
                if hasattr(builtins, attr)}

    def is_missing(modname, name, func):
        private = name.startswith('_') and not name.startswith('__')
        cython_artifact = (name.startswith('__pyx_unpickle_')
                           or name.endswith('_cython__'))
        if private or cython_artifact:
            return False
        # Exception classes are skipped; ``issubclass`` raises TypeError
        # for anything that is not a class.
        try:
            if issubclass(func, BaseException):
                return False
        except TypeError:
            pass
        try:
            return (callable(func)
                    and func.__module__ is not None
                    and modname in func.__module__
                    and is_partial_args(func, (), {}) is not True
                    and func not in denylist)
        except AttributeError:
            # No ``__module__`` attribute to inspect.
            return False

    missing = {}
    for mod in mods:
        modname = mod.__name__
        for name, func in vars(mod).items():
            if is_missing(modname, name, func):
                missing.setdefault(modname, []).append(name)

    if not missing:
        return

    sections = [
        '{}:\n    {}'.format(modname, '\n    '.join(sorted(names)))
        for modname, names in sorted(missing.items())
    ]
    header = 'Missing introspection for the following callables:\n\n'
    raise AssertionError(header + '\n\n'.join(sections))
436 | 
437 | 
def test_inspect_signature_property():
    """Registering a class signature makes ``num_required_args`` work on it."""

    # By adding AddX to our signature registry, we can inspect the class
    # itself and objects of the class.  `inspect.signature` doesn't like
    # it when `obj.__signature__` is a property.
    class AddX:
        def __init__(self, func):
            self.func = func

        def __call__(self, addx, *args, **kwargs):
            return addx + self.func(*args, **kwargs)

        @property
        def __signature__(self):
            sig = inspect.signature(self.func)
            params = list(sig.parameters.values())
            kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
            newparam = inspect.Parameter('addx', kind)
            params = [newparam] + params
            return sig.replace(parameters=params)

    addx = AddX(lambda x: x)
    sig = inspect.signature(addx)
    assert sig == inspect.Signature(parameters=[
        inspect.Parameter('addx', inspect.Parameter.POSITIONAL_OR_KEYWORD),
        inspect.Parameter('x', inspect.Parameter.POSITIONAL_OR_KEYWORD)])

    assert num_required_args(AddX) is False
    _sigs.signatures[AddX] = (_sigs.expand_sig((0, lambda func: None)),)
    try:
        assert num_required_args(AddX) == 1
    finally:
        # Always undo the registry change, even when the assertion fails,
        # so the entry cannot leak into other tests.
        del _sigs.signatures[AddX]
469 | 
470 | 
def test_inspect_wrapped_property():
    """``num_required_args`` on a class whose ``__wrapped__`` is a property."""
    class Wrapped:
        def __init__(self, func):
            self.func = func

        def __call__(self, *args, **kwargs):
            return self.func(*args, **kwargs)

        @property
        def __wrapped__(self):
            return self.func

    func = lambda x: x
    wrapped = Wrapped(func)
    assert inspect.signature(func) == inspect.signature(wrapped)

    # inspect.signature used to mishandle wrappers like this one; it was
    # fixed in Python 3.11.9, Python 3.12.3 and Python 3.13+.  Comparing
    # version tuples keeps the major version in the check.
    version = tuple(sys.version_info[:3])
    inspect_fixed = (
        version >= (3, 13)
        or (3, 12, 3) <= version < (3, 13)
        or (3, 11, 9) <= version < (3, 12)
    )

    if inspect_fixed:
        assert num_required_args(Wrapped) == 1
        return

    assert num_required_args(Wrapped) is None
    _sigs.signatures[Wrapped] = (_sigs.expand_sig((0, lambda func: None)),)
    try:
        assert num_required_args(Wrapped) == 1
    finally:
        # Undo the registry change so the entry does not outlive this test.
        del _sigs.signatures[Wrapped]
505 | 


--------------------------------------------------------------------------------
/toolz/tests/test_itertoolz.py:
--------------------------------------------------------------------------------
  1 | import itertools
  2 | from itertools import starmap
  3 | from toolz.utils import raises
  4 | from functools import partial
  5 | from random import Random
  6 | from pickle import dumps, loads
  7 | from toolz.itertoolz import (remove, groupby, merge_sorted,
  8 |                              concat, concatv, interleave, unique,
  9 |                              isiterable, getter,
 10 |                              mapcat, isdistinct, first, second,
 11 |                              nth, take, tail, drop, interpose, get,
 12 |                              rest, last, cons, frequencies,
 13 |                              reduceby, iterate, accumulate,
 14 |                              sliding_window, count, partition,
 15 |                              partition_all, take_nth, pluck, join,
 16 |                              diff, topk, peek, peekn, random_sample)
 17 | from operator import add, mul
 18 | 
 19 | 
# A string built by a pickle round-trip of '__no__default__'.  An `is`
# comparison between this object and `no_default` will fail.
# NOTE(review): presumably equal in value to toolz's `no_default` sentinel --
# confirm against toolz.itertoolz.
no_default2 = loads(dumps('__no__default__'))
 22 | 
 23 | 
 24 | def identity(x):
 25 |     return x
 26 | 
 27 | 
 28 | def iseven(x):
 29 |     return x % 2 == 0
 30 | 
 31 | 
 32 | def isodd(x):
 33 |     return x % 2 == 1
 34 | 
 35 | 
 36 | def inc(x):
 37 |     return x + 1
 38 | 
 39 | 
 40 | def double(x):
 41 |     return 2 * x
 42 | 
 43 | 
 44 | def test_remove():
 45 |     r = remove(iseven, range(5))
 46 |     assert type(r) is not list
 47 |     assert list(r) == list(filter(isodd, range(5)))
 48 | 
 49 | 
 50 | def test_groupby():
 51 |     assert groupby(iseven, [1, 2, 3, 4]) == {True: [2, 4], False: [1, 3]}
 52 | 
 53 | 
 54 | def test_groupby_non_callable():
 55 |     assert groupby(0, [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
 56 |         {1: [(1, 2), (1, 3)],
 57 |          2: [(2, 2), (2, 4)]}
 58 | 
 59 |     assert groupby([0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
 60 |         {(1,): [(1, 2), (1, 3)],
 61 |          (2,): [(2, 2), (2, 4)]}
 62 | 
 63 |     assert groupby([0, 0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
 64 |         {(1, 1): [(1, 2), (1, 3)],
 65 |          (2, 2): [(2, 2), (2, 4)]}
 66 | 
 67 | 
 68 | def test_merge_sorted():
 69 |     assert list(merge_sorted([1, 2, 3], [1, 2, 3])) == [1, 1, 2, 2, 3, 3]
 70 |     assert list(merge_sorted([1, 3, 5], [2, 4, 6])) == [1, 2, 3, 4, 5, 6]
 71 |     assert list(merge_sorted([1], [2, 4], [3], [])) == [1, 2, 3, 4]
 72 |     assert list(merge_sorted([5, 3, 1], [6, 4, 3], [],
 73 |                              key=lambda x: -x)) == [6, 5, 4, 3, 3, 1]
 74 |     assert list(merge_sorted([2, 1, 3], [1, 2, 3],
 75 |                              key=lambda x: x // 3)) == [2, 1, 1, 2, 3, 3]
 76 |     assert list(merge_sorted([2, 3], [1, 3],
 77 |                              key=lambda x: x // 3)) == [2, 1, 3, 3]
 78 |     assert ''.join(merge_sorted('abc', 'abc', 'abc')) == 'aaabbbccc'
 79 |     assert ''.join(merge_sorted('abc', 'abc', 'abc', key=ord)) == 'aaabbbccc'
 80 |     assert ''.join(merge_sorted('cba', 'cba', 'cba',
 81 |                                 key=lambda x: -ord(x))) == 'cccbbbaaa'
 82 |     assert list(merge_sorted([1], [2, 3, 4], key=identity)) == [1, 2, 3, 4]
 83 | 
 84 |     data = [[(1, 2), (0, 4), (3, 6)], [(5, 3), (6, 5), (8, 8)],
 85 |             [(9, 1), (9, 8), (9, 9)]]
 86 |     assert list(merge_sorted(*data, key=lambda x: x[1])) == [
 87 |         (9, 1), (1, 2), (5, 3), (0, 4), (6, 5), (3, 6), (8, 8), (9, 8), (9, 9)]
 88 |     assert list(merge_sorted()) == []
 89 |     assert list(merge_sorted([1, 2, 3])) == [1, 2, 3]
 90 |     assert list(merge_sorted([1, 4, 5], [2, 3])) == [1, 2, 3, 4, 5]
 91 |     assert list(merge_sorted([1, 4, 5], [2, 3], key=identity)) == [
 92 |         1, 2, 3, 4, 5]
 93 |     assert list(merge_sorted([1, 5], [2], [4, 7], [3, 6], key=identity)) == [
 94 |         1, 2, 3, 4, 5, 6, 7]
 95 | 
 96 | 
 97 | def test_interleave():
 98 |     assert ''.join(interleave(('ABC', '123'))) == 'A1B2C3'
 99 |     assert ''.join(interleave(('ABC', '1'))) == 'A1BC'
100 | 
101 | 
def test_unique():
    """``unique`` keeps first occurrences, optionally judged through ``key``."""
    no_repeats = tuple(unique((1, 2, 3)))
    assert no_repeats == (1, 2, 3)

    with_repeat = tuple(unique((1, 2, 1, 3)))
    assert with_repeat == (1, 2, 3)

    # With ``key=iseven`` only one odd and one even item are kept.
    by_parity = tuple(unique((1, 2, 3), key=iseven))
    assert by_parity == (1, 2)
106 | 
107 | 
def test_isiterable():
    """``isiterable`` follows the ``__iter__`` / ``__getitem__`` protocols."""
    letters = ["a", "b", "c"]

    # objects that have a __iter__() or __getitem__() method are iterable
    # https://docs.python.org/3/library/functions.html#iter
    class IterIterable:
        def __iter__(self):
            return iter(letters)

    class GetItemIterable:
        def __getitem__(self, item):
            return letters[item]

    # "if a class sets __iter__() to None, the class is not iterable"
    # https://docs.python.org/3/reference/datamodel.html#special-method-names
    class NotIterable:
        __iter__ = None

    class NotIterableEvenWithGetItem:
        __iter__ = None

        def __getitem__(self, item):
            return letters[item]

    iterables = ([1, 2, 3], 'abc', IterIterable(), GetItemIterable())
    for candidate in iterables:
        assert isiterable(candidate) is True

    non_iterables = (5, NotIterable(), NotIterableEvenWithGetItem())
    for candidate in non_iterables:
        assert isiterable(candidate) is False
137 | 
138 | 
def test_isdistinct():
    """``isdistinct`` works on lists, strings and one-shot iterators."""
    all_different = ([1, 2, 3], "World", iter([1, 2, 3]))
    for seq in all_different:
        assert isdistinct(seq) is True

    has_duplicate = ([1, 2, 1], "Hello", iter([1, 2, 1]))
    for seq in has_duplicate:
        assert isdistinct(seq) is False
148 | 
149 | 
def test_nth():
    """Check ``nth`` on sequences, iterators and dicts, including failures."""
    letters = 'ABCDE'
    assert nth(2, letters) == 'C'
    assert nth(2, iter(letters)) == 'C'
    assert nth(1, (3, 2, 1)) == 2
    assert nth(0, {'foo': 'bar'}) == 'foo'
    # A negative index works on a sequence ...
    assert nth(-2, letters) == 'D'

    def past_the_end():
        return nth(10, {10: 'foo'})

    def negative_on_iterator():
        return nth(-2, iter(letters))

    assert raises(StopIteration, past_the_end)
    # ... but not on a plain iterator.
    assert raises(ValueError, negative_on_iterator)
158 | 
159 | 
def test_first():
    """``first`` returns the leading item of a string, tuple or dict."""
    for seq, expected in (('ABCDE', 'A'), ((3, 2, 1), 3)):
        assert first(seq) == expected
    # For a dict only the type of the returned key is checked.
    assert isinstance(first({0: 'zero', 1: 'one'}), int)
164 | 
165 | 
def test_second():
    """``second`` returns the item after the first one."""
    for seq, expected in (('ABCDE', 'B'), ((3, 2, 1), 2)):
        assert second(seq) == expected
    # For a dict only the type of the returned key is checked.
    assert isinstance(second({0: 'zero', 1: 'one'}), int)
170 | 
171 | 
def test_last():
    """``last`` returns the final item of a string, tuple or dict."""
    for seq, expected in (('ABCDE', 'E'), ((3, 2, 1), 1)):
        assert last(seq) == expected
    # For a dict only the type of the returned key is checked.
    assert isinstance(last({0: 'zero', 1: 'one'}), int)
176 | 
177 | 
def test_rest():
    """``rest`` yields everything after the first item."""
    cases = (('ABCDE', ['B', 'C', 'D', 'E']), ((3, 2, 1), [2, 1]))
    for seq, expected in cases:
        assert list(rest(seq)) == expected
181 | 
182 | 
def test_take():
    """``take(n, seq)`` yields the first ``n`` items."""
    cases = ((3, 'ABCDE', ['A', 'B', 'C']), (2, (3, 2, 1), [3, 2]))
    for n, seq, expected in cases:
        assert list(take(n, seq)) == expected
186 | 
187 | 
def test_tail():
    """``tail(n, seq)`` yields the last ``n`` items, for iterators as well."""
    final_three = ['C', 'D', 'E']
    assert list(tail(3, 'ABCDE')) == final_three
    assert list(tail(3, iter('ABCDE'))) == final_three
    assert list(tail(2, (3, 2, 1))) == [2, 1]
192 | 
193 | 
def test_drop():
    """``drop(n, seq)`` skips the first ``n`` items."""
    cases = ((3, 'ABCDE', ['D', 'E']), (1, (3, 2, 1), [2, 1]))
    for n, seq, expected in cases:
        assert list(drop(n, seq)) == expected
197 | 
198 | 
def test_take_nth():
    """``take_nth(2, seq)`` yields every second item, starting with the first."""
    every_other = list(take_nth(2, 'ABCDE'))
    assert every_other == ['A', 'C', 'E']
201 | 
202 | 
def test_get():
    """Check ``get`` with single keys, key lists, defaults and failures."""
    letters = 'ABCDE'
    abc = {'a': 1, 'b': 2, 'c': 3}

    # A single index or key, and lists of them.
    assert get(1, letters) == 'B'
    assert list(get([1, 3], letters)) == ['B', 'D']
    assert get('a', abc) == 1
    assert get(['a', 'b'], abc) == (1, 2)

    # Defaults replace missing or unusable lookups.
    assert get('foo', {}, default='bar') == 'bar'
    assert get({}, [1, 2, 3], default='bar') == 'bar'
    assert get([0, 2], 'AB', 'C') == ('A', 'C')

    # Key lists of length one and zero still give tuples.
    assert get([0], 'AB') == ('A',)
    assert get([], 'AB') == ()

    # (expected exception, positional arguments, keyword arguments)
    failures = [
        (IndexError, (10, 'ABC'), {}),
        (KeyError, (10, {'a': 1}), {}),
        (TypeError, ({}, [1, 2, 3]), {}),
        (TypeError, ([1, 2, 3], 1, None), {}),
        (KeyError, ('foo', {}), {'default': no_default2}),
    ]
    for error, args, kwargs in failures:
        assert raises(error, lambda: get(*args, **kwargs))
221 | 
222 | 
def test_mapcat():
    """``mapcat`` maps a function over sequences and chains the results."""
    assert (list(mapcat(identity, [[1, 2, 3], [4, 5, 6]])) ==
            [1, 2, 3, 4, 5, 6])

    assert (list(mapcat(reversed, [[3, 2, 1, 0], [6, 5, 4], [9, 8, 7]])) ==
            list(range(10)))

    # Uses the module-level ``inc`` helper; the local lambda that used to
    # shadow it computed exactly the same thing.
    assert ([4, 5, 6, 7, 8, 9] ==
            list(mapcat(partial(map, inc), [[3, 4, 5], [6, 7, 8]])))
233 | 
234 | 
def test_cons():
    """``cons`` puts one element in front of a sequence."""
    prepended = cons(1, [2, 3])
    assert list(prepended) == [1, 2, 3]
237 | 
238 | 
def test_concat():
    """``concat`` chains a sequence of sequences."""
    assert list(concat([[], [], []])) == []

    # Only five items are taken from a concatenation ending in a huge range.
    chained = concat([['a', 'b'], range(1000000000)])
    assert list(take(5, chained)) == ['a', 'b', 0, 1, 2]
243 | 
244 | 
def test_concatv():
    """``concatv`` chains the sequences passed as separate arguments."""
    assert list(concatv([], [], [])) == []

    # Only five items are taken from a concatenation ending in a huge range.
    chained = concatv(['a', 'b'], range(1000000000))
    assert list(take(5, chained)) == ['a', 'b', 0, 1, 2]
249 | 
250 | 
def test_interpose():
    """``interpose`` inserts a separator between consecutive items."""
    # Only the second item is pulled from an interposed huge range.
    huge = interpose("a", range(1000000000))
    assert "a" == first(rest(huge))

    spelled = "".join(interpose("X", "tarzan"))
    assert "tXaXrXzXaXn" == spelled

    ones = itertools.repeat(1, 4)
    assert list(interpose(0, ones)) == [1, 0, 1, 0, 1, 0, 1]

    dotted = list(interpose('.', ['a', 'b', 'c']))
    assert dotted == ['a', '.', 'b', '.', 'c']
256 | 
257 | 
def test_frequencies():
    """``frequencies`` counts occurrences of each item."""
    animals = ["cat", "pig", "cat", "eel", "pig", "dog", "dog", "dog"]
    assert frequencies(animals) == {"cat": 2, "eel": 1, "pig": 2, "dog": 3}

    assert frequencies([]) == {}

    # A string is counted character by character.
    letter_counts = {"a": 2, "e": 1, "i": 1, "m": 1,
                     "o": 4, "n": 1, "p": 1, "t": 1}
    assert frequencies("onomatopoeia") == letter_counts
265 | 
266 | 
def test_reduceby():
    """``reduceby`` groups by a key and reduces each group with a binary op."""
    data = [1, 2, 3, 4, 5]
    # Uses the module-level ``iseven`` helper; the local lambda that used to
    # shadow it computed exactly the same thing.
    assert reduceby(iseven, add, data, 0) == {False: 9, True: 6}
    assert reduceby(iseven, mul, data, 1) == {False: 15, True: 8}

    projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000},
                {'name': 'fight crime', 'state': 'IL', 'cost': 100000},
                {'name': 'help farmers', 'state': 'IL', 'cost': 2000000},
                {'name': 'help farmers', 'state': 'CA', 'cost': 200000}]
    assert reduceby(lambda x: x['state'],
                    lambda acc, x: acc + x['cost'],
                    projects, 0) == {'CA': 1200000, 'IL': 2100000}

    # A non-callable key gives the same grouping as the lambda above.
    assert reduceby('state',
                    lambda acc, x: acc + x['cost'],
                    projects, 0) == {'CA': 1200000, 'IL': 2100000}
284 | 
285 | 
def test_reduce_by_init():
    """``reduceby`` without an initial value, and with the look-alike string."""
    numbers = [1, 2, 3, 4]
    expected = {True: 2 + 4, False: 1 + 3}

    # No initial value, then the equal-but-not-identical sentinel string.
    for extra in ((), (no_default2,)):
        assert reduceby(iseven, add, numbers, *extra) == expected
290 | 
291 | 
def test_reduce_by_callable_default():
    """``reduceby`` accepts a callable (here ``set``) as the initial value."""
    def set_add(seen, item):
        seen.add(item)
        return seen

    grouped = reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2], set)
    assert grouped == {True: {2, 4}, False: {1, 3}}
299 | 
300 | 
def test_iterate():
    """``iterate`` repeatedly applies a function, starting from a seed."""
    counting = itertools.islice(iterate(inc, 0), 0, 5)
    assert list(counting) == [0, 1, 2, 3, 4]

    powers_of_two = take(4, iterate(double, 1))
    assert list(powers_of_two) == [1, 2, 4, 8]
304 | 
305 | 
def test_accumulate():
    """Check ``accumulate`` with and without an initial value."""
    one_to_five = [1, 2, 3, 4, 5]
    assert list(accumulate(add, one_to_five)) == [1, 3, 6, 10, 15]
    assert list(accumulate(mul, one_to_five)) == [1, 2, 6, 24, 120]
    assert list(accumulate(add, one_to_five, -1)) == [-1, 0, 2, 5, 9, 14]

    def binop(a, b):
        raise AssertionError('binop should not be called')

    # With empty input the operator must never run.
    start = object()
    assert list(accumulate(binop, [], start)) == [start]
    assert list(accumulate(binop, [])) == []

    # The look-alike string behaves as if no initial value was given.
    assert list(accumulate(add, [1, 2, 3], no_default2)) == [1, 3, 6]
318 | 
319 | 
def test_accumulate_works_on_consumable_iterables():
    """``accumulate`` handles a one-shot iterator as input."""
    one_shot = iter((1, 2, 3))
    running_totals = list(accumulate(add, one_shot))
    assert running_totals == [1, 3, 6]
322 | 
323 | 
def test_sliding_window():
    """``sliding_window(n, seq)`` yields overlapping tuples of length ``n``."""
    numbers = [1, 2, 3, 4]
    cases = (
        (2, [(1, 2), (2, 3), (3, 4)]),
        (3, [(1, 2, 3), (2, 3, 4)]),
    )
    for width, expected in cases:
        assert list(sliding_window(width, numbers)) == expected
327 | 
328 | 
def test_sliding_window_of_short_iterator():
    """A window wider than the input produces nothing."""
    for width in (3, 7):
        assert list(sliding_window(width, [1, 2])) == []
332 | 
333 | 
def test_partition():
    """``partition`` yields equal-length tuples, dropping or padding the tail."""
    evenly_split = list(partition(2, [1, 2, 3, 4]))
    assert evenly_split == [(1, 2), (3, 4)]

    # Without ``pad`` the leftover item is dropped.
    leftover_dropped = list(partition(3, range(7)))
    assert leftover_dropped == [(0, 1, 2), (3, 4, 5)]

    # With ``pad`` the final tuple is filled up.
    padded = list(partition(3, range(4), pad=-1))
    assert padded == [(0, 1, 2), (3, -1, -1)]

    assert list(partition(2, [])) == []
340 | 
341 | 
def test_partition_all():
    """``partition_all`` keeps the short tail; also covers two regressions."""
    assert list(partition_all(2, [1, 2, 3, 4])) == [(1, 2), (3, 4)]
    assert list(partition_all(3, range(5))) == [(0, 1, 2), (3, 4)]
    assert list(partition_all(2, [])) == []

    # Regression test: https://github.com/pytoolz/toolz/issues/387
    class NoCompare:
        def __eq__(self, other):
            # Comparing against any other class is an error.
            if self.__class__ != other.__class__:
                raise ValueError()
            return True

    item = NoCompare()
    expected = [(item, item, item, item), (item, item, item)]
    for source in ([item] * 7, iter([item] * 7)):
        assert list(partition_all(4, source)) == expected

    # Test invalid __len__: https://github.com/pytoolz/toolz/issues/602
    class ListWithBadLength(list):
        def __init__(self, contents, off_by=1):
            self.off_by = off_by
            super().__init__(contents)

        def __len__(self):
            # Misreport the true length by ``off_by``.
            true_length = super().__len__()
            return true_length + self.off_by

    for off_by in (+1, -1):
        lying_list = ListWithBadLength([1, 2], off_by=off_by)
        assert raises(LookupError, lambda: list(partition_all(5, lying_list)))
371 | 
372 | 
def test_count():
    """``count`` gives the number of items in sequences and iterators."""
    cases = [
        ((1, 2, 3), 3),
        ([], 0),
        (iter((1, 2, 3, 4)), 4),
        ('hello', 5),
        (iter('hello'), 5),
    ]
    for seq, expected in cases:
        assert count(seq) == expected
380 | 
381 | 
def test_pluck():
    """Check ``pluck`` with indices, keys, key lists, defaults and failures."""
    def plucked(*args):
        # Materialise the result so it can be compared with a list.
        return list(pluck(*args))

    rows = [[0, 1], [2, 3], [4, 5]]
    assert plucked(0, rows) == [0, 2, 4]
    assert plucked([0, 1], [[0, 1, 2], [3, 4, 5]]) == [(0, 1), (3, 4)]
    assert plucked(1, [[0], [0, 1]], None) == [None, 1]

    data = [{'id': 1, 'name': 'cheese'}, {'id': 2, 'name': 'pies', 'price': 1}]
    assert plucked('id', data) == [1, 2]
    assert plucked('price', data, 0) == [0, 1]
    assert plucked(['id', 'name'], data) == [(1, 'cheese'), (2, 'pies')]
    assert plucked(['name'], data) == [('cheese',), ('pies',)]
    assert plucked(['price', 'other'], data, 0) == [(0, 0), (1, 0)]

    # Without a default, a missing index or key raises.
    assert raises(IndexError, lambda: plucked(1, [[0]]))
    assert raises(KeyError, lambda: plucked('name', [{'id': 1}]))

    # The look-alike string behaves as if no default was given.
    assert plucked(0, rows, no_default2) == [0, 2, 4]
    assert raises(IndexError, lambda: plucked(1, [[0]], no_default2))
399 | 
400 | 
def test_join():
    """``join`` pairs items from two sequences whose keys are equal."""
    names = [(1, 'one'), (2, 'two'), (3, 'three')]
    fruit = [('apple', 1), ('orange', 1), ('banana', 2), ('coconut', 2)]

    # Each joined pair of tuples is concatenated into one flat tuple.
    result = set(starmap(add, join(first, names, second, fruit)))

    expected = {(1, 'one', 'apple', 1),
                (1, 'one', 'orange', 1),
                (2, 'two', 'banana', 2),
                (2, 'two', 'coconut', 2)}

    assert result == expected

    # The look-alike string as both defaults gives the same result as
    # passing no defaults at all.
    result = set(starmap(add, join(first, names, second, fruit,
                                   left_default=no_default2,
                                   right_default=no_default2)))
    assert result == expected
421 | 
422 | 
def test_getter():
    """``getter`` takes one index or a list of indices (possibly empty)."""
    word = 'Alice'

    single = getter(0)
    assert single(word) == 'A'

    one_item_list = getter([0])
    assert one_item_list(word) == ('A',)

    empty_list = getter([])
    assert empty_list(word) == ()
427 | 
428 | 
def test_key_as_getter():
    """Index and index-list keys given to ``join`` match key functions."""
    squares = [(n, n**2) for n in range(5)]
    pows = [(n, n**2, n**3) for n in range(5)]

    # Each non-callable key is paired with the function it should act like.
    equivalents = [
        (0, lambda row: row[0]),
        ([0, 1], lambda row: (row[0], row[1])),
        ([0], lambda row: (row[0],)),
    ]
    for key, keyfunc in equivalents:
        joined_by_key = set(join(key, squares, key, pows))
        joined_by_func = set(join(keyfunc, squares, keyfunc, pows))
        assert joined_by_key == joined_by_func
443 | 
444 | 
def test_join_double_repeats():
    """Repeated keys on both sides yield every left/right combination."""
    names = [(1, 'one'), (2, 'two'), (3, 'three'), (1, 'uno'), (2, 'dos')]
    fruit = [('apple', 1), ('orange', 1), ('banana', 2), ('coconut', 2)]

    joined = join(first, names, second, fruit)
    # Concatenate each (name, fruit) pair into one flat tuple.
    result = {add(left, right) for left, right in joined}

    expected = {
        (1, 'one', 'apple', 1),
        (1, 'one', 'orange', 1),
        (2, 'two', 'banana', 2),
        (2, 'two', 'coconut', 2),
        (1, 'uno', 'apple', 1),
        (1, 'uno', 'orange', 1),
        (2, 'dos', 'banana', 2),
        (2, 'dos', 'coconut', 2),
    }

    assert result == expected
461 | 
462 | 
def test_join_missing_element():
    """Rows whose key has no partner on the other side are dropped."""
    names = [(1, 'one'), (2, 'two'), (3, 'three')]
    fruit = [('apple', 5), ('orange', 1)]

    joined = join(first, names, second, fruit)
    # Concatenate each (name, fruit) pair into one flat tuple.
    result = {add(left, right) for left, right in joined}

    # Only key 1 appears in both inputs.
    assert result == {(1, 'one', 'orange', 1)}
472 | 
473 | 
def test_left_outer_join():
    """With ``left_default``, unmatched right items pair with the default."""
    pairs = join(identity, [1, 2], identity, [2, 3], left_default=None)

    assert set(pairs) == {(2, 2), (None, 3)}
479 | 
480 | 
def test_right_outer_join():
    """With ``right_default``, unmatched left items pair with the default."""
    pairs = join(identity, [1, 2], identity, [2, 3], right_default=None)

    assert set(pairs) == {(2, 2), (1, None)}
486 | 
487 | 
def test_outer_join():
    """With both defaults set, unmatched items from either side are kept."""
    pairs = join(
        identity, [1, 2],
        identity, [2, 3],
        left_default=None,
        right_default=None,
    )

    assert set(pairs) == {(2, 2), (1, None), (None, 3)}
494 | 
495 | 
def test_diff():
    """Check ``diff`` for bad arguments, both calling styles, and ``key``.

    ``diff`` is called both variadically (``diff(a, b, ...)``) and with a
    single list of sequences (``diff([a, b, ...])``).
    """
    # Each of these calls is expected to raise TypeError.
    assert raises(TypeError, lambda: list(diff()))
    assert raises(TypeError, lambda: list(diff([1, 2])))
    assert raises(TypeError, lambda: list(diff([1, 2], 3)))

    # Only positions where the items are not all equal are reported.
    assert list(diff([1, 2], (1, 2), iter([1, 2]))) == []
    assert list(diff([1, 2, 3], (1, 10, 3), iter([1, 2, 10]))) == [
        (2, 10, 2), (3, 3, 10)]

    # Without ``default`` the comparison stops at the shortest input; with
    # it, the missing positions are filled by the default value.
    assert list(diff([1, 2], [10])) == [(1, 10)]
    assert list(diff([1, 2], [10], default=None)) == [(1, 10), (2, None)]

    # non-variadic usage
    assert raises(TypeError, lambda: list(diff([])))
    assert raises(TypeError, lambda: list(diff([[]])))
    assert raises(TypeError, lambda: list(diff([[1, 2]])))
    assert raises(TypeError, lambda: list(diff([[1, 2], 3])))
    assert list(diff([(1, 2), (1, 3)])) == [(2, 3)]

    data1 = [{'cost': 1, 'currency': 'dollar'},
             {'cost': 2, 'currency': 'dollar'}]

    data2 = [{'cost': 100, 'currency': 'yen'},
             {'cost': 300, 'currency': 'yen'}]

    conversions = {'dollar': 1, 'yen': 0.01}

    def indollars(item):
        return conversions[item['currency']] * item['cost']

    # With ``key``, items are compared by their dollar value: 1 dollar equals
    # 100 yen, so only the second pair (2 dollars vs. 300 yen) differs.
    # This comparison previously had no ``assert`` and was never checked.
    assert list(diff(data1, data2, key=indollars)) == [
        ({'cost': 2, 'currency': 'dollar'}, {'cost': 300, 'currency': 'yen'})]
525 | 
526 | 
def test_topk():
    """``topk`` returns the k largest items as a tuple, largest first.

    ``key`` may be a callable or a non-callable index/key into each item.
    """
    def negate(x):
        return -x

    assert topk(2, [4, 1, 5, 2]) == (5, 4)
    # Negating the key turns "largest" into "smallest".
    assert topk(2, [4, 1, 5, 2], key=negate) == (1, 2)
    assert topk(2, iter([5, 1, 4, 2]), key=negate) == (1, 2)

    # A string key selects the dict entry to rank by.
    records = [{'a': 1, 'b': 10}, {'a': 2, 'b': 9},
               {'a': 10, 'b': 1}, {'a': 9, 'b': 2}]
    best_by_a = topk(2, records, key='a')
    assert best_by_a == ({'a': 10, 'b': 1}, {'a': 9, 'b': 2})

    best_by_b = topk(2, records, key='b')
    assert best_by_b == ({'a': 1, 'b': 10}, {'a': 2, 'b': 9})

    # An integer key passed positionally selects the tuple position.
    pairs = [(0, 4), (1, 3), (2, 2), (3, 1), (4, 0)]
    assert topk(2, pairs, 0) == ((4, 0), (3, 1))
541 | 
542 | 
def test_topk_is_stable():
    """Items whose keys all tie come back in their original input order."""
    def constant_key(_):
        return 1

    data = [5, 9, 2, 1, 5, 3]
    assert topk(4, data, key=constant_key) == (5, 9, 2, 1)
545 | 
546 | 
def test_peek():
    """``peek`` returns the first item plus an iterable over all items."""
    names = ["Alice", "Bob", "Carol"]

    head, everything = peek(names)
    assert head == names[0]
    # The returned iterable still yields the peeked element.
    assert list(everything) == names

    # Nothing to peek at in an empty input.
    def peek_empty():
        return peek([])

    assert raises(StopIteration, peek_empty)
554 | 
555 | 
def test_peekn():
    """``peekn`` returns the first n items plus an iterable over all items."""
    names = ("Alice", "Bob", "Carol")

    leading, everything = peekn(2, names)
    assert leading == names[:2]
    assert tuple(everything) == names

    # Asking for more items than exist returns just the available ones.
    oversized = len(names) * 4
    leading, everything = peekn(oversized, names)
    assert leading == names
    assert tuple(everything) == names
565 | 
566 | 
def test_random_sample():
    """``random_sample`` is reproducible for a given ``random_state``."""
    population = list(range(100))

    # With probability 1 every element is kept.
    everything = random_sample(prob=1, seq=population, random_state=2016)
    assert list(everything) == population

    def sample(random_state=1):
        return list(random_sample(prob=0.1,
                                  seq=population,
                                  random_state=random_state))

    # The same seed gives the same sample.
    default_sample = sample()
    assert default_sample == sample()

    # A ``Random`` instance seeded with 1984 matches the bare seed 1984.
    seeded_sample = sample(1984)
    assert seeded_sample == sample(Random(1984))

    # Different seeds give different samples.
    assert default_sample != seeded_sample

    assert sample(hash(object)) == sample(hash(object))
    assert sample(hash(object)) != sample(hash(object()))
    assert sample(b"a") == sample("a")

    # A list is not an acceptable ``random_state``.
    assert raises(TypeError, lambda: sample([]))
589 | 


--------------------------------------------------------------------------------