├── .binstar.yml ├── .coveragerc ├── .gitignore ├── .travis.yml ├── AUTHORS.md ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── bench ├── test_curry.py ├── test_curry_baseline.py ├── test_first.py ├── test_first_iter.py ├── test_frequencies.py ├── test_get.py ├── test_get_list.py ├── test_groupby.py ├── test_join.py ├── test_memoize.py ├── test_memoize_kwargs.py ├── test_pluck.py ├── test_sliding_window.py └── test_wordcount.py ├── conda.recipe ├── bld.bat ├── build.sh └── meta.yaml ├── doc ├── Makefile ├── make.bat └── source │ ├── api.rst │ ├── composition.rst │ ├── conf.py │ ├── control.rst │ ├── curry.rst │ ├── heritage.rst │ ├── index.rst │ ├── install.rst │ ├── laziness.rst │ ├── parallelism.rst │ ├── purity.rst │ ├── references.rst │ ├── streaming-analytics.rst │ └── tips-and-tricks.rst ├── examples ├── fib.py ├── graph.py └── wordcount.py ├── release-notes ├── setup.py └── toolz ├── __init__.py ├── compatibility.py ├── curried ├── __init__.py ├── exceptions.py └── operator.py ├── dicttoolz.py ├── functoolz.py ├── itertoolz.py ├── recipes.py ├── sandbox ├── __init__.py ├── core.py ├── parallel.py └── tests │ ├── test_core.py │ └── test_parallel.py ├── tests ├── test_compatibility.py ├── test_curried.py ├── test_dicttoolz.py ├── test_functoolz.py ├── test_itertoolz.py ├── test_recipes.py ├── test_serialization.py └── test_utils.py └── utils.py /.binstar.yml: -------------------------------------------------------------------------------- 1 | package: toolz 2 | platform: 3 | - linux-64 4 | - linux-32 5 | - osx-64 6 | - win-64 7 | - win-32 8 | engine: 9 | - python=2.6 10 | - python=2.7 11 | - python=3.3 12 | - python=3.4 13 | script: 14 | - conda build conda.recipe 15 | build_targets: 16 | files: conda 17 | channels: main 18 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | toolz/tests/test* 4 | 
toolz/*/tests/test* 5 | toolz/compatibility.py 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | build/ 3 | dist/ 4 | *.egg-info/ 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | python: 4 | - "2.6" 5 | - "2.7" 6 | - "3.3" 7 | - "3.4" 8 | - "3.5" 9 | - "pypy" 10 | - "pypy3" 11 | 12 | env: 13 | - PEP8_IGNORE="E731,W503" 14 | 15 | # command to install dependencies 16 | install: 17 | - pip install coverage pep8 18 | 19 | # command to run tests 20 | # require 100% coverage (not including test files) to pass Travis CI test 21 | # To skip pypy: - if [[ $TRAVIS_PYTHON_VERSION != 'pypy' ]]; then DOSTUFF ; fi 22 | script: 23 | - coverage run --source=toolz $(which nosetests) 24 | --with-doctest 25 | - if [[ $TRAVIS_PYTHON_VERSION != pypy* ]]; then coverage report --show-missing --fail-under=100 ; fi 26 | - if [[ $TRAVIS_PYTHON_VERSION != pypy* ]]; then pep8 --ignore=$PEP8_IGNORE --exclude=conf.py,tests,examples,bench -r --show-source . 
; fi 27 | 28 | # load coverage status to https://coveralls.io 29 | after_success: 30 | - if [[ $TRAVIS_PYTHON_VERSION != pypy* ]]; then pip install coveralls --use-mirrors ; coveralls ; fi 31 | 32 | notifications: 33 | email: false 34 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | [Matthew Rocklin](http://matthewrocklin.com) [@mrocklin](http://github.com/mrocklin/) 2 | 3 | [John Jacobsen](http://eigenhombre.com) [@eigenhombre](http://github.com/eigenhombre/) 4 | 5 | Erik Welch [@eriknw](https://github.com/eriknw/) 6 | 7 | John Crichton [@jcrichton](https://github.com/jcrichton/) 8 | 9 | Han Semaj [@microamp](https://github.com/microamp/) 10 | 11 | [Graeme Coupar](https://twitter.com/obmarg) [@obmarg](https://github.com/obmarg/) 12 | 13 | [Leonid Shvechikov](http://brainstorage.me/shvechikov) [@shvechikov](https://github.com/shvechikov) 14 | 15 | Lars Buitinck [@larsmans](http://github.com/larsmans) 16 | 17 | José Ricardo [@josericardo](https://github.com/josericardo) 18 | 19 | Tom Prince [@tomprince](https://github.com/tomprince) 20 | 21 | Bart van Merriënboer [@bartvm](https://github.com/bartvm) 22 | 23 | Nikolaos-Digenis Karagiannis [@digenis](https://github.com/digenis/) 24 | 25 | [Antonio Lima](https://twitter.com/themiurgo) [@themiurgo](https://github.com/themiurgo/) 26 | 27 | Joe Jevnik [@llllllllll](https://github.com/llllllllll) 28 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Matthew Rocklin 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | a. 
Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | b. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | c. Neither the name of toolz nor the names of its contributors 14 | may be used to endorse or promote products derived from this software 15 | without specific prior written permission. 16 | 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 28 | DAMAGE. 29 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include toolz/tests/*.py 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Toolz 2 | ===== 3 | 4 | |Build Status| |Coverage Status| |Version Status| |Downloads| 5 | 6 | A set of utility functions for iterators, functions, and dictionaries. 
7 | 8 | See the PyToolz documentation at http://toolz.readthedocs.org 9 | 10 | LICENSE 11 | ------- 12 | 13 | New BSD. See `License File `__. 14 | 15 | Install 16 | ------- 17 | 18 | ``toolz`` is on the Python Package Index (PyPI): 19 | 20 | :: 21 | 22 | pip install toolz 23 | 24 | or 25 | 26 | :: 27 | 28 | easy_install toolz 29 | 30 | Structure and Heritage 31 | ---------------------- 32 | 33 | ``toolz`` is implemented in three parts: 34 | 35 | |literal itertoolz|_, for operations on iterables. Examples: ``groupby``, 36 | ``unique``, ``interpose``, 37 | 38 | |literal functoolz|_, for higher-order functions. Examples: ``memoize``, 39 | ``curry``, ``compose`` 40 | 41 | |literal dicttoolz|_, for operations on dictionaries. Examples: ``assoc``, 42 | ``update-in``, ``merge``. 43 | 44 | .. |literal itertoolz| replace:: ``itertoolz`` 45 | .. _literal itertoolz: https://github.com/pytoolz/toolz/blob/master/toolz/itertoolz.py 46 | 47 | .. |literal functoolz| replace:: ``functoolz`` 48 | .. _literal functoolz: https://github.com/pytoolz/toolz/blob/master/toolz/functoolz.py 49 | 50 | .. |literal dicttoolz| replace:: ``dicttoolz`` 51 | .. _literal dicttoolz: https://github.com/pytoolz/toolz/blob/master/toolz/dicttoolz.py 52 | 53 | These functions come from the legacy of functional languages for list 54 | processing. They interoperate well to accomplish common complex tasks. 55 | 56 | Read our `API 57 | Documentation `__ for 58 | more details. 59 | 60 | Example 61 | ------- 62 | 63 | This builds a standard wordcount function from pieces within ``toolz``: 64 | 65 | .. code:: python 66 | 67 | >>> def stem(word): 68 | ... """ Stem word to primitive form """ 69 | ... return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") 70 | 71 | >>> from toolz import compose, frequencies, partial 72 | >>> wordcount = compose(frequencies, partial(map, stem), str.split) 73 | 74 | >>> sentence = "This cat jumped over this other cat!" 
75 | >>> wordcount(sentence) 76 | {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1} 77 | 78 | Dependencies 79 | ------------ 80 | 81 | ``toolz`` supports Python 2.6+ and Python 3.3+ with a common codebase. 82 | It is pure Python and requires no dependencies beyond the standard 83 | library. 84 | 85 | It is, in short, a light weight dependency. 86 | 87 | 88 | CyToolz 89 | ------- 90 | 91 | The ``toolz`` project has been reimplemented in `Cython `__. 92 | The ``cytoolz`` project is a drop-in replacement for the Pure Python 93 | implementation. 94 | See `CyToolz Github Page `__ for more 95 | details. 96 | 97 | See Also 98 | -------- 99 | 100 | - `Underscore.js `__: A similar library for 101 | JavaScript 102 | - `Enumerable `__: A 103 | similar library for Ruby 104 | - `Clojure `__: A functional language whose 105 | standard library has several counterparts in ``toolz`` 106 | - `itertools `__: The 107 | Python standard library for iterator tools 108 | - `functools `__: The 109 | Python standard library for function tools 110 | 111 | Contributions Welcome 112 | --------------------- 113 | 114 | ``toolz`` aims to be a repository for utility functions, particularly 115 | those that come from the functional programming and list processing 116 | traditions. We welcome contributions that fall within this scope. 117 | 118 | We also try to keep the API small to keep ``toolz`` manageable. The ideal 119 | contribution is significantly different from existing functions and has 120 | precedent in a few other functional systems. 121 | 122 | Please take a look at our 123 | `issue page `__ 124 | for contribution ideas. 125 | 126 | Community 127 | --------- 128 | 129 | See our `mailing list `__. 130 | We're friendly. 131 | 132 | .. |Build Status| image:: https://travis-ci.org/pytoolz/toolz.svg 133 | :target: https://travis-ci.org/pytoolz/toolz 134 | .. 
|Coverage Status| image:: https://coveralls.io/repos/pytoolz/toolz/badge.svg 135 | :target: https://coveralls.io/r/pytoolz/toolz 136 | .. |Version Status| image:: https://badge.fury.io/py/toolz.svg 137 | :target: http://badge.fury.io/py/toolz 138 | .. |Downloads| image:: https://img.shields.io/pypi/dm/toolz.svg 139 | :target: https://pypi.python.org/pypi/toolz/ 140 | -------------------------------------------------------------------------------- /bench/test_curry.py: -------------------------------------------------------------------------------- 1 | from toolz.curried import get 2 | 3 | 4 | pairs = [(1, 2) for i in range(100000)] 5 | 6 | 7 | def test_get_curried(): 8 | first = get(0) 9 | for p in pairs: 10 | first(p) 11 | -------------------------------------------------------------------------------- /bench/test_curry_baseline.py: -------------------------------------------------------------------------------- 1 | from toolz import get 2 | from functools import partial 3 | 4 | 5 | pairs = [(1, 2) for i in range(100000)] 6 | 7 | 8 | def test_get(): 9 | first = partial(get, 0) 10 | for p in pairs: 11 | first(p) 12 | -------------------------------------------------------------------------------- /bench/test_first.py: -------------------------------------------------------------------------------- 1 | from toolz import first, second 2 | 3 | pairs = [(1, 2) for i in range(1000000)] 4 | 5 | 6 | def test_first(): 7 | for p in pairs: 8 | first(p) 9 | 10 | 11 | def test_second(): 12 | for p in pairs: 13 | second(p) 14 | -------------------------------------------------------------------------------- /bench/test_first_iter.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from toolz import first, second 3 | 4 | 5 | def test_first_iter(): 6 | iters = map(iter, [(1, 2) for i in range(1000000)]) 7 | for p in iters: 8 | first(p) 9 | 10 | 11 | def test_second_iter(): 12 | iters = map(iter, [(1, 2) for i in 
range(1000000)]) 13 | for p in iters: 14 | second(p) 15 | -------------------------------------------------------------------------------- /bench/test_frequencies.py: -------------------------------------------------------------------------------- 1 | from toolz import frequencies, identity 2 | 3 | 4 | big_data = list(range(1000)) * 1000 5 | small_data = list(range(100)) 6 | 7 | 8 | def test_frequencies(): 9 | frequencies(big_data) 10 | 11 | 12 | def test_frequencies_small(): 13 | for i in range(1000): 14 | frequencies(small_data) 15 | -------------------------------------------------------------------------------- /bench/test_get.py: -------------------------------------------------------------------------------- 1 | from toolz import get 2 | 3 | tuples = [(1, 2, 3) for i in range(100000)] 4 | 5 | 6 | def test_get(): 7 | for tup in tuples: 8 | get(1, tup) 9 | -------------------------------------------------------------------------------- /bench/test_get_list.py: -------------------------------------------------------------------------------- 1 | from toolz import get 2 | 3 | tuples = [(1, 2, 3) for i in range(100000)] 4 | 5 | 6 | def test_get(): 7 | for tup in tuples: 8 | get([1, 2], tup) 9 | -------------------------------------------------------------------------------- /bench/test_groupby.py: -------------------------------------------------------------------------------- 1 | from toolz import groupby, identity 2 | 3 | 4 | data = list(range(1000)) * 1000 5 | 6 | 7 | def test_groupby(): 8 | groupby(identity, data) 9 | -------------------------------------------------------------------------------- /bench/test_join.py: -------------------------------------------------------------------------------- 1 | from toolz.curried import * 2 | import random 3 | 4 | try: 5 | xrange 6 | except NameError: 7 | xrange = range 8 | 9 | def burn(seq): 10 | for item in seq: 11 | pass 12 | 13 | 14 | small = [(i, str(i)) for i in range(100)] * 10 15 | big = pipe([110]*10000, 
map(range), concat, list) 16 | 17 | 18 | def test_many_to_many_large(): 19 | burn(join(get(0), small, identity, big)) 20 | 21 | 22 | def test_one_to_one_tiny(): 23 | A = list(range(20)) 24 | B = A[::2] + A[1::2][::-1] 25 | 26 | for i in xrange(50000): 27 | burn(join(identity, A, identity, B)) 28 | 29 | 30 | def test_one_to_many(): 31 | A = list(range(20)) 32 | B = pipe([20]*1000, map(range), concat, list) 33 | 34 | for i in xrange(100): 35 | burn(join(identity, A, identity, B)) 36 | -------------------------------------------------------------------------------- /bench/test_memoize.py: -------------------------------------------------------------------------------- 1 | from toolz import memoize 2 | 3 | 4 | def test_memoize_no_kwargs(): 5 | @memoize 6 | def f(x): 7 | return x 8 | 9 | for i in range(100000): 10 | f(3) 11 | -------------------------------------------------------------------------------- /bench/test_memoize_kwargs.py: -------------------------------------------------------------------------------- 1 | from toolz import memoize 2 | 3 | 4 | def test_memoize_kwargs(): 5 | @memoize 6 | def f(x, y=3): 7 | return x 8 | 9 | for i in range(100000): 10 | f(3) 11 | -------------------------------------------------------------------------------- /bench/test_pluck.py: -------------------------------------------------------------------------------- 1 | from toolz import pluck 2 | 3 | tuples = [(1, 2, 3) for i in range(100000)] 4 | less_tuples = [(1, 2, 3) for i in range(100)] 5 | 6 | 7 | def test_pluck(): 8 | for i in pluck(2, tuples): 9 | pass 10 | 11 | for i in range(1000): 12 | tuple(pluck(2, less_tuples)) 13 | -------------------------------------------------------------------------------- /bench/test_sliding_window.py: -------------------------------------------------------------------------------- 1 | from toolz import sliding_window 2 | 3 | seq = range(1000000) 4 | 5 | 6 | def test_sliding_window(): 7 | list(sliding_window(3, seq)) 8 | 
-------------------------------------------------------------------------------- /bench/test_wordcount.py: -------------------------------------------------------------------------------- 1 | from toolz.curried import * 2 | import os 3 | 4 | if not os.path.exists('bench/shakespeare.txt'): 5 | os.system('wget http://www.gutenberg.org/ebooks/100.txt.utf-8' 6 | ' -O bench/shakespeare.txt') 7 | 8 | 9 | def stem(word): 10 | """ Stem word to primitive form """ 11 | return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") 12 | 13 | wordcount = comp(frequencies, map(stem), concat, map(str.split)) 14 | 15 | 16 | def test_shakespeare(): 17 | with open('bench/shakespeare.txt') as f: 18 | counts = wordcount(f) 19 | -------------------------------------------------------------------------------- /conda.recipe/bld.bat: -------------------------------------------------------------------------------- 1 | cd %RECIPE_DIR%\.. 2 | %PYTHON% setup.py install 3 | -------------------------------------------------------------------------------- /conda.recipe/build.sh: -------------------------------------------------------------------------------- 1 | cd $RECIPE_DIR/.. 
2 | $PYTHON setup.py install 3 | -------------------------------------------------------------------------------- /conda.recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: toolz 3 | version: "0.7.4" 4 | 5 | build: 6 | number: {{environ.get('BINSTAR_BUILD', 1)}} 7 | 8 | requirements: 9 | build: 10 | - setuptools 11 | - python 12 | 13 | run: 14 | - python 15 | 16 | test: 17 | requires: 18 | - pytest 19 | imports: 20 | - toolz 21 | commands: 22 | - py.test -x --doctest-modules --pyargs toolz 23 | 24 | about: 25 | home: http://toolz.readthedocs.org/ 26 | license: BSD 27 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 
11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. 
The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Toolz.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Toolz.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Toolz" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Toolz" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 
104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 
154 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. 
The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Toolz.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Toolz.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 
113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 
178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /doc/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | This page contains a comprehensive list of all functions within ``toolz``. 5 | Docstrings should provide sufficient understanding for any individual function. 6 | 7 | Itertoolz 8 | --------- 9 | 10 | .. currentmodule:: toolz.itertoolz 11 | 12 | .. autosummary:: 13 | accumulate 14 | concat 15 | concatv 16 | cons 17 | count 18 | diff 19 | drop 20 | first 21 | frequencies 22 | get 23 | groupby 24 | interleave 25 | interpose 26 | isdistinct 27 | isiterable 28 | iterate 29 | join 30 | last 31 | mapcat 32 | merge_sorted 33 | nth 34 | partition 35 | partition_all 36 | peek 37 | pluck 38 | reduceby 39 | remove 40 | second 41 | sliding_window 42 | take 43 | tail 44 | take_nth 45 | topk 46 | unique 47 | 48 | .. currentmodule:: toolz.recipes 49 | 50 | .. autosummary:: 51 | countby 52 | partitionby 53 | 54 | Functoolz 55 | --------- 56 | 57 | .. currentmodule:: toolz.functoolz 58 | 59 | .. autosummary:: 60 | complement 61 | compose 62 | curry 63 | do 64 | identity 65 | juxt 66 | memoize 67 | pipe 68 | thread_first 69 | thread_last 70 | 71 | Dicttoolz 72 | --------- 73 | 74 | .. currentmodule:: toolz.dicttoolz 75 | 76 | .. autosummary:: 77 | assoc 78 | dissoc 79 | get_in 80 | keyfilter 81 | keymap 82 | itemfilter 83 | itemmap 84 | merge 85 | merge_with 86 | update_in 87 | valfilter 88 | valmap 89 | 90 | Sandbox 91 | ------- 92 | 93 | .. currentmodule:: toolz.sandbox 94 | 95 | .. 
autosummary:: 96 | parallel.fold 97 | core.EqualityHashKey 98 | core.unzip 99 | 100 | 101 | Definitions 102 | ----------- 103 | 104 | .. automodule:: toolz.itertoolz 105 | :members: 106 | 107 | .. automodule:: toolz.recipes 108 | :members: 109 | 110 | .. automodule:: toolz.functoolz 111 | :members: 112 | 113 | .. automodule:: toolz.dicttoolz 114 | :members: 115 | 116 | .. automodule:: toolz.sandbox.core 117 | :members: 118 | 119 | .. automodule:: toolz.sandbox.parallel 120 | :members: 121 | -------------------------------------------------------------------------------- /doc/source/composition.rst: -------------------------------------------------------------------------------- 1 | Composability 2 | ============= 3 | 4 | Toolz functions interoperate because they consume and produce only a small 5 | set of common, core data structures. Each ``toolz`` function consumes 6 | just iterables, dictionaries, and functions and each ``toolz`` function produces 7 | just iterables, dictionaries, and functions. This standardized interface 8 | enables us to compose several general purpose functions to solve custom 9 | problems. 10 | 11 | Standard interfaces enable us to use many tools together, even if those tools 12 | were not designed with each other in mind. We call this "using together" 13 | composition. 14 | 15 | 16 | Standard Interface 17 | ------------------ 18 | 19 | This is best explained by two examples; the automobile industry and LEGOs. 20 | 21 | Autos 22 | ^^^^^ 23 | 24 | Automobile pieces are not widely composable because they do not adhere to a 25 | standard interface. You can't connect a Porsche engine to the body of a 26 | Volkswagen Beetle but include the safety features of your favorite luxury car. 27 | As a result when something breaks you need to find a specialist who understands 28 | exactly your collection of components and, depending on the popularity of your 29 | model, replacement parts may be difficult to find. 
While the customization 30 | provides a number of efficiencies important for automobiles, it limits the 31 | ability of downstream tinkerers. This ability for future developers to tinker 32 | is paramount in good software design. 33 | 34 | Lego 35 | ^^^^ 36 | 37 | Contrast this with Lego toys. With Lego you *can* connect a rocket engine and 38 | skis to a rowboat. This is a perfectly natural thing to do because every piece 39 | adheres to a simple interface - those simple and regular 5mm circular bumps. 40 | This freedom to connect pieces at will lets children unleash their imagination 41 | in such varied ways (like going arctic shark hunting with a rocket-ski-boat). 42 | 43 | The abstractions in programming make it far more like Lego than like building 44 | cars. This breaks down a little when we start to be constrained by performance 45 | or memory issues but this affects only a very small fraction of applications. 46 | Most of the time we have the freedom to operate in the Lego model if we choose 47 | to give up customization and embrace simple core standards. 48 | 49 | 50 | Other Standard Interfaces 51 | ------------------------- 52 | 53 | The Toolz project builds off of a standard interface -- this choice is not 54 | unique. Other standard interfaces exist and provide immeasurable benefit to 55 | their application areas. 56 | 57 | The NumPy array serves as a foundational object for numeric and scientific 58 | computing within Python. The ability of any project to consume and produce 59 | NumPy arrays is largely responsible for the broad success of the 60 | various SciPy projects. We see similar development today with the Pandas 61 | DataFrame. 62 | 63 | The UNIX toolset relies on files and streams of text. 64 | 65 | JSON emerged as the standard interface for communication over the web. The 66 | virtues of standardization become glaringly apparent when we contrast JSON with 67 | its predecessor, XML. 
XML was designed to be extensible/customizable, allowing 68 | each application to design its own interface. This resulted in a sea of 69 | difficult to understand custom data languages that failed to develop a common 70 | analytic and data processing infrastructure. In contrast JSON is very 71 | restrictive and allows only a fixed set of data structures, namely lists, 72 | dictionaries, numbers, strings. Fortunately this set is common to most modern 73 | languages and so JSON is extremely widely supported, perhaps falling second 74 | only to CSV. 75 | 76 | Standard interfaces permeate physical reality as well. Examples range 77 | from supra-national currencies to drill bits and electrical circuitry. In all 78 | cases the interoperation that results becomes a defining and invaluable feature 79 | of each solution. 80 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Toolz documentation build configuration file, created by 4 | # sphinx-quickstart on Sun Sep 22 18:06:00 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
19 | #sys.path.insert(0, os.path.abspath('.')) 20 | sys.path.insert(0, os.path.abspath('.')) 21 | 22 | # -- General configuration ----------------------------------------------------- 23 | 24 | # If your documentation needs a minimal Sphinx version, state it here. 25 | #needs_sphinx = '1.0' 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be extensions 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 29 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', 'sphinx.ext.autosummary'] 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ['_templates'] 33 | 34 | # The suffix of source filenames. 35 | source_suffix = '.rst' 36 | 37 | # The encoding of source files. 38 | #source_encoding = 'utf-8-sig' 39 | 40 | # The master toctree document. 41 | master_doc = 'index' 42 | 43 | # General information about the project. 44 | project = u'Toolz' 45 | copyright = u'2013, Matthew Rocklin, John Jacobsen' 46 | 47 | # The version info for the project you're documenting, acts as replacement for 48 | # |version| and |release|, also used in various other places throughout the 49 | # built documents. 50 | # 51 | # The short X.Y version. 52 | import toolz 53 | version = toolz.__version__ 54 | # The full version, including alpha/beta/rc tags. 55 | release = toolz.__version__ 56 | 57 | # The language for content autogenerated by Sphinx. Refer to documentation 58 | # for a list of supported languages. 59 | #language = None 60 | 61 | # There are two options for replacing |today|: either, you set today to some 62 | # non-false value, then it is used: 63 | #today = '' 64 | # Else, today_fmt is used as the format for a strftime call. 65 | #today_fmt = '%B %d, %Y' 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 
69 | exclude_patterns = [] 70 | 71 | # The reST default role (used for this markup: `text`) to use for all documents. 72 | #default_role = None 73 | 74 | # If true, '()' will be appended to :func: etc. cross-reference text. 75 | #add_function_parentheses = True 76 | 77 | # If true, the current module name will be prepended to all description 78 | # unit titles (such as .. function::). 79 | #add_module_names = True 80 | 81 | # If true, sectionauthor and moduleauthor directives will be shown in the 82 | # output. They are ignored by default. 83 | #show_authors = False 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | pygments_style = 'sphinx' 87 | 88 | # A list of ignored prefixes for module index sorting. 89 | #modindex_common_prefix = [] 90 | 91 | 92 | # -- Options for HTML output --------------------------------------------------- 93 | 94 | # The theme to use for HTML and HTML Help pages. See the documentation for 95 | # a list of builtin themes. 96 | html_theme = 'default' 97 | 98 | # Theme options are theme-specific and customize the look and feel of a theme 99 | # further. For a list of options available for each theme, see the 100 | # documentation. 101 | #html_theme_options = {} 102 | 103 | # Add any paths that contain custom themes here, relative to this directory. 104 | #html_theme_path = [] 105 | 106 | # The name for this set of Sphinx documents. If None, it defaults to 107 | # " v documentation". 108 | #html_title = None 109 | 110 | # A shorter title for the navigation bar. Default is the same as html_title. 111 | #html_short_title = None 112 | 113 | # The name of an image file (relative to this directory) to place at the top 114 | # of the sidebar. 115 | #html_logo = None 116 | 117 | # The name of an image file (within the static path) to use as favicon of the 118 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 119 | # pixels large. 
120 | #html_favicon = None 121 | 122 | # Add any paths that contain custom static files (such as style sheets) here, 123 | # relative to this directory. They are copied after the builtin static files, 124 | # so a file named "default.css" will overwrite the builtin "default.css". 125 | html_static_path = ['_static'] 126 | 127 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 128 | # using the given strftime format. 129 | #html_last_updated_fmt = '%b %d, %Y' 130 | 131 | # If true, SmartyPants will be used to convert quotes and dashes to 132 | # typographically correct entities. 133 | #html_use_smartypants = True 134 | 135 | # Custom sidebar templates, maps document names to template names. 136 | #html_sidebars = {} 137 | 138 | # Additional templates that should be rendered to pages, maps page names to 139 | # template names. 140 | #html_additional_pages = {} 141 | 142 | # If false, no module index is generated. 143 | #html_domain_indices = True 144 | 145 | # If false, no index is generated. 146 | #html_use_index = True 147 | 148 | # If true, the index is split into individual pages for each letter. 149 | #html_split_index = False 150 | 151 | # If true, links to the reST sources are added to the pages. 152 | #html_show_sourcelink = True 153 | 154 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 155 | #html_show_sphinx = True 156 | 157 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 158 | #html_show_copyright = True 159 | 160 | # If true, an OpenSearch description file will be output, and all pages will 161 | # contain a tag referring to it. The value of this option must be the 162 | # base URL from which the finished HTML is served. 163 | #html_use_opensearch = '' 164 | 165 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 166 | #html_file_suffix = None 167 | 168 | # Output file base name for HTML help builder. 
169 | htmlhelp_basename = 'Toolzdoc' 170 | 171 | 172 | # -- Options for LaTeX output -------------------------------------------------- 173 | 174 | latex_elements = { 175 | # The paper size ('letterpaper' or 'a4paper'). 176 | #'papersize': 'letterpaper', 177 | 178 | # The font size ('10pt', '11pt' or '12pt'). 179 | #'pointsize': '10pt', 180 | 181 | # Additional stuff for the LaTeX preamble. 182 | #'preamble': '', 183 | } 184 | 185 | # Grouping the document tree into LaTeX files. List of tuples 186 | # (source start file, target name, title, author, documentclass [howto/manual]). 187 | latex_documents = [ 188 | ('index', 'Toolz.tex', u'Toolz Documentation', 189 | u'Matthew Rocklin, John Jacobsen', 'manual'), 190 | ] 191 | 192 | # The name of an image file (relative to this directory) to place at the top of 193 | # the title page. 194 | #latex_logo = None 195 | 196 | # For "manual" documents, if this is true, then toplevel headings are parts, 197 | # not chapters. 198 | #latex_use_parts = False 199 | 200 | # If true, show page references after internal links. 201 | #latex_show_pagerefs = False 202 | 203 | # If true, show URL addresses after external links. 204 | #latex_show_urls = False 205 | 206 | # Documents to append as an appendix to all manuals. 207 | #latex_appendices = [] 208 | 209 | # If false, no module index is generated. 210 | #latex_domain_indices = True 211 | 212 | 213 | # -- Options for manual page output -------------------------------------------- 214 | 215 | # One entry per manual page. List of tuples 216 | # (source start file, name, description, authors, manual section). 217 | man_pages = [ 218 | ('index', 'toolz', u'Toolz Documentation', 219 | [u'Matthew Rocklin, John Jacobsen'], 1) 220 | ] 221 | 222 | # If true, show URL addresses after external links. 223 | #man_show_urls = False 224 | 225 | 226 | # -- Options for Texinfo output ------------------------------------------------ 227 | 228 | # Grouping the document tree into Texinfo files. 
List of tuples 229 | # (source start file, target name, title, author, 230 | # dir menu entry, description, category) 231 | texinfo_documents = [ 232 | ('index', 'Toolz', u'Toolz Documentation', 233 | u'Matthew Rocklin, John Jacobsen', 'Toolz', 'One line description of project.', 234 | 'Miscellaneous'), 235 | ] 236 | 237 | # Documents to append as an appendix to all manuals. 238 | #texinfo_appendices = [] 239 | 240 | # If false, no module index is generated. 241 | #texinfo_domain_indices = True 242 | 243 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 244 | #texinfo_show_urls = 'footnote' 245 | 246 | 247 | # -- Options for Epub output --------------------------------------------------- 248 | 249 | # Bibliographic Dublin Core info. 250 | epub_title = u'Toolz' 251 | epub_author = u'Matthew Rocklin, John Jacobsen' 252 | epub_publisher = u'Matthew Rocklin, John Jacobsen' 253 | epub_copyright = u'2013, Matthew Rocklin, John Jacobsen' 254 | 255 | # The language of the text. It defaults to the language option 256 | # or en if the language is not set. 257 | #epub_language = '' 258 | 259 | # The scheme of the identifier. Typical schemes are ISBN or URL. 260 | #epub_scheme = '' 261 | 262 | # The unique identifier of the text. This can be a ISBN number 263 | # or the project homepage. 264 | #epub_identifier = '' 265 | 266 | # A unique identification for the text. 267 | #epub_uid = '' 268 | 269 | # A tuple containing the cover image and cover page html template filenames. 270 | #epub_cover = () 271 | 272 | # HTML files that should be inserted before the pages created by sphinx. 273 | # The format is a list of tuples containing the path and title. 274 | #epub_pre_files = [] 275 | 276 | # HTML files shat should be inserted after the pages created by sphinx. 277 | # The format is a list of tuples containing the path and title. 278 | #epub_post_files = [] 279 | 280 | # A list of files that should not be packed into the epub file. 
281 | #epub_exclude_files = []
282 | 
283 | # The depth of the table of contents in toc.ncx.
284 | #epub_tocdepth = 3
285 | 
286 | # Allow duplicate toc entries.
287 | #epub_tocdup = True
288 | 
--------------------------------------------------------------------------------
/doc/source/control.rst:
--------------------------------------------------------------------------------
1 | Control Flow
2 | ============
3 | 
4 | Programming is hard when we think simultaneously about several concepts. Good
5 | programming breaks down big problems into small problems and
6 | builds up small solutions into big solutions. By this practice the
7 | need for simultaneous thought is restricted to only a few elements at a time.
8 | 
9 | All modern languages provide mechanisms to build data into data structures and
10 | to build functions out of other functions. The third element of
11 | programming, besides data and functions, is control flow. Building
12 | complex control flow out of simple control flow presents deeper
13 | challenges.
14 | 
15 | 
16 | What?
17 | -----
18 | 
19 | Each element in a computer program is either
20 | 
21 | - A variable or value literal like ``x``, ``total``, or ``5``
22 | - A function or computation like the ``+`` in ``x + 1``, the function ``fib``
23 |   in ``fib(3)``, the method ``split`` in ``line.split(',')``, or the ``=`` in
24 |   ``x = 0``
25 | - Control flow like ``if``, ``for``, or ``return``
26 | 
27 | Here is a piece of code; see if you can label each term as either
28 | variable/value, function/computation, or control flow
29 | 
30 | .. code::
31 | 
32 |     def fib(n):
33 |         a, b = 0, 1
34 |         for i in range(n):
35 |             a, b = b, a + b
36 |         return b
37 | 
38 | Programming is hard when we have to juggle many code elements of each type at 
Good programming is about managing these three elements so that 40 | the developer is only required to think about a handful of them at a time. For 41 | example we might collect many integer variables into a list of integers or 42 | build a big function out of smaller ones. While we have natural ways to manage 43 | data and functions, control flow presents more of a challenge. 44 | 45 | We organize our data into **data structures** like lists, dictionaries, or objects 46 | in order to group related data together -- this allows us to manipulate large 47 | collections of related data as if we were only manipulating a single entity. 48 | 49 | We **build large functions out of smaller ones**; enabling us to break up a 50 | complex task like doing laundry into a sequence of simpler tasks. 51 | 52 | .. code:: 53 | 54 | def do_laundry(clothes): 55 | wet_clothes = wash(clothes, coins) 56 | dry_clothes = dry(wet_clothes, coins) 57 | return fold(dry_clothes) 58 | 59 | **Control flow is more challenging**; how do we break down complex control flow 60 | into simpler pieces that fit in our brain? How do we encapsulate commonly 61 | recurring patterns? 62 | 63 | Lets motivate this with an example of a common control structure, applying a 64 | function to each element in a list. Imagine we want to download the HTML 65 | source for a number of webpages. 66 | 67 | .. code:: 68 | 69 | from urllib import urlopen 70 | 71 | urls = ['http://www.google.com', 'http://www.wikipedia.com', 'http://www.apple.com'] 72 | html_texts = [] 73 | for item in urls: 74 | html_texts.append(urlopen(item)) 75 | return html_texts 76 | 77 | Or maybe we want to compute the Fibonacci numbers on a particular set of 78 | integers 79 | 80 | .. code:: 81 | 82 | integers = [1, 2, 3, 4, 5] 83 | fib_integers = [] 84 | for item in integers: 85 | fib_integers.append(fib(item)) 86 | return fib_integers 87 | 88 | These two unrelated applications share an identical control flow pattern. 
They
89 | apply a function (``urlopen`` or ``fib``) onto each element of an input list
90 | (``urls``, or ``integers``), appending the result onto an output list. Because
91 | this control flow pattern is so common we give it a name, ``map``, and say that
92 | we map a function (like ``urlopen``) onto a list (like ``urls``).
93 | 
94 | Because Python can treat functions like variables we can encode this control
95 | pattern into a higher-order-function as follows:
96 | 
97 | .. code::
98 | 
99 |     def map(function, sequence):
100 |         output = []
101 |         for item in sequence:
102 |             output.append(function(item))
103 |         return output
104 | 
105 | This allows us to simplify our code above to the following, pithy solutions
106 | 
107 | .. code::
108 | 
109 |     html_texts = map(urlopen, urls)
110 |     fib_integers = map(fib, integers)
111 | 
112 | Experienced Python programmers know that this control pattern is so popular
113 | that it has been elevated to the status of **syntax** with the popular list
114 | comprehension
115 | 
116 | .. code::
117 | 
118 |     html_texts = [urlopen(url) for url in urls]
119 | 
120 | 
121 | Why?
122 | ----
123 | 
124 | So maybe you already knew about ``map`` and don't use it or maybe you just
125 | prefer list comprehensions. Why should you keep reading?
126 | 
127 | Managing Complexity
128 | ^^^^^^^^^^^^^^^^^^^
129 | 
130 | The higher order function ``map`` gives us a name to call a particular control
131 | pattern. Regardless of whether or not you use a for loop, a list
132 | comprehension, or ``map`` itself, it is useful to recognize the operation
133 | and to give it a name. Naming control patterns lets us tackle
134 | complex problems at a larger scale without burdening our mind with rote details.
135 | It is just as important as bundling data into data structures or building
136 | complex functions out of simple ones.
137 | 138 | *Naming control flow patterns enables programmers to manipulate increasingly 139 | complex operations.* 140 | 141 | Other Patterns 142 | ^^^^^^^^^^^^^^ 143 | 144 | The function ``map`` has friends. Advanced programmers may know about 145 | ``map``'s siblings, ``filter`` and ``reduce``. The ``filter`` control pattern 146 | is also handled by list comprehension syntax and ``reduce`` is often replaced 147 | by straight for loops, so if you don't want to use them there is no immediately 148 | practical reason why you would care. 149 | 150 | Most programmers however don't know about the many cousins of 151 | ``map``/``filter``/``reduce``. Consider for example the unsung heroine, 152 | ``groupby``. A brief example grouping names by their length follows: 153 | 154 | .. code:: 155 | 156 | >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank'] 157 | >>> groupby(len, names) 158 | {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']} 159 | 160 | Groupby collects each element of a list into sublists determined by the value 161 | of a function. Lets see ``groupby`` in action again, grouping numbers by 162 | evenness. 163 | 164 | .. code:: 165 | 166 | >>> def iseven(n): 167 | ... return n % 2 == 0 168 | 169 | >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7]) 170 | {True: [2, 4, 6], False: [1, 3, 5, 7]} 171 | 172 | If we were to write this second operation out by hand it might look something 173 | like the following: 174 | 175 | .. code:: 176 | 177 | evens = [] 178 | odds = [] 179 | for item in numbers: 180 | if iseven(item): 181 | evens.append(item) 182 | else: 183 | odds.append(item) 184 | 185 | Most programmers have written code exactly like this over and over again, just 186 | like they may have repeated the ``map`` control pattern. When we identify code 187 | as a ``groupby`` operation we mentally collapse the detailed manipulation into 188 | a single concept. 189 | 190 | The Toolz library contains dozens of patterns like ``map`` and ``groupby``. 
191 | Learning a core set (maybe a dozen) covers the vast majority of common 192 | programming tasks often done by hand. 193 | 194 | *A rich vocabulary of core control functions conveys the following benefits:* 195 | 196 | - You identify new patterns 197 | - You make fewer errors in rote coding 198 | - You can depend on well tested and benchmarked implementations 199 | 200 | But this does not come for free. As in spoken language the use of a rich 201 | vocabulary can alienate new practitioners. Most functional languages have 202 | fallen into this trap and are seen as unapproachable and smug. Python 203 | maintains a low-brow reputation and benefits from it. Just as with spoken 204 | language the value of using just-the-right-word must be moderated with the 205 | comprehension of the intended audience. 206 | -------------------------------------------------------------------------------- /doc/source/curry.rst: -------------------------------------------------------------------------------- 1 | 2 | Curry 3 | ===== 4 | 5 | Traditionally partial evaluation of functions is handled with the ``partial`` 6 | higher order function from ``functools``. Currying provides syntactic sugar. 7 | 8 | .. code:: 9 | 10 | >>> double = partial(mul, 2) # Partial evaluation 11 | >>> double = mul(2) # Currying 12 | 13 | This syntactic sugar is valuable when developers chain several higher order 14 | functions together. 15 | 16 | Partial Evaluation 17 | ------------------ 18 | 19 | Often when composing smaller functions to form big ones we need partial 20 | evaluation. We do this in the word counting example: 21 | 22 | .. code:: 23 | 24 | >>> def stem(word): 25 | ... """ Stem word to primitive form """ 26 | ... return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") 27 | 28 | >>> wordcount = compose(frequencies, partial(map, stem), str.split) 29 | 30 | Here we want to map the ``stem`` function onto each of the words produced by 31 | ``str.split``. 
We want a ``stem_many`` function that takes a list of words, 32 | stems them, and returns a list back. In full form this would look like the 33 | following: 34 | 35 | .. code:: 36 | 37 | >>> def stem_many(words): 38 | ... return map(stem, words) 39 | 40 | The ``partial`` function lets us create this function more naturally. 41 | 42 | .. code:: 43 | 44 | >>> stem_many = partial(map, stem) 45 | 46 | In general 47 | 48 | .. code:: 49 | 50 | >>> def f(x, y, z): 51 | ... # Do stuff with x, y, and z 52 | 53 | >>> # partially evaluate f with known values a and b 54 | >>> def g(z): 55 | ... return f(a, b, z) 56 | 57 | >>> # partially evaluate f with known values a and b 58 | >>> g = partial(f, a, b) 59 | 60 | Curry 61 | ----- 62 | 63 | In this context currying is just syntactic sugar for partial evaluation. A 64 | curried function partially evaluates if it does not receive enough arguments to 65 | compute a result. 66 | 67 | .. code:: 68 | 69 | >>> from toolz import curry 70 | 71 | >>> @curry # We can use curry as a decorator 72 | ... def mul(x, y): 73 | ... return x * y 74 | 75 | >>> double = mul(2) # mul didn't receive enough arguments to evaluate 76 | ... # so it holds onto the 2 and waits, returning a 77 | ... # partially evaluated function, double 78 | 79 | >>> double(5) 80 | 10 81 | 82 | So if ``map`` was curried... 83 | 84 | .. code:: 85 | 86 | >>> map = curry(map) 87 | 88 | Then we could replace the ``partial`` with a function evaluation 89 | 90 | .. code:: 91 | 92 | >>> # wordcount = compose(frequencies, partial(map, stem), str.split) 93 | >>> wordcount = compose(frequencies, map(stem), str.split) 94 | 95 | In this particular example it's probably simpler to stick with ``partial``. 96 | Once ``partial`` starts occurring several times in your code it may be time to 97 | switch to the ``curried`` namespace. 
98 | 99 | The Curried Namespace 100 | --------------------- 101 | 102 | All functions present in the ``toolz`` namespace are curried in the 103 | ``toolz.curried`` namespace. 104 | 105 | So you can exchange an import line like the following 106 | 107 | .. code:: 108 | 109 | >>> from toolz import * 110 | 111 | For the following 112 | 113 | .. code:: 114 | 115 | >>> from toolz.curried import * 116 | 117 | And all of your favorite ``toolz`` functions will curry automatically. We've 118 | also included curried versions of the standard Python higher order functions 119 | like ``map``, ``filter``, ``reduce`` so you'll get them too (whether you like 120 | it or not.) 121 | -------------------------------------------------------------------------------- /doc/source/heritage.rst: -------------------------------------------------------------------------------- 1 | Heritage 2 | ======== 3 | 4 | While Python was originally intended as an imperative language 5 | [Guido_], it contains all elements necessary to support a rich set of features 6 | from the functional paradigm. In particular its core data structures, lazy 7 | iterators, and functions as first class objects can be combined to implement a 8 | common standard library of functions shared among many functional languages. 9 | 10 | This was first recognized and supported through the standard libraries 11 | itertools_ and functools_ which contain functions like ``permutations``, 12 | ``chain`` and ``partial`` to complement the standard ``map``, ``filter``, 13 | ``reduce`` already found in the core language. While these libraries contain 14 | substantial functionality they do not achieve the same level of adoption found 15 | in similar projects in other languages. This may be because they are 16 | incomplete and lack a number of commonly related functions like ``compose`` and 17 | ``groupby`` which often complement these core operations. 
18 | 
19 | A completion of this set of functions was first attempted in the projects
20 | itertoolz_ and functoolz_ (note the z). These libraries contained
21 | several functions that were absent in the standard itertools_/functools_
22 | libraries. The ``itertoolz``/``functoolz`` libraries were eventually merged
23 | into the monolithic ``toolz`` project described here.
24 | 
25 | Most contemporary functional languages (Haskell, Scala, Clojure, ...) contain
26 | some variation of the functions found in ``toolz``. The ``toolz`` project
27 | generally adheres closely to the API found in the Clojure standard library (see
28 | cheatsheet_) and where disagreements occur that API usually dominates. The
29 | ``toolz`` API is also strongly affected by the principles of the Python
30 | language itself, and often makes deviations in order to be more approachable to
31 | that community.
32 | 
33 | The development of a functional standard library within a popular imperative
34 | language is not unique. Similar projects have arisen in other
35 | imperative-by-design languages that contain the necessary elements to support a
36 | functional standard library. Underscore.js_ in JavaScript has attained
37 | notable popularity in the web community. ``LINQ`` in C# follows a similar
38 | philosophy but mimics declarative database languages rather than functional
39 | ones. Enumerable_ is the closest project in Ruby. Other excellent projects
40 | also exist within the Python ecosystem, most notably Fn.py_ and Funcy_.
41 | 
42 | .. [itertools] http://docs.python.org/2/library/itertools.html
43 | .. [functools] http://docs.python.org/2/library/functools.html
44 | .. [itertoolz] http://github.com/pytoolz/itertoolz
45 | .. [functoolz] http://github.com/pytoolz/functoolz
46 | .. [Underscore.js] http://underscorejs.org
47 | .. [cheatsheet] http://clojure.org/cheatsheet
48 | .. [Guido] http://python-history.blogspot.com/2009/04/origins-of-pythons-functional-features.html
49 | .. 
[Enumerable] http://ruby-doc.org/core-2.0.0/Enumerable.html 50 | .. [funcy] https://github.com/suor/funcy/ 51 | .. [fn.py] https://github.com/kachayev/fn.py 52 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | 2 | PyToolz API Documentation 3 | ========================= 4 | 5 | Toolz provides a set of utility functions for iterators, functions, 6 | and dictionaries. These functions interoperate well and form 7 | the building blocks of common data analytic operations. They extend the 8 | standard libraries `itertools` and `functools` and borrow heavily from the 9 | standard libraries of contemporary functional languages. 10 | 11 | Toolz provides a suite of functions which have the following functional virtues: 12 | 13 | - **Composable:** They interoperate due to their use of core data structures. 14 | - **Pure:** They don't change their inputs or rely on external state. 15 | - **Lazy:** They don't run until absolutely necessary, allowing them to support large streaming data sets. 16 | 17 | Toolz functions are *pragmatic*. They understand that most programmers 18 | have deadlines. 19 | 20 | - **Low Tech:** They're just functions, no syntax or magic tricks to learn 21 | - **Tuned:** They're profiled and optimized 22 | - **Serializable:** They support common solutions for parallel computing 23 | 24 | This gives developers the power to write *powerful* programs to solve *complex 25 | problems* with relatively *simple code*. This code can be *easy to understand* 26 | without sacrificing *performance*. Toolz enables this approach, commonly 27 | associated with functional programming, within a natural Pythonic style 28 | suitable for most developers. 29 | 30 | BSD licensed source code is available at http://github.com/pytoolz/toolz/ . 31 | 32 | 33 | Contents 34 | ^^^^^^^^ 35 | 36 | .. 
3. Toolz simultaneously supports Python versions 2.6, 2.7, 3.3, 3.4, 3.5 and PyPy
code:: 24 | 25 | >>> next(book) 26 | "It was the best of times," 27 | 28 | >>> next(book) 29 | "it was the worst of times," 30 | 31 | and so on. Each time we call ``next`` on ``book`` we burn through another line 32 | of the text and the ``book`` iterator marches slowly onwards through the text. 33 | 34 | 35 | Computation 36 | ----------- 37 | 38 | We can lazily operate on lazy iterators without doing any actual computation. 39 | For example lets read the book in upper case 40 | 41 | .. code:: 42 | 43 | >>> from toolz import map # toolz' map is lazy by default 44 | 45 | >>> loud_book = map(str.upper, book) 46 | 47 | >>> next(loud_book) 48 | "IT WAS THE AGE OF WISDOM," 49 | >>> next(loud_book) 50 | "IT WAS THE AGE OF FOOLISHNESS," 51 | 52 | It is as if we applied the function ``str.upper`` onto every line of the book; 53 | yet the first line completes instantaneously. Instead Python does the 54 | uppercasing work only when it becomes necessary, i.e. when you call ``next`` 55 | to ask for another line. 56 | 57 | 58 | Reductions 59 | ---------- 60 | 61 | You can operate on lazy iterators just as you would with lists, tuples, or 62 | sets. You can use them in for loops as in 63 | 64 | 65 | .. code:: 66 | 67 | for line in loud_book: 68 | ... 69 | 70 | You can instantiate them all into memory by calling them with the constructors 71 | ``list``, or ``tuple``. 72 | 73 | .. code:: 74 | 75 | loud_book = list(loud_book) 76 | 77 | Of course if they are very large then this might be unwise. Often we use 78 | laziness to avoid loading large datasets into memory at once. Many 79 | computations on large datasets don't require access to all of the data at a 80 | single time. In particular *reductions* (like sum) often take large amounts of 81 | sequential data (like [1, 2, 3, 4]) and produce much more manageable results 82 | (like 10) and can do so just by viewing the data a little bit at a time. 
We could just 99 | as easily have done this computation on the entire Gutenberg collection or on
return ( 51 |             word.lower().rstrip(",.!)-*_?:;$'-\"").lstrip("-*'\"(_$'"))
This fold can work equally well with ``multiprocessing.Pool.map``, 95 | ``threading.Pool.map``, or ``IPython.parallel``'s ``map_async``.
code:: 44 | 45 | >>> data = [1, 2, 3] 46 | >>> result = powers(data) 47 | 48 | >>> print result 49 | [1, 4, 9] 50 | >>> print data 51 | [1, 4, 9] 52 | 53 | We see that ``powers`` affected the variable ``data``. Users of our function 54 | might be surprised by this. Usually we expect our inputs to be unchanged. 55 | 56 | Another problem occurs when we run this code in a different context: 57 | 58 | .. code:: 59 | 60 | >>> data = [1, 2, 3] 61 | >>> result = powers(data) 62 | >>> print result 63 | [1, 8, 27] 64 | 65 | When we give ``powers`` the same inputs we receive different outputs; how could 66 | this be? Someone must have changed the value of ``exponent`` to be ``3``, 67 | producing cubes rather than squares. At first this flexibility may seem like a 68 | feature and indeed in many cases it may be. The cost for this flexibility is 69 | that we need to keep track of the ``exponent`` variable separately whenever we 70 | use ``powers``. As we use more functions these extra variables become a 71 | burden. 72 | 73 | 74 | State 75 | ----- 76 | 77 | Impure functions are often more efficient but also require that the programmer 78 | "keep track" of the state of several variables. Keeping track of this state 79 | becomes increasingly difficult as programs grow in size. By eschewing state 80 | programmers are able to conceptually scale out to solve much larger problems. 81 | The loss of performance is often negligible compared to the freedom to trust 82 | that your functions work as expected on your inputs. 83 | 84 | Maintaining state provides efficiency at the cost of surprises. Pure 85 | functions produce no surprises and so lighten the mental load of the 86 | programmer. 87 | 88 | 89 | Testing 90 | ------- 91 | 92 | As an added bonus, testing pure functions is substantially simpler than testing 93 | impure ones. A programmer who has tried to test functions that include 94 | randomness will know this first-hand. 
- `Underscore.js <http://underscorejs.org>`__: A similar library for 5 |   JavaScript 6 | - `Enumerable <http://ruby-doc.org/core-2.0.0/Enumerable.html>`__: A 7 |   similar library for Ruby 8 | - `Clojure <http://clojure.org>`__: A functional language whose 9 |   standard library has several counterparts in ``toolz`` 10 | - `itertools <http://docs.python.org/2/library/itertools.html>`__: The 11 |   Python standard library for iterator tools 12 | - `functools <http://docs.python.org/2/library/functools.html>`__: The 13 |   Python standard library for function tools 14 | - `Functional Programming HOWTO <http://docs.python.org/dev/howto/functional.html>`__: 15 |   The description of functional programming features from the official 16 |   Python docs. 17 | 18 | Contemporary Projects 19 | --------------------- 20 | 21 | These projects also provide iterator and functional utilities within 22 | Python.  Their functionality overlaps substantially with that of PyToolz. 23 | 24 | - `funcy <https://github.com/suor/funcy/>`__ 25 | - `fn.py <https://github.com/kachayev/fn.py>`__ 26 | - `more\_itertools <https://github.com/erikrose/more-itertools>`__
(5, 'Edith', 300, 'F')] 19 | 20 | Selecting with ``map`` and ``filter`` 21 | ------------------------------------- 22 | 23 | Simple projection and linear selection from a sequence is achieved through the 24 | standard functions ``map`` and ``filter``. 25 | 26 | .. code:: 27 | 28 | SELECT name, balance 29 | FROM accounts 30 | WHERE balance > 150; 31 | 32 | These functions correspond to the SQL commands ``SELECT`` and ``WHERE``. 33 | 34 | .. code:: 35 | 36 | >>> from toolz.curried import pipe, map, filter, get 37 | >>> pipe(accounts, filter(lambda (id, name, balance, gender): balance > 150), 38 | ... map(get([1, 2])), 39 | ... list) 40 | 41 | *note: this uses the curried_ versions of ``map`` and ``filter``.* 42 | 43 | Of course, these operations are also well supported with standard 44 | list/generator comprehension syntax. This syntax is more often used and 45 | generally considered to be more Pythonic. 46 | 47 | .. code:: 48 | 49 | >>> [(name, balance) for (id, name, balance, gender) in accounts 50 | ... if balance > 150] 51 | 52 | 53 | Split-apply-combine with ``groupby`` and ``reduceby`` 54 | ----------------------------------------------------- 55 | 56 | We separate split-apply-combine operations into the following two concepts 57 | 58 | 1. Split the dataset into groups by some property 59 | 2. Reduce each of the groups with some synopsis function 60 | 61 | Toolz supports this common workflow with 62 | 63 | 1. a simple in-memory solution 64 | 2. a more sophisticated streaming solution. 65 | 66 | 67 | In Memory Split-Apply-Combine 68 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 69 | 70 | The in-memory solution depends on the functions `groupby`_ to split, and 71 | `valmap`_ to apply/combine. 72 | 73 | .. code:: 74 | 75 | SELECT gender, SUM(balance) 76 | FROM accounts 77 | GROUP BY gender; 78 | 79 | We first show these two functions piece by piece to show the intermediate 80 | groups. 81 | 82 | .. 
It applies the key function to each item in 120 | succession,
return x + y 134 | 135 | >>> reduceby(iseven, add, [1, 2, 3, 4]) 136 | {True: 6, False: 4} 137 | 138 | The even numbers are added together ``(2 + 4 = 6)`` into group ``True``, and 139 | the odd numbers are added together ``(1 + 3 = 4)`` into group ``False``. 140 | 141 | 142 | Note that we have to replace the reduction ``sum`` with the binary operator 143 | ``add``. The incremental nature of ``add`` allows us to do the summation work as 144 | new data comes in. The use of binary operators like ``add`` over full reductions 145 | like ``sum`` enables computation on very large streaming datasets. 146 | 147 | The challenge to using ``reduceby`` often lies in the construction of a 148 | suitable binary operator. Here is the solution for our accounts example 149 | that adds up the balances for each group: 150 | 151 | .. code:: 152 | 153 | >>> binop = lambda total, (id, name, bal, gend): total + bal 154 | 155 | >>> reduceby(get(3), binop, accounts) 156 | {'F': 400, 'M': 400} 157 | 158 | 159 | This construction supports datasets that are much larger than available memory. 160 | Only the output must be able to fit comfortably in memory and this is rarely an 161 | issue, even for very large split-apply-combine computations. 162 | 163 | 164 | Semi-Streaming ``join`` 165 | ----------------------- 166 | 167 | We register multiple datasets together with `join`_. Consider a second 168 | dataset storing addresses by ID 169 | 170 | .. code:: 171 | 172 | >>> addresses = [(1, '123 Main Street'), # id, address 173 | ... (2, '5 Adams Way'), 174 | ... (5, '34 Rue St Michel')] 175 | 176 | We can join this dataset against our accounts dataset by specifying attributes 177 | which register different elements with each other; in this case they share a 178 | common first column, id. 179 | 180 | .. code:: 181 | 182 | SELECT accounts.name, addresses.address 183 | FROM accounts, addresses 184 | WHERE accounts.id = addresses.id; 185 | 186 | 187 | .. 
>>> from toolz import join, first, second, unique
('Dan', 'Sydney'),
print((name, city)) 284 | ('Alice', 'Berlin') 285 | ('Alice', 'Paris') 286 | ('Alice', 'Shanghai') 287 | ('Edith', 'Chicago') 288 | ('Edith', 'NYC') 289 | ('Zhao', 'Chicago') 290 | ('Zhao', 'NYC') 291 | ('Zhao', 'Berlin') 292 | ('Zhao', 'Paris') 293 | 294 | Join is computationally powerful: 295 | 296 | * It is expressive enough to cover a wide set of analytics operations 297 | * It runs in linear time relative to the size of the input and output 298 | * Only the left sequence must fit in memory 299 | 300 | 301 | Disclaimer 302 | ---------- 303 | 304 | Toolz is a general purpose functional standard library, not a library 305 | specifically for data analytics. While there are obvious benefits (streaming, 306 | composition, ...) users interested in data analytics might be better served by 307 | using projects specific to data analytics like Pandas_ or SQLAlchemy. 308 | 309 | 310 | .. _groupby: http://toolz.readthedocs.org/en/latest/api.html#toolz.itertoolz.groupby 311 | .. _join: http://toolz.readthedocs.org/en/latest/api.html#toolz.itertoolz.join 312 | .. _reduceby: http://toolz.readthedocs.org/en/latest/api.html#toolz.itertoolz.reduceby 313 | .. _valmap: http://toolz.readthedocs.org/en/latest/api.html#toolz.itertoolz.valmap 314 | .. _Pandas: http://pandas.pydata.org/pandas-docs/stable/groupby.html 315 | .. _curried: http://toolz.readthedocs.org/en/latest/curry.html 316 | -------------------------------------------------------------------------------- /doc/source/tips-and-tricks.rst: -------------------------------------------------------------------------------- 1 | Tips and Tricks 2 | =============== 3 | 4 | Toolz functions can be combined to make functions that, while common, aren't 5 | a part of toolz's standard library. This section presents 6 | a few of these recipes. 7 | 8 | 9 | * .. function:: pick(whitelist, dictionary) 10 | 11 | Return a subset of the provided dictionary with keys contained in the 12 | whitelist. 
13 | 14 | :: 15 | 16 | from toolz import keyfilter 17 | 18 | def pick(whitelist, d): 19 | return keyfilter(lambda k: k in whitelist, d) 20 | 21 | 22 | Example: 23 | 24 | >>> alphabet = {'a': 1, 'b': 2, 'c': 3, 'd': 4} 25 | >>> pick(['a', 'b'], alphabet) 26 | {'a': 1, 'b': 2} 27 | 28 | 29 | * .. function:: omit(blacklist, dictionary) 30 | 31 | Return a subset of the provided dictionary with keys *not* contained in the 32 | blacklist. 33 | 34 | :: 35 | 36 | from toolz import keyfilter 37 | 38 | def omit(blacklist, d): 39 | return keyfilter(lambda k: k not in blacklist, d) 40 | 41 | 42 | Example: 43 | 44 | >>> alphabet = {'a': 1, 'b': 2, 'c': 3, 'd': 4} 45 | >>> omit(['a', 'b'], alphabet) 46 | {'c': 3, 'd': 4} 47 | 48 | 49 | * .. function:: compact(iterable) 50 | 51 | Filter an iterable on "truthy" values. 52 | 53 | :: 54 | 55 | from toolz import filter 56 | 57 | def compact(iter): 58 | return filter(None, iter) 59 | 60 | 61 | Example: 62 | 63 | >>> results = [0, 1, 2, None, 3, False] 64 | >>> list(compact(results)) 65 | [1, 2, 3] 66 | 67 | * .. function:: keyjoin(leftkey, leftseq, rightkey, rightseq) 68 | 69 | Inner join two sequences of dictionaries on specified keys, merging matches with right value 70 | precedence. 
71 | 72 | :: 73 | 74 | from itertools import starmap 75 | from toolz import join, merge 76 | 77 | def keyjoin(leftkey, leftseq, rightkey, rightseq): 78 | return starmap(merge, join(leftkey, leftseq, rightkey, rightseq)) 79 | 80 | 81 | Example: 82 | 83 | >>> people = [{'id': 0, 'name': 'Anonymous Guy', 'location': 'Unknown'}, 84 | {'id': 1, 'name': 'Karan', 'location': 'San Francisco'}, 85 | {'id': 2, 'name': 'Matthew', 'location': 'Oakland'}] 86 | >>> hobbies = [{'person_id': 1, 'hobby': 'Tennis'}, 87 | {'person_id': 1, 'hobby': 'Acting'}, 88 | {'person_id': 2, 'hobby': 'Biking'}] 89 | >>> list(keyjoin('id', people, 'person_id', hobbies)) 90 | [{'hobby': 'Tennis', 91 | 'id': 1, 92 | 'location': 'San Francisco', 93 | 'name': 'Karan', 94 | 'person_id': 1}, 95 | {'hobby': 'Acting', 96 | 'id': 1, 97 | 'location': 'San Francisco', 98 | 'name': 'Karan', 99 | 'person_id': 1}, 100 | {'hobby': 'Biking', 101 | 'id': 2, 102 | 'location': 'Oakland', 103 | 'name': 'Matthew', 104 | 'person_id': 2}] 105 | 106 | * .. function:: areidentical(\*seqs) 107 | 108 | Determine if sequences are identical element-wise. 109 | This lazily evaluates the sequences and stops as soon as the result 110 | is determined. 
111 | 112 | :: 113 | 114 | from toolz import diff 115 | 116 | def areidentical(*seqs): 117 | return not any(diff(*seqs, default=object())) 118 | 119 | 120 | Example: 121 | 122 | >>> areidentical([1, 2, 3], (1, 2, 3)) 123 | True 124 | 125 | >>> areidentical([1, 2, 3], [1, 2]) 126 | False 127 | -------------------------------------------------------------------------------- /examples/fib.py: -------------------------------------------------------------------------------- 1 | # / 0 if i is 0 2 | # fib(i) = | 1 if i is 1 3 | # \ fib(i - 1) + fib(i - 2) otherwise 4 | 5 | 6 | def fib(n): 7 | """ Imperative definition of Fibonacci numbers """ 8 | a, b = 0, 1 9 | for i in range(n): 10 | a, b = b, a + b 11 | return a 12 | 13 | 14 | # This is intuitive but VERY slow 15 | def fib(n): 16 | """ Functional definition of Fibonacci numbers """ 17 | if n == 0 or n == 1: 18 | return n 19 | else: 20 | return fib(n - 1) + fib(n - 2) 21 | 22 | from toolz import memoize 23 | 24 | # Oh wait, it's fast again 25 | fib = memoize(fib) 26 | 27 | 28 | # Provide a cache with initial values to `memoize` 29 | @memoize(cache={0: 0, 1: 1}) 30 | def fib(n): 31 | """ Functional definition of Fibonacci numbers with initial terms cached. 32 | 33 | fib(0) == 0 34 | fib(1) == 1 35 | ... 
36 | fib(n) == fib(n - 1) + fib(n - 2) 37 | """ 38 | return fib(n - 1) + fib(n - 2) 39 | -------------------------------------------------------------------------------- /examples/graph.py: -------------------------------------------------------------------------------- 1 | from toolz.curried import * 2 | a, b, c, d, e, f, g = 'abcdefg' 3 | 4 | edges = [(a, b), (b, a), (a, c), (a, d), (d, a), (d, e), (e, f), (d, f), 5 | (f, d), (d, g), (e, g)] 6 | 7 | 8 | out_degrees = countby(first, edges) 9 | # {'a': 3, 'b': 1, 'd': 4, 'e': 2, 'f': 1} 10 | 11 | in_degrees = countby(second, edges) 12 | # {'a': 2, 'b': 1, 'c': 1, 'd': 2, 'e': 1, 'f': 2, 'g': 2} 13 | 14 | 15 | out_neighbors = valmap(comp(tuple, map(second)), 16 | groupby(first, edges)) 17 | # {'a': ('b', 'c', 'd'), 18 | # 'b': ('a',), 19 | # 'd': ('a', 'e', 'f', 'g'), 20 | # 'e': ('f', 'g'), 21 | # 'f': ('d',)} 22 | 23 | in_neighbors = valmap(comp(tuple, map(first)), 24 | groupby(second, edges)) 25 | # {'a': ('b', 'd'), 26 | # 'b': ('a',), 27 | # 'c': ('a',), 28 | # 'd': ('a', 'f'), 29 | # 'e': ('d',), 30 | # 'f': ('e', 'd'), 31 | # 'g': ('d', 'e')} 32 | -------------------------------------------------------------------------------- /examples/wordcount.py: -------------------------------------------------------------------------------- 1 | from toolz import * 2 | 3 | 4 | def stem(word): 5 | """ Stem word to primitive form """ 6 | return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") 7 | 8 | wordcount = comp(frequencies, partial(map, stem), str.split) 9 | 10 | if __name__ == '__main__': 11 | print(wordcount("This cat jumped over this other cat!")) 12 | # prints {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1} 13 | -------------------------------------------------------------------------------- /release-notes: -------------------------------------------------------------------------------- 1 | New in 0.4.2 2 | 3 | Removed intersection 4 | 5 | 6 | New in 0.5.3 7 | 8 | * get_in function 9 | * add itervalues, 
iterkeys, iteritems to compatibility 10 | * Add do function, remove side_effects from sandbox 11 | * Add juxt, partner to map 12 | * Performance improvements to merge_with 13 | * Errors from curried functions propagate upwards 14 | * keyfilter, valfilter 15 | * do 16 | 17 | New Authors: 18 | 19 | Graeme Coupar, @obmarg 20 | 21 | 22 | New in 0.6.0 23 | 24 | * memoize is curried by default 25 | * memoize support `key` keyword argument 26 | * Cleaned up issues in curried namespace 27 | * Unary functions memoize with just the single argument, not a tuple 28 | * Flattened directory structure 29 | * Add `pluck` function from underscore.js 30 | * Remove `sandbox.jackknife` 31 | 32 | 33 | New in 0.6.1 34 | 35 | 36 | * Python 3.4 support 37 | * New `join` operation 38 | * `join`, `groupby`, ... accept non-callable key functions. 39 | * Many speed improvements: 40 | * Cache method lookup 41 | * Faster `merge_sorted` without key 42 | * An additional round of tuning on `groupby` 43 | * Toolz builds on binstar build under mrocklin channel 44 | * Avoid generators, favor map. Assists in debugging. 
45 | * Cleaner `curry` implementation 46 | * Fix serialization issues for `juxt`, `complement` 47 | * `reduceby` no longer requires `default` keyword argument 48 | * Fix bug in `get` where `get([1], coll)` used to return element rather than 49 | length-one tuple 50 | * `EqualityHashKey` added to sandbox 51 | * `juxt` returns a tuple, not a generator 52 | 53 | 54 | New Authors: 55 | 56 | Leonid Shvechikov, José Ricardo, Lars Buitinck, Tom Prince 57 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from os.path import exists 4 | from setuptools import setup 5 | import toolz 6 | 7 | setup(name='toolz', 8 | version=toolz.__version__, 9 | description='List processing tools and functional utilities', 10 | url='http://github.com/pytoolz/toolz/', 11 | author='https://raw.github.com/pytoolz/toolz/master/AUTHORS.md', 12 | maintainer='Matthew Rocklin', 13 | maintainer_email='mrocklin@gmail.com', 14 | license='BSD', 15 | keywords='functional utility itertools functools', 16 | packages=['toolz', 17 | 'toolz.sandbox', 18 | 'toolz.curried'], 19 | package_data={'toolz': ['tests/*.py']}, 20 | long_description=(open('README.rst').read() if exists('README.rst') 21 | else ''), 22 | zip_safe=False) 23 | -------------------------------------------------------------------------------- /toolz/__init__.py: -------------------------------------------------------------------------------- 1 | from .itertoolz import * 2 | 3 | from .functoolz import * 4 | 5 | from .dicttoolz import * 6 | 7 | from .recipes import * 8 | 9 | from .compatibility import map, filter 10 | 11 | from . 
import operator
import sys
# Single flag used throughout toolz to select Python 2 vs Python 3 code paths.
PY3 = sys.version_info[0] > 2

__all__ = ('PY3', 'map', 'filter', 'range', 'zip', 'reduce', 'zip_longest',
           'iteritems', 'iterkeys', 'itervalues', 'filterfalse')

if PY3:
    # On Python 3 the builtins are already lazy; re-bind them here so every
    # name in __all__ can be imported from this module on both major versions.
    map = map
    filter = filter
    range = range
    zip = zip
    from functools import reduce
    from itertools import zip_longest
    from itertools import filterfalse
    # methodcaller-based shims that mirror the Python 2 dict methods
    # iteritems/iterkeys/itervalues on Python 3 dicts (which only have
    # items/keys/values views).
    iteritems = operator.methodcaller('items')
    iterkeys = operator.methodcaller('keys')
    itervalues = operator.methodcaller('values')
else:
    # Python 2: prefer the lazy itertools variants over the list-building
    # builtins, aliased to the Python 3 names.
    range = xrange
    reduce = reduce
    from itertools import imap as map
    from itertools import ifilter as filter
    from itertools import ifilterfalse as filterfalse
    from itertools import izip as zip
    from itertools import izip_longest as zip_longest
    iteritems = operator.methodcaller('iteritems')
    iterkeys = operator.methodcaller('iterkeys')
    itervalues = operator.methodcaller('itervalues')
222)] 16 | >>> list(map(lambda seq: get(0, seq), data)) 17 | [1, 11, 111] 18 | 19 | The curried version allows simple expression of partial evaluation 20 | >>> list(map(get(0), data)) 21 | [1, 11, 111] 22 | 23 | See Also: 24 | toolz.functoolz.curry 25 | """ 26 | import inspect 27 | 28 | from . import exceptions 29 | from . import operator 30 | import toolz 31 | 32 | 33 | def _nargs(f): 34 | try: 35 | return len(inspect.getargspec(f).args) 36 | except TypeError: 37 | return 0 38 | 39 | 40 | def _should_curry(f): 41 | do_curry = frozenset((toolz.map, toolz.filter, toolz.sorted, toolz.reduce)) 42 | return (callable(f) and _nargs(f) > 1 or f in do_curry) 43 | 44 | 45 | def _curry_namespace(ns): 46 | return dict( 47 | (name, toolz.curry(f) if _should_curry(f) else f) 48 | for name, f in ns.items() if '__' not in name 49 | ) 50 | 51 | 52 | locals().update(toolz.merge( 53 | _curry_namespace(vars(toolz)), 54 | _curry_namespace(vars(exceptions)), 55 | )) 56 | 57 | # Clean up the namespace. 58 | del _nargs 59 | del _should_curry 60 | del exceptions 61 | del toolz 62 | -------------------------------------------------------------------------------- /toolz/curried/exceptions.py: -------------------------------------------------------------------------------- 1 | import toolz 2 | 3 | 4 | __all__ = ['merge_with', 'merge'] 5 | 6 | 7 | @toolz.curry 8 | def merge_with(fn, *dicts, **kwargs): 9 | if len(dicts) == 0: 10 | raise TypeError() 11 | else: 12 | return toolz.merge_with(fn, *dicts, **kwargs) 13 | 14 | 15 | @toolz.curry 16 | def merge(*dicts, **kwargs): 17 | if len(dicts) == 0: 18 | raise TypeError() 19 | else: 20 | return toolz.merge(*dicts, **kwargs) 21 | 22 | merge_with.__doc__ = toolz.merge_with.__doc__ 23 | merge.__doc__ = toolz.merge.__doc__ 24 | -------------------------------------------------------------------------------- /toolz/curried/operator.py: -------------------------------------------------------------------------------- 1 | from __future__ import 
def _get_factory(f, kwargs):
    """ Pop and return the ``factory`` callable from ``kwargs``.

    Any other leftover keyword triggers the usual unexpected-keyword
    TypeError on behalf of function ``f``.
    """
    factory = kwargs.pop('factory', dict)
    if kwargs:
        raise TypeError("{0}() got an unexpected keyword argument "
                        "'{1}'".format(f.__name__, kwargs.popitem()[0]))
    return factory


def merge(*dicts, **kwargs):
    """ Combine several dictionaries into a single new one

    >>> merge({1: 'one'}, {2: 'two'})
    {1: 'one', 2: 'two'}

    When a key appears more than once, the value from the later
    dictionary wins

    >>> merge({1: 2, 3: 4}, {3: 3, 4: 4})
    {1: 2, 3: 3, 4: 4}

    See Also:
        merge_with
    """
    # A single non-dict argument is interpreted as an iterable of dicts.
    if len(dicts) == 1 and not isinstance(dicts[0], dict):
        dicts = dicts[0]
    factory = _get_factory(merge, kwargs)

    merged = factory()
    for mapping in dicts:
        merged.update(mapping)
    return merged
def valmap(func, d, factory=dict):
    """ Apply ``func`` to every value of dictionary ``d``

    >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
    >>> valmap(sum, bills)  # doctest: +SKIP
    {'Alice': 65, 'Bob': 45}

    See Also:
        keymap
        itemmap
    """
    out = factory()
    out.update((k, func(v)) for k, v in iteritems(d))
    return out


def keymap(func, d, factory=dict):
    """ Apply ``func`` to every key of dictionary ``d``

    >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
    >>> keymap(str.lower, bills)  # doctest: +SKIP
    {'alice': [20, 15, 30], 'bob': [10, 35]}

    See Also:
        valmap
        itemmap
    """
    out = factory()
    out.update((func(k), v) for k, v in iteritems(d))
    return out


def itemmap(func, d, factory=dict):
    """ Apply ``func`` to every (key, value) pair of dictionary ``d``

    >>> accountids = {"Alice": 10, "Bob": 20}
    >>> itemmap(reversed, accountids)  # doctest: +SKIP
    {10: "Alice", 20: "Bob"}

    See Also:
        keymap
        valmap
    """
    out = factory()
    out.update(func(item) for item in iteritems(d))
    return out
def keyfilter(predicate, d, factory=dict):
    """ Keep only the items of ``d`` whose key satisfies ``predicate``

    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> keyfilter(iseven, d)
    {2: 3, 4: 5}

    See Also:
        valfilter
        itemfilter
        keymap
    """
    out = factory()
    for key, val in iteritems(d):
        if predicate(key):
            out[key] = val
    return out


def itemfilter(predicate, d, factory=dict):
    """ Keep only the (key, value) items of ``d`` that satisfy ``predicate``

    >>> def isvalid(item):
    ...     k, v = item
    ...     return k % 2 == 0 and v < 4

    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> itemfilter(isvalid, d)
    {2: 3}

    See Also:
        keyfilter
        valfilter
        itemmap
    """
    out = factory()
    for kv in iteritems(d):
        if predicate(kv):
            key, val = kv
            out[key] = val
    return out


def assoc(d, key, value, factory=dict):
    """ Return a copy of ``d`` with ``d[key]`` set to ``value``

    The input dictionary itself is never modified.

    >>> assoc({'x': 1}, 'x', 2)
    {'x': 2}
    >>> assoc({'x': 1}, 'y', 3)  # doctest: +SKIP
    {'x': 1, 'y': 3}
    """
    extra = factory()
    extra[key] = value
    return merge(d, extra, factory=factory)
def update_in(d, keys, func, default=None, factory=dict):
    """ Update a value inside a (potentially) nested dictionary

    Returns a copy of ``d`` in which the value found by following the key
    path ``keys`` has been replaced with ``func`` applied to the old value.
    The original dictionary is never mutated.

    If the path does not exist, intermediate dictionaries are created with
    ``factory`` and the innermost value becomes ``func(default)``.

    >>> inc = lambda x: x + 1
    >>> update_in({'a': 0}, ['a'], inc)
    {'a': 1}

    >>> # updating a value when the first key is absent
    >>> update_in({}, [1, 2, 3], str, default="bar")
    {1: {2: {3: 'bar'}}}
    >>> update_in({1: 'foo'}, [2, 3, 4], inc, 0)
    {1: 'foo', 2: {3: {4: 1}}}
    """
    assert len(keys) > 0
    key, rest = keys[0], keys[1:]
    if not rest:
        # Base case: rewrite the value stored at the final key.
        replacement = func(d[key]) if key in d else func(default)
        return assoc(d, key, replacement, factory)
    # Recurse into the existing sub-dict, or into a fresh one if absent.
    child = d[key] if key in d else factory()
    return assoc(d, key, update_in(child, rest, func, default, factory),
                 factory)
def identity(x):
    """ Return the argument unchanged

    >>> identity(3)
    3
    """
    return x
def thread_last(val, *forms):
    """ Thread ``val`` through a sequence of functions/forms

    Each form is either a callable, which is applied directly, or a tuple
    ``(func, a, b, ...)``, which is called as ``func(a, b, ..., acc)`` with
    the accumulated value placed in the *last* position.

    >>> def double(x): return 2*x
    >>> def inc(x): return x + 1
    >>> thread_last(1, inc, double)
    4

    >>> def add(x, y): return x + y
    >>> def pow(x, y): return x**y
    >>> thread_last(1, (add, 4), (pow, 2))  # pow(2, add(4, 1))
    32

    So in general
        thread_last(x, f, (g, y, z))
    expands to
        g(y, z, f(x))

    >>> def iseven(x):
    ...     return x % 2 == 0
    >>> list(thread_last([1, 2, 3], (map, inc), (filter, iseven)))
    [2, 4]

    See Also:
        thread_first
    """
    def step(acc, form):
        if callable(form):
            return form(acc)
        if isinstance(form, tuple):
            fn, extra = form[0], form[1:]
            return fn(*(extra + (acc,)))
    return reduce(step, forms, val)
class curry(object):
    """ Curry a callable function

    Enables partial application of arguments through calling a function with an
    incomplete set of arguments.

    >>> def mul(x, y):
    ...     return x * y
    >>> mul = curry(mul)

    >>> double = mul(2)
    >>> double(10)
    20

    Also supports keyword arguments

    >>> @curry  # Can use curry as a decorator
    ... def f(x, y, a=10):
    ...     return a * (x + y)

    >>> add = f(a=1)
    >>> add(2, 3)
    5

    See Also:
        toolz.curried - namespace of curried functions
        http://toolz.readthedocs.org/en/latest/curry.html
    """
    def __init__(self, *args, **kwargs):
        # Signature is (func, *args, **kwargs); func is pulled out of *args
        # manually so that a keyword literally named 'func' still works.
        if not args:
            raise TypeError('__init__() takes at least 2 arguments (1 given)')
        func, args = args[0], args[1:]
        if not callable(func):
            raise TypeError("Input must be callable")

        # curry- or functools.partial-like object?  Unpack and merge arguments
        # so nested curries/partials collapse into a single flat partial.
        if (hasattr(func, 'func')
                and hasattr(func, 'args')
                and hasattr(func, 'keywords')
                and isinstance(func.args, tuple)):
            _kwargs = {}
            if func.keywords:
                _kwargs.update(func.keywords)
            # Outer keywords take precedence over the wrapped object's.
            _kwargs.update(kwargs)
            kwargs = _kwargs
            args = func.args + args

        # All call state lives in a single functools.partial object.
        if kwargs:
            self._partial = partial(func, *args, **kwargs)
        else:
            self._partial = partial(func, *args)

        self.__doc__ = getattr(func, '__doc__', None)
        self.__name__ = getattr(func, '__name__', '')

    # The func/args/keywords properties mirror functools.partial's interface
    # so curry objects can themselves be unpacked by __init__ above.
    @property
    def func(self):
        return self._partial.func

    @property
    def args(self):
        return self._partial.args

    @property
    def keywords(self):
        return self._partial.keywords

    @property
    def func_name(self):
        # Python 2 style alias for __name__.
        return self.__name__

    def __str__(self):
        return str(self.func)

    def __repr__(self):
        return repr(self.func)

    def __hash__(self):
        # keywords is a dict (unhashable); fold it in as a frozenset of items.
        return hash((self.func, self.args,
                     frozenset(self.keywords.items()) if self.keywords
                     else None))

    def __eq__(self, other):
        return (isinstance(other, curry) and self.func == other.func and
                self.args == other.args and self.keywords == other.keywords)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __call__(self, *args, **kwargs):
        # Try a real call first; on TypeError decide whether the error is
        # genuine (enough args were supplied) or means "keep currying".
        try:
            return self._partial(*args, **kwargs)
        except TypeError:
            # If there was a genuine TypeError
            required_args = _num_required_args(self.func)
            if (required_args is not None and
                    len(args) + len(self.args) >= required_args):
                raise

        # Not enough arguments yet: return a new curry with these bound.
        return curry(self._partial, *args, **kwargs)

    def __get__(self, instance, owner):
        # Descriptor protocol: makes curried functions usable as methods by
        # binding the instance as the first argument.
        if instance is None:
            return self
        return curry(self, instance)

    # pickle protocol because functools.partial objects can't be pickled
    def __getstate__(self):
        # dictoolz.keyfilter, I miss you!
        userdict = tuple((k, v) for k, v in self.__dict__.items()
                         if k != '_partial')
        return self.func, self.args, self.keywords, userdict

    def __setstate__(self, state):
        func, args, kwargs, userdict = state
        self.__init__(func, *args, **(kwargs or {}))
        self.__dict__.update(userdict)
return x + y 302 | 303 | Use the ``cache`` keyword to provide a dict-like object as an initial cache 304 | 305 | >>> @memoize(cache={(1, 2): 3}) 306 | ... def add(x, y): 307 | ... return x + y 308 | 309 | Note that the above works as a decorator because ``memoize`` is curried. 310 | 311 | It is also possible to provide a ``key(args, kwargs)`` function that 312 | calculates keys used for the cache, which receives an ``args`` tuple and 313 | ``kwargs`` dict as input, and must return a hashable value. However, 314 | the default key function should be sufficient most of the time. 315 | 316 | >>> # Use key function that ignores extraneous keyword arguments 317 | >>> @memoize(key=lambda args, kwargs: args) 318 | ... def add(x, y, verbose=False): 319 | ... if verbose: 320 | ... print('Calculating %s + %s' % (x, y)) 321 | ... return x + y 322 | """ 323 | if cache is None: 324 | cache = {} 325 | 326 | try: 327 | may_have_kwargs = has_kwargs(func) 328 | # Is unary function (single arg, no variadic argument or keywords)? 
class Compose(object):
    """ A composition of functions

    Calling a ``Compose`` instance applies the stored functions in sequence:
    ``Compose((f, g, h))(x)`` computes ``f(g(h(x)))``.

    See Also:
        compose
    """
    __slots__ = 'first', 'funcs'

    def __init__(self, funcs):
        # Store in application order: ``first`` receives the raw arguments,
        # then each function in ``funcs`` is applied to the running result.
        funcs = tuple(reversed(funcs))
        self.first = funcs[0]
        self.funcs = funcs[1:]

    def __call__(self, *args, **kwargs):
        # Only the innermost function may take multiple args/kwargs; the
        # remaining functions are unary.
        ret = self.first(*args, **kwargs)
        for f in self.funcs:
            ret = f(ret)
        return ret

    def __getstate__(self):
        return self.first, self.funcs

    def __setstate__(self, state):
        self.first, self.funcs = state

    @property
    def __doc__(self):
        def composed_doc(*fs):
            """Generate a docstring for the composition of fs.
            """
            if not fs:
                # Argument name for the docstring.
                return '*args, **kwargs'

            return '{f}({g})'.format(f=fs[0].__name__, g=composed_doc(*fs[1:]))

        try:
            return (
                'lambda *args, **kwargs: ' +
                composed_doc(*reversed((self.first,) + self.funcs))
            )
        except AttributeError:
            # One of our callables does not have a `__name__`, whatever.
            return 'A composition of functions'

    @property
    def __name__(self):
        try:
            # BUG FIX: the original had a trailing comma after the bare
            # generator expression ("...self.funcs),"), which is a
            # SyntaxError on Python >= 3.7 (bpo-32012).  Removing the stray
            # comma restores the intended behavior.
            return '_of_'.join(
                f.__name__ for f in reversed((self.first,) + self.funcs)
            )
        except AttributeError:
            return type(self).__name__
class juxt(object):
    """ Apply several functions to the same arguments at once.

    ``juxt(f, g)(x)`` returns the tuple ``(f(x), g(x))``.  The functions may
    be passed either as separate arguments or as a single iterable.

    Name comes from juxtaposition: the fact of two things being seen or placed
    close together with contrasting effect.

    >>> inc = lambda x: x + 1
    >>> double = lambda x: x * 2
    >>> juxt(inc, double)(10)
    (11, 20)
    >>> juxt([inc, double])(10)
    (11, 20)
    """
    __slots__ = ['funcs']

    def __init__(self, *funcs):
        # A single non-callable argument is treated as an iterable of funcs.
        if len(funcs) == 1 and not callable(funcs[0]):
            funcs = funcs[0]
        self.funcs = tuple(funcs)

    def __call__(self, *args, **kwargs):
        results = []
        for fn in self.funcs:
            results.append(fn(*args, **kwargs))
        return tuple(results)

    def __getstate__(self):
        return self.funcs

    def __setstate__(self, state):
        self.funcs = state


def do(func, x):
    """ Call ``func(x)`` for its side effect, then return ``x`` unchanged.

    Useful for inserting logging or other side effects into a pipeline of
    pure transformations:

    >>> from toolz import compose
    >>> from toolz.curried import do

    >>> log = []
    >>> inc = lambda x: x + 1
    >>> inc = compose(inc, do(log.append))
    >>> inc(1)
    2
    >>> inc(11)
    12
    >>> log
    [1, 11]

    """
    func(x)
    return x
549 | 550 | This function is curried. 551 | 552 | >>> def div(a, b): 553 | ... return a / b 554 | ... 555 | >>> flip(div, 2, 1) 556 | 0.5 557 | >>> div_by_two = flip(div, 2) 558 | >>> div_by_two(4) 559 | 2.0 560 | 561 | This is particularly useful for built in functions and functions defined 562 | in C extensions that accept positional only arguments. For example: 563 | isinstance, issubclass. 564 | 565 | >>> data = [1, 'a', 'b', 2, 1.5, object(), 3] 566 | >>> only_ints = list(filter(flip(isinstance, int), data)) 567 | >>> only_ints 568 | [1, 2, 3] 569 | """ 570 | return func(b, a) 571 | -------------------------------------------------------------------------------- /toolz/itertoolz.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import heapq 3 | import collections 4 | import operator 5 | from functools import partial 6 | from toolz.compatibility import (map, filterfalse, zip, zip_longest, iteritems) 7 | from toolz.utils import no_default 8 | 9 | 10 | __all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave', 11 | 'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth', 12 | 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv', 13 | 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', 14 | 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', 15 | 'join', 'tail', 'diff', 'topk', 'peek') 16 | 17 | 18 | def remove(predicate, seq): 19 | """ Return those items of sequence for which predicate(item) is False 20 | 21 | >>> def iseven(x): 22 | ... 
def accumulate(binop, seq, initial=no_default):
    """ Repeatedly apply a binary function to a sequence, yielding every
    intermediate result

    >>> from operator import add, mul
    >>> list(accumulate(add, [1, 2, 3, 4, 5]))
    [1, 3, 6, 10, 15]
    >>> list(accumulate(mul, [1, 2, 3, 4, 5]))
    [1, 2, 6, 24, 120]

    Accumulate is similar to ``reduce`` and is good for making functions like
    cumulative sum:

    >>> from functools import partial, reduce
    >>> sum = partial(reduce, add)
    >>> cumsum = partial(accumulate, add)

    An optional ``initial`` value seeds the accumulation, mirroring the
    optional initializer of ``reduce``:

    >>> list(accumulate(add, [1, 2, 3], -1))
    [-1, 0, 2, 5]
    >>> list(accumulate(add, [], 1))
    [1]

    See Also:
        itertools.accumulate : In standard itertools for Python 3.2+
    """
    it = iter(seq)
    if initial is no_default:
        # No seed given: the first element starts the accumulation.
        total = next(it)
    else:
        total = initial
    yield total
    for value in it:
        total = binop(total, value)
        yield total
def merge_sorted(*seqs, **kwargs):
    """ Merge and sort a collection of sorted collections

    Lazy: at most one value from each input iterable is held in memory.

    >>> list(merge_sorted([1, 3, 5], [2, 4, 6]))
    [1, 2, 3, 4, 5, 6]

    >>> ''.join(merge_sorted('abc', 'abc', 'abc'))
    'aaabbbccc'

    The "key" function used to sort the input may be passed as a keyword.

    >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3))
    [2, 1, 3, 3]
    """
    sort_key = kwargs.get('key', None)
    if sort_key is not None:
        return _merge_sorted_key(seqs, sort_key)
    # Without a key function, heapq.merge already performs exactly this lazy
    # k-way merge (by value rather than by key(value)).
    return heapq.merge(*seqs)
def interleave(seqs, pass_exceptions=()):
    """ Interleave a sequence of sequences

    >>> list(interleave([[1, 2], [3, 4]]))
    [1, 3, 2, 4]

    >>> ''.join(interleave(('ABC', 'XY')))
    'AXBYC'

    Both the individual sequences and the sequence of sequences may be infinite

    Returns a lazy iterator
    """
    # Round-robin over the iterators, dropping each one once it is
    # exhausted (or once it raises one of ``pass_exceptions``).
    pending = map(iter, seqs)
    while pending:
        survivors = []
        for source in pending:
            try:
                yield next(source)
            except (StopIteration,) + tuple(pass_exceptions):
                continue
            survivors.append(source)
        pending = survivors
def isdistinct(seq):
    """ All values in sequence are distinct

    >>> isdistinct([1, 2, 3])
    True
    >>> isdistinct([1, 2, 1])
    False

    >>> isdistinct("Hello")
    False
    >>> isdistinct("World")
    True
    """
    if iter(seq) is seq:
        # ``seq`` is an iterator and can only be traversed once: check
        # incrementally so we can stop at the first duplicate.
        observed = set()
        for item in seq:
            if item in observed:
                return False
            observed.add(item)
        return True
    # Reusable container: duplicates shrink the deduplicated size.
    return len(seq) == len(set(seq))
| """ 300 | return itertools.islice(seq, n, None) 301 | 302 | 303 | def take_nth(n, seq): 304 | """ Every nth item in seq 305 | 306 | >>> list(take_nth(2, [10, 20, 30, 40, 50])) 307 | [10, 30, 50] 308 | """ 309 | return itertools.islice(seq, 0, None, n) 310 | 311 | 312 | def first(seq): 313 | """ The first element in a sequence 314 | 315 | >>> first('ABC') 316 | 'A' 317 | """ 318 | return next(iter(seq)) 319 | 320 | 321 | def second(seq): 322 | """ The second element in a sequence 323 | 324 | >>> second('ABC') 325 | 'B' 326 | """ 327 | return next(itertools.islice(seq, 1, None)) 328 | 329 | 330 | def nth(n, seq): 331 | """ The nth element in a sequence 332 | 333 | >>> nth(1, 'ABC') 334 | 'B' 335 | """ 336 | if isinstance(seq, (tuple, list, collections.Sequence)): 337 | return seq[n] 338 | else: 339 | return next(itertools.islice(seq, n, None)) 340 | 341 | 342 | def last(seq): 343 | """ The last element in a sequence 344 | 345 | >>> last('ABC') 346 | 'C' 347 | """ 348 | return tail(1, seq)[0] 349 | 350 | 351 | rest = partial(drop, 1) 352 | 353 | 354 | def _get(ind, seq, default): 355 | try: 356 | return seq[ind] 357 | except (KeyError, IndexError): 358 | return default 359 | 360 | 361 | def get(ind, seq, default=no_default): 362 | """ Get element in a sequence or dict 363 | 364 | Provides standard indexing 365 | 366 | >>> get(1, 'ABC') # Same as 'ABC'[1] 367 | 'B' 368 | 369 | Pass a list to get multiple values 370 | 371 | >>> get([1, 2], 'ABC') # ('ABC'[1], 'ABC'[2]) 372 | ('B', 'C') 373 | 374 | Works on any value that supports indexing/getitem 375 | For example here we see that it works with dictionaries 376 | 377 | >>> phonebook = {'Alice': '555-1234', 378 | ... 'Bob': '555-5678', 379 | ... 
def concat(seqs):
    """ Concatenate zero or more iterables, any of which may be infinite.

    An infinite sequence will prevent the rest of the arguments from
    being included.

    ``chain.from_iterable`` (rather than ``chain(*seqs)``) is used so that
    ``seqs`` itself may be a lazy generator.

    >>> list(concat([[], [1], [2, 3]]))
    [1, 2, 3]

    See also:
        itertools.chain.from_iterable  equivalent
    """
    return itertools.chain.from_iterable(seqs)
[["a", "b"], ["c", "d", "e"]])) 453 | ['A', 'B', 'C', 'D', 'E'] 454 | """ 455 | return concat(map(func, seqs)) 456 | 457 | 458 | def cons(el, seq): 459 | """ Add el to beginning of (possibly infinite) sequence seq. 460 | 461 | >>> list(cons(1, [2, 3])) 462 | [1, 2, 3] 463 | """ 464 | yield el 465 | for s in seq: 466 | yield s 467 | 468 | 469 | def interpose(el, seq): 470 | """ Introduce element between each pair of elements in seq 471 | 472 | >>> list(interpose("a", [1, 2, 3])) 473 | [1, 'a', 2, 'a', 3] 474 | """ 475 | combined = zip(itertools.repeat(el), seq) 476 | return drop(1, concat(combined)) 477 | 478 | 479 | def frequencies(seq): 480 | """ Find number of occurrences of each value in seq 481 | 482 | >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat']) #doctest: +SKIP 483 | {'cat': 3, 'ox': 1, 'pig': 2} 484 | 485 | See Also: 486 | countby 487 | groupby 488 | """ 489 | d = collections.defaultdict(int) 490 | for item in seq: 491 | d[item] += 1 492 | return dict(d) 493 | 494 | 495 | def reduceby(key, binop, seq, init=no_default): 496 | """ Perform a simultaneous groupby and reduction 497 | 498 | The computation: 499 | 500 | >>> result = reduceby(key, binop, seq, init) # doctest: +SKIP 501 | 502 | is equivalent to the following: 503 | 504 | >>> def reduction(group): # doctest: +SKIP 505 | ... return reduce(binop, group, init) # doctest: +SKIP 506 | 507 | >>> groups = groupby(key, seq) # doctest: +SKIP 508 | >>> result = valmap(reduction, groups) # doctest: +SKIP 509 | 510 | But the former does not build the intermediate groups, allowing it to 511 | operate in much less space. This makes it suitable for larger datasets 512 | that do not fit comfortably in memory 513 | 514 | The ``init`` keyword argument is the default initialization of the 515 | reduction. This can be either a constant value like ``0`` or a callable 516 | like ``lambda : 0`` as might be used in ``defaultdict``. 
def reduceby(key, binop, seq, init=no_default):
    """ Perform a simultaneous groupby and reduction

    The computation:

    >>> result = reduceby(key, binop, seq, init)      # doctest: +SKIP

    is equivalent to the following:

    >>> def reduction(group):                           # doctest: +SKIP
    ...     return reduce(binop, group, init)           # doctest: +SKIP

    >>> groups = groupby(key, seq)                    # doctest: +SKIP
    >>> result = valmap(reduction, groups)            # doctest: +SKIP

    But the former does not build the intermediate groups, allowing it to
    operate in much less space.  This makes it suitable for larger datasets
    that do not fit comfortably in memory

    The ``init`` keyword argument is the default initialization of the
    reduction.  This can be either a constant value like ``0`` or a callable
    like ``lambda : 0`` as might be used in ``defaultdict``.

    >>> from operator import add, mul
    >>> iseven = lambda x: x % 2 == 0
    >>> data = [1, 2, 3, 4, 5]

    >>> reduceby(iseven, add, data)  # doctest: +SKIP
    {False: 9, True: 6}

    >>> reduceby(iseven, mul, data)  # doctest: +SKIP
    {False: 15, True: 8}

    >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000},
    ...             {'name': 'fight crime', 'state': 'IL', 'cost': 100000},
    ...             {'name': 'help farmers', 'state': 'IL', 'cost': 2000000},
    ...             {'name': 'help farmers', 'state': 'CA', 'cost': 200000}]

    >>> reduceby('state',                        # doctest: +SKIP
    ...          lambda acc, x: acc + x['cost'],
    ...          projects, 0)
    {'CA': 1200000, 'IL': 2100000}

    >>> def set_add(s, i):
    ...     s.add(i)
    ...     return s

    >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set)  # doctest: +SKIP
    {True: set([2, 4]),
     False: set([1, 3])}
    """
    # Normalize a constant ``init`` into a factory so every new group
    # starts from a fresh value (important for mutable accumulators).
    if init is not no_default and not callable(init):
        init_value = init
        init = lambda: init_value
    if not callable(key):
        key = getter(key)
    totals = {}
    for item in seq:
        k = key(item)
        if k in totals:
            totals[k] = binop(totals[k], item)
        elif init is no_default:
            # No initializer: the first element of a group seeds its total.
            totals[k] = item
        else:
            totals[k] = binop(init(), item)
    return totals
def sliding_window(n, seq):
    """ A sequence of overlapping subsequences

    >>> list(sliding_window(2, [1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4)]

    This function creates a sliding window suitable for transformations like
    sliding means / smoothing

    >>> mean = lambda seq: float(sum(seq)) / len(seq)
    >>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))
    [1.5, 2.5, 3.5]

    If ``seq`` has fewer than ``n`` elements no windows are produced.
    """
    it = iter(seq)
    # An efficient FIFO data structure with maximum length
    window = collections.deque(itertools.islice(it, n), n)
    if len(window) != n:
        # Too few elements for even one window.  Returning (rather than
        # raising StopIteration) keeps the generator valid under PEP 479,
        # where a StopIteration leaking from a generator body becomes a
        # RuntimeError on Python 3.7+.
        return
    window_append = window.append
    for item in it:
        yield tuple(window)
        window_append(item)
    yield tuple(window)
def partition_all(n, seq):
    """ Partition all elements of sequence into tuples of length at most n

    The final tuple may be shorter to accommodate extra elements.

    >>> list(partition_all(2, [1, 2, 3, 4]))
    [(1, 2), (3, 4)]

    >>> list(partition_all(2, [1, 2, 3, 4, 5]))
    [(1, 2), (3, 4), (5,)]

    See Also:
        partition
    """
    args = [iter(seq)] * n
    it = zip_longest(*args, fillvalue=no_pad)
    try:
        prev = next(it)
    except StopIteration:
        # Empty input: yield nothing.  Letting the StopIteration escape the
        # generator body would become a RuntimeError on Python 3.7+
        # (PEP 479).
        return
    for item in it:
        yield prev
        prev = item
    if prev[-1] is no_pad:
        # Trim the padding off the final, partial tuple.
        yield prev[:prev.index(no_pad)]
    else:
        yield prev
def getter(index):
    # Build an itemgetter-like callable.  A list index means "fetch several
    # fields and return them as a tuple", mirroring ``operator.itemgetter``
    # with multiple arguments -- including the one-element and zero-element
    # cases that itemgetter itself cannot express.
    if not isinstance(index, list):
        return operator.itemgetter(index)
    if len(index) == 1:
        only = index[0]
        return lambda x: (x[only],)
    if index:
        return operator.itemgetter(*index)
    return lambda x: ()
def join(leftkey, leftseq, rightkey, rightseq,
         left_default=no_default, right_default=no_default):
    """ Join two sequences on common attributes

    This is a semi-streaming operation.  The LEFT sequence is fully evaluated
    and placed into memory.  The RIGHT sequence is evaluated lazily and so can
    be arbitrarily large.

    >>> friends = [('Alice', 'Edith'),
    ...            ('Alice', 'Zhao'),
    ...            ('Edith', 'Alice'),
    ...            ('Zhao', 'Alice'),
    ...            ('Zhao', 'Edith')]

    >>> cities = [('Alice', 'NYC'),
    ...           ('Alice', 'Chicago'),
    ...           ('Dan', 'Syndey'),
    ...           ('Edith', 'Paris'),
    ...           ('Edith', 'Berlin'),
    ...           ('Zhao', 'Shanghai')]

    >>> # Vacation opportunities
    >>> # In what cities do people have friends?
    >>> result = join(second, friends,
    ...               first, cities)
    >>> for ((a, b), (c, d)) in sorted(unique(result)):
    ...     print((a, d))
    ('Alice', 'Berlin')
    ('Alice', 'Paris')
    ('Alice', 'Shanghai')
    ('Edith', 'Chicago')
    ('Edith', 'NYC')
    ('Zhao', 'Chicago')
    ('Zhao', 'NYC')
    ('Zhao', 'Berlin')
    ('Zhao', 'Paris')

    Specify outer joins with keyword arguments ``left_default`` and/or
    ``right_default``.  Here is a full outer join in which unmatched elements
    are paired with None.

    >>> identity = lambda x: x
    >>> list(join(identity, [1, 2, 3],
    ...           identity, [2, 3, 4],
    ...           left_default=None, right_default=None))
    [(2, 2), (3, 3), (None, 4), (1, None)]

    Usually the key arguments are callables to be applied to the sequences.  If
    the keys are not obviously callable then it is assumed that indexing was
    intended, e.g. the following is a legal change

    >>> # result = join(second, friends, first, cities)
    >>> result = join(1, friends, 0, cities)  # doctest: +SKIP
    """
    if not callable(leftkey):
        leftkey = getter(leftkey)
    if not callable(rightkey):
        rightkey = getter(rightkey)

    # The whole left side is grouped in memory; the right side streams.
    left_groups = groupby(leftkey, leftseq)
    matched_keys = set()

    for right_item in rightseq:
        k = rightkey(right_item)
        matched_keys.add(k)
        if k in left_groups:
            for left_item in left_groups[k]:
                yield (left_item, right_item)
        elif left_default is not no_default:
            # Right-item with no left partner: emit with the left default.
            yield (left_default, right_item)

    if right_default is not no_default:
        # Emit left items that never found a right partner.
        for k, left_items in left_groups.items():
            if k not in matched_keys:
                for left_item in left_items:
                    yield (left_item, right_default)
def topk(k, seq, key=None):
    """
    Find the k largest elements of a sequence

    Operates lazily in ``n*log(k)`` time

    >>> topk(2, [1, 100, 10, 1000])
    (1000, 100)

    Use a key function to change sorted order

    >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len)
    ('Charlie', 'Alice')

    See also:
        heapq.nlargest
    """
    # A non-callable key is interpreted as an index/member to fetch.
    if key is None or callable(key):
        keyfunc = key
    else:
        keyfunc = getter(key)
    return tuple(heapq.nlargest(k, seq, key=keyfunc))
def peek(seq):
    """ Retrieve the next element of a sequence

    Returns the first element and an iterable equivalent to the original
    sequence, still having the element retrieved.

    >>> seq = [0, 1, 2, 3, 4]
    >>> first, seq = peek(seq)
    >>> first
    0
    >>> list(seq)
    [0, 1, 2, 3, 4]

    """
    it = iter(seq)
    head = next(it)
    # Re-attach the consumed element so callers still see the full sequence.
    return head, itertools.chain((head,), it)
def partitionby(func, seq):
    """ Partition a sequence according to a function

    Partition `s` into a sequence of lists such that, when traversing
    `s`, every time the output of `func` changes a new list is started
    and that and subsequent items are collected into that list.

    >>> is_space = lambda c: c == " "
    >>> list(partitionby(is_space, "I have space"))
    [('I',), (' ',), ('h', 'a', 'v', 'e'), (' ',), ('s', 'p', 'a', 'c', 'e')]

    >>> is_large = lambda x: x > 10
    >>> list(partitionby(is_large, [1, 2, 1, 99, 88, 33, 99, -1, 5]))
    [(1, 2, 1), (99, 88, 33, 99), (-1, 5)]

    See also:
        partition
        groupby
        itertools.groupby
    """
    # itertools.groupby already batches consecutive items with equal keys;
    # we only need to materialize each run as a tuple.
    runs = itertools.groupby(seq, key=func)
    return (tuple(run) for _, run in runs)
class EqualityHashKey(object):
    """ Create a hash key that uses equality comparisons between items.

    This may be used to create hash keys for otherwise unhashable types:

    >>> from toolz import curry
    >>> EqualityHashDefault = curry(EqualityHashKey, None)
    >>> set(map(EqualityHashDefault, [[], (), [1], [1]]))  # doctest: +SKIP
    {=[]=, =()=, =[1]=}

    **Caution:** adding N ``EqualityHashKey`` items to a hash container
    may require O(N**2) operations, not O(N) as for typical hashable types,
    since items sharing the default hash are compared pairwise by equality.
    Therefore, a suitable key function such as ``tuple`` or ``frozenset``
    is usually preferred over using ``EqualityHashKey`` if possible.

    The ``key`` argument to ``EqualityHashKey`` should be a function or
    index that returns a hashable object that effectively distinguishes
    unequal items, which avoids the poor scaling of the default key:

    >>> EqualityHashLen = curry(EqualityHashKey, len)
    >>> EqualityHashType = curry(EqualityHashKey, type)  # this works too
    >>> set(map(EqualityHashLen, [[], (), [1], [1]]))  # doctest: +SKIP
    {=[]=, =()=, =[1]=}

    ``EqualityHashKey`` is convenient when a suitable key function is
    complicated or unavailable.  For example, the following returns all
    unique values based on equality:

    >>> from toolz import unique
    >>> vals = [[], [], (), [1], [1], [2], {}, {}, {}]
    >>> list(unique(vals, key=EqualityHashDefault))
    [[], (), [1], [2], {}]

    **Warning:** don't change the equality value of an item already in a hash
    container.  Unhashable types are unhashable for a reason.  For example:

    >>> L1 = [1] ; L2 = [2]
    >>> s = set(map(EqualityHashDefault, [L1, L2]))
    >>> s  # doctest: +SKIP
    {=[1]=, =[2]=}

    >>> L1[0] = 2  # Don't do this!  ``s`` now has duplicate items!
    >>> s  # doctest: +SKIP
    {=[2]=, =[2]=}

    Although this may appear problematic, immutable data types is a common
    idiom in functional programming, and ``EqualityHashKey`` easily allows
    the same idiom to be used by convention rather than strict requirement.

    See Also:
        identity
    """
    __slots__ = ['item', 'key']
    _default_hashkey = '__default__hashkey__'

    def __init__(self, key, item):
        # Normalize ``key``: None means "hash everything alike" (the
        # default sentinel); a non-callable means "index into the item".
        if key is None:
            key = self._default_hashkey
        elif not callable(key):
            key = getter(key)
        self.key = key
        self.item = item

    def __hash__(self):
        hashable = self.key
        if hashable != self._default_hashkey:
            # A real key function: hash the derived value instead.
            hashable = hashable(self.item)
        return hash(hashable)

    def __eq__(self, other):
        # Accessing ``_default_hashkey`` doubles as a duck-type check that
        # ``other`` is an EqualityHashKey-like wrapper.
        try:
            return (self._default_hashkey == other._default_hashkey and
                    self.item == other.item)
        except AttributeError:
            return False

    def __ne__(self, other):
        return not (self == other)

    def __str__(self):
        return '=%s=' % str(self.item)

    def __repr__(self):
        return '=%s=' % repr(self.item)
def unzip(seq):
    """Inverse of ``zip``

    >>> a, b = unzip([('a', 1), ('b', 2)])
    >>> list(a)
    ['a', 'b']
    >>> list(b)
    [1, 2]

    Unlike the naive implementation ``def unzip(seq): zip(*seq)`` this
    implementation can handle a finite sequence of infinite sequences.

    Caveats:

    * The implementation uses ``tee``, and so can use a significant amount
      of auxiliary storage if the resulting iterators are consumed at
      different times.

    * The top level sequence cannot be infinite.

    """
    source = iter(seq)

    # Peek at the first tuple to learn how many output iterators we need.
    try:
        head = tuple(next(source))
    except StopIteration:
        return tuple()

    # Duplicate the (re-assembled) stream once per column, then project
    # column ``col`` out of copy ``col``.
    copies = tee(cons(head, source), len(head))
    return tuple(pluck(col, copy) for col, copy in enumerate(copies))
def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None):
    """
    Reduce without guarantee of ordered reduction.

    inputs:

    ``binop``     - associative operator. The associative property allows us to
                    leverage a parallel map to perform reductions in parallel.
    ``seq``       - a sequence to be aggregated
    ``default``   - an identity element like 0 for ``add`` or 1 for mul

    ``map``       - an implementation of ``map``. This may be parallel and
                    determines how work is distributed.
    ``chunksize`` - Number of elements of ``seq`` that should be handled
                    within a single function call
    ``combine``   - Binary operator to combine two intermediate results.
                    If ``binop`` is of type (total, item) -> total
                    then ``combine`` is of type (total, total) -> total
                    Defaults to ``binop`` for common case of operators like add

    Fold chunks up the collection into blocks of size ``chunksize`` and then
    feeds each of these to calls to ``reduce``. This work is distributed
    with a call to ``map``, gathered back and then refolded to finish the
    computation. In this way ``fold`` specifies only how to chunk up data but
    leaves the distribution of this work to an externally provided ``map``
    function. This function can be sequential or rely on multithreading,
    multiprocessing, or even distributed solutions.

    If ``map`` intends to serialize functions it should be prepared to accept
    and serialize lambdas. Note that the standard ``pickle`` module fails
    here.

    Example
    -------

    >>> # Provide a parallel map to accomplish a parallel sum
    >>> from operator import add
    >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map)
    10
    """
    if combine is None:
        combine = binop

    chunks = partition_all(chunksize, seq)

    # Reduce each chunk independently (possibly in parallel via ``map``).
    if default is no_default:
        reducer = lambda chunk: reduce(binop, chunk)
    else:
        reducer = lambda chunk: reduce(binop, chunk, default)
    partials = list(map(reducer, chunks))  # TODO: Support complete laziness

    if len(partials) == 1:
        # A single intermediate result: the fold is complete.
        return partials[0]
    # Otherwise recurse to combine the intermediate results.
    return fold(combine, partials, map=map, chunksize=chunksize)
def test_EqualityHashKey_callable_key():
    # Common simple hash key functions applied to mixed container data.
    EqualityHashLen = curry(EqualityHashKey, len)
    EqualityHashType = curry(EqualityHashKey, type)
    EqualityHashId = curry(EqualityHashKey, id)
    EqualityHashFirst = curry(EqualityHashKey, first)
    data1 = [[], [1], (), (1,), {}, {1: 2}]
    data2 = [[1, 2], (1, 2), (1, 3), [1, 3], [2, 1], {1: 2}]
    # len, type, and id keys all deduplicate triplicated data back to
    # the originals; ``first`` only applies to the non-empty data2.
    for hashkey in (EqualityHashLen, EqualityHashType, EqualityHashId):
        assert list(unique(data1 * 3, key=hashkey)) == data1
        assert list(unique(data2 * 3, key=hashkey)) == data2
    assert list(unique(data2 * 3, key=EqualityHashFirst)) == data2
| assert list(unique(3*[d1, d2, d3a, d3b], 69 | key=EqualityHashFirstnameAge)) == [d1, d2, d3a] 70 | list1 = [0] * 10 71 | list2 = [0] * 100 72 | list3a = [1] * 10 73 | list3b = [1] * 10 74 | EqualityHash0 = curry(EqualityHashKey, 0) 75 | assert list(unique(3*[list1, list2, list3a, list3b], 76 | key=EqualityHash0)) == [list1, list2, list3a] 77 | 78 | 79 | def test_unzip(): 80 | def _to_lists(seq, n=10): 81 | """iter of iters -> finite list of finite lists 82 | """ 83 | def initial(s): 84 | return list(take(n, s)) 85 | 86 | return initial(map(initial, seq)) 87 | 88 | def _assert_initial_matches(a, b, n=10): 89 | assert list(take(n, a)) == list(take(n, b)) 90 | 91 | # Unzips a simple list correctly 92 | assert _to_lists(unzip([('a', 1), ('b', 2), ('c', 3)])) \ 93 | == [['a', 'b', 'c'], [1, 2, 3]] 94 | 95 | # Can handle a finite number of infinite iterators (the naive unzip 96 | # implementation `zip(*args)` impelementation fails on this example). 97 | a, b, c = unzip(zip(count(1), repeat(0), repeat(1))) 98 | _assert_initial_matches(a, count(1)) 99 | _assert_initial_matches(b, repeat(0)) 100 | _assert_initial_matches(c, repeat(1)) 101 | 102 | # Sensibly handles empty input 103 | assert list(unzip(zip([]))) == [] 104 | -------------------------------------------------------------------------------- /toolz/sandbox/tests/test_parallel.py: -------------------------------------------------------------------------------- 1 | from toolz.sandbox.parallel import fold 2 | from toolz import reduce 3 | from operator import add 4 | 5 | 6 | def test_fold(): 7 | assert fold(add, range(10), 0) == reduce(add, range(10), 0) 8 | assert fold(add, range(10), 0, chunksize=2) == reduce(add, range(10), 0) 9 | assert fold(add, range(10)) == fold(add, range(10), 0) 10 | 11 | def setadd(s, item): 12 | s = s.copy() 13 | s.add(item) 14 | return s 15 | 16 | assert fold(setadd, [1, 2, 3], set()) == set((1, 2, 3)) 17 | assert (fold(setadd, [1, 2, 3], set(), chunksize=2, combine=set.union) 18 | == 
# --- toolz/sandbox/tests/test_parallel.py ---
# NOTE(review): reconstructed from repository-dump text ("NN |" residue).
from toolz.sandbox.parallel import fold
from toolz import reduce
from operator import add


def test_fold():
    # fold must agree with a plain sequential reduce for associative ops.
    assert fold(add, range(10), 0) == reduce(add, range(10), 0)
    assert fold(add, range(10), 0, chunksize=2) == reduce(add, range(10), 0)
    # Default initial value behaves like an explicit zero.
    assert fold(add, range(10)) == fold(add, range(10), 0)

    def setadd(s, item):
        s = s.copy()
        s.add(item)
        return s

    assert fold(setadd, [1, 2, 3], set()) == set((1, 2, 3))
    # A custom `combine` merges the per-chunk results.
    assert (fold(setadd, [1, 2, 3], set(), chunksize=2, combine=set.union)
            == set((1, 2, 3)))


# --- toolz/tests/test_compatibility.py ---
from toolz.compatibility import map, filter, iteritems, iterkeys, itervalues


def test_map_filter_are_lazy():
    # Neither map nor filter may call the function until iterated.
    def bad(x):
        raise Exception()
    map(bad, [1, 2, 3])
    filter(bad, [1, 2, 3])


def test_dict_iteration():
    d = {'a': 1, 'b': 2, 'c': 3}
    # The iter* helpers must return iterators, not materialized lists.
    assert not isinstance(iteritems(d), list)
    assert not isinstance(iterkeys(d), list)
    assert not isinstance(itervalues(d), list)
    assert set(iteritems(d)) == set(d.items())
    assert set(iterkeys(d)) == set(d.keys())
    assert set(itervalues(d)) == set(d.values())


# --- toolz/tests/test_curried.py ---
import toolz
import toolz.curried
from toolz.curried import (take, first, second, sorted, merge_with, reduce,
                           merge, operator as cop)
from collections import defaultdict
from operator import add


def test_take():
    assert list(take(2)([1, 2, 3])) == [1, 2]


def test_first():
    # first needs no currying, so the original function is re-exported.
    assert first is toolz.itertoolz.first


def test_merge():
    assert merge(factory=lambda: defaultdict(int))({1: 1}) == {1: 1}
    assert merge({1: 1}) == {1: 1}
    assert merge({1: 1}, factory=lambda: defaultdict(int)) == {1: 1}


def test_merge_with():
    assert merge_with(sum)({1: 1}, {1: 2}) == {1: 3}


def test_merge_with_list():
    assert merge_with(sum, [{'a': 1}, {'a': 2}]) == {'a': 3}


def test_sorted():
    assert sorted(key=second)([(1, 2), (2, 1)]) == [(2, 1), (1, 2)]


def test_reduce():
    assert reduce(add)((1, 2, 3)) == 6


def test_module_name():
    assert toolz.curried.__name__ == 'toolz.curried'
def test_curried_operator():
    # Every callable exported by toolz.curried.operator must be either
    # curried or already unary.
    for k, v in vars(cop).items():
        if not callable(v):
            continue

        if not isinstance(v, toolz.curry):
            try:
                # Make sure it is unary
                # We cannot use isunary because it might be defined in C.
                v(1)
            except TypeError:
                raise AssertionError(
                    'toolz.curried.operator.%s is not curried!' % k,
                )

    # Make sure this isn't totally empty.
    assert len(set(vars(cop)) & set(['add', 'sub', 'mul'])) == 3


# --- toolz/tests/test_dicttoolz.py ---
# NOTE(review): reconstructed from repository-dump text ("NN |" residue).
from collections import defaultdict as _defaultdict
from toolz.dicttoolz import (merge, merge_with, valmap, keymap, update_in,
                             assoc, dissoc, keyfilter, valfilter, itemmap,
                             itemfilter)
from toolz.utils import raises
from toolz.compatibility import PY3


def inc(x):
    return x + 1


def iseven(i):
    return i % 2 == 0


class TestDict(object):
    """Test typical usage: dict inputs, no factory keyword.

    Class attributes:
    D: callable that inputs a dict and creates or returns a MutableMapping
    kw: kwargs dict to specify "factory" keyword (if applicable)
    """
    D = dict
    kw = {}

    def test_merge(self):
        D, kw = self.D, self.kw
        assert merge(D({1: 1, 2: 2}), D({3: 4}), **kw) == D({1: 1, 2: 2, 3: 4})

    def test_merge_iterable_arg(self):
        D, kw = self.D, self.kw
        assert merge([D({1: 1, 2: 2}), D({3: 4})], **kw) == D({1: 1, 2: 2, 3: 4})

    def test_merge_with(self):
        D, kw = self.D, self.kw
        dicts = D({1: 1, 2: 2}), D({1: 10, 2: 20})
        assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22})
        assert merge_with(tuple, *dicts, **kw) == D({1: (1, 10), 2: (2, 20)})

        dicts = D({1: 1, 2: 2, 3: 3}), D({1: 10, 2: 20})
        assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22, 3: 3})
        assert merge_with(tuple, *dicts, **kw) == D({1: (1, 10), 2: (2, 20), 3: (3,)})

        assert not merge_with(sum)

    def test_merge_with_iterable_arg(self):
        D, kw = self.D, self.kw
        dicts = D({1: 1, 2: 2}), D({1: 10, 2: 20})
        assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22})
        assert merge_with(sum, dicts, **kw) == D({1: 11, 2: 22})
        assert merge_with(sum, iter(dicts), **kw) == D({1: 11, 2: 22})

    def test_valmap(self):
        D, kw = self.D, self.kw
        assert valmap(inc, D({1: 1, 2: 2}), **kw) == D({1: 2, 2: 3})

    def test_keymap(self):
        D, kw = self.D, self.kw
        assert keymap(inc, D({1: 1, 2: 2}), **kw) == D({2: 1, 3: 2})

    def test_itemmap(self):
        D, kw = self.D, self.kw
        assert itemmap(reversed, D({1: 2, 2: 4}), **kw) == D({2: 1, 4: 2})

    def test_valfilter(self):
        D, kw = self.D, self.kw
        assert valfilter(iseven, D({1: 2, 2: 3}), **kw) == D({1: 2})

    def test_keyfilter(self):
        D, kw = self.D, self.kw
        assert keyfilter(iseven, D({1: 2, 2: 3}), **kw) == D({2: 3})

    def test_itemfilter(self):
        D, kw = self.D, self.kw
        assert itemfilter(lambda item: iseven(item[0]), D({1: 2, 2: 3}), **kw) == D({2: 3})
        assert itemfilter(lambda item: iseven(item[1]), D({1: 2, 2: 3}), **kw) == D({1: 2})

    def test_assoc(self):
        D, kw = self.D, self.kw
        assert assoc(D({}), "a", 1, **kw) == D({"a": 1})
        assert assoc(D({"a": 1}), "a", 3, **kw) == D({"a": 3})
        assert assoc(D({"a": 1}), "b", 3, **kw) == D({"a": 1, "b": 3})

        # Verify immutability:
        d = D({'x': 1})
        oldd = d
        assoc(d, 'x', 2, **kw)
        assert d is oldd

    def test_dissoc(self):
        D, kw = self.D, self.kw
        assert dissoc(D({"a": 1}), "a") == D({})
        assert dissoc(D({"a": 1, "b": 2}), "a") == D({"b": 2})
        assert dissoc(D({"a": 1, "b": 2}), "b") == D({"a": 1})
        assert dissoc(D({"a": 1, "b": 2}), "a", "b") == D({})
        assert dissoc(D({"a": 1}), "a") == dissoc(dissoc(D({"a": 1}), "a"), "a")

        # Verify immutability:
        d = D({'x': 1})
        oldd = d
        d2 = dissoc(d, 'x')
        assert d is oldd
        assert d2 is not oldd

    def test_update_in(self):
        D, kw = self.D, self.kw
        assert update_in(D({"a": 0}), ["a"], inc, **kw) == D({"a": 1})
        assert update_in(D({"a": 0, "b": 1}), ["b"], str, **kw) == D({"a": 0, "b": "1"})
        assert (update_in(D({"t": 1, "v": D({"a": 0})}), ["v", "a"], inc, **kw) ==
                D({"t": 1, "v": D({"a": 1})}))
        # Handle one missing key.
        assert update_in(D({}), ["z"], str, None, **kw) == D({"z": "None"})
        assert update_in(D({}), ["z"], inc, 0, **kw) == D({"z": 1})
        assert update_in(D({}), ["z"], lambda x: x+"ar", default="b", **kw) == D({"z": "bar"})
        # Same semantics as Clojure for multiple missing keys, ie. recursively
        # create nested empty dictionaries to the depth specified by the
        # keys with the innermost value set to f(default).
        assert update_in(D({}), [0, 1], inc, default=-1, **kw) == D({0: D({1: 0})})
        assert update_in(D({}), [0, 1], str, default=100, **kw) == D({0: D({1: "100"})})
        assert (update_in(D({"foo": "bar", 1: 50}), ["d", 1, 0], str, 20, **kw) ==
                D({"foo": "bar", 1: 50, "d": D({1: D({0: "20"})})}))
        # Verify immutability:
        d = D({'x': 1})
        oldd = d
        update_in(d, ['x'], inc, **kw)
        assert d is oldd

    def test_factory(self):
        D, kw = self.D, self.kw
        assert merge(defaultdict(int, D({1: 2})), D({2: 3})) == {1: 2, 2: 3}
        assert (merge(defaultdict(int, D({1: 2})), D({2: 3}),
                      factory=lambda: defaultdict(int)) ==
                defaultdict(int, D({1: 2, 2: 3})))
        assert not (merge(defaultdict(int, D({1: 2})), D({2: 3}),
                          factory=lambda: defaultdict(int)) == {1: 2, 2: 3})
        # A misspelled keyword must raise, not be silently ignored.
        assert raises(TypeError, lambda: merge(D({1: 2}), D({2: 3}), factoryy=dict))


class defaultdict(_defaultdict):
    # Stricter equality than the builtin: the default_factory must match too,
    # so tests can tell a defaultdict result apart from a plain dict.
    def __eq__(self, other):
        return (super(defaultdict, self).__eq__(other) and
                isinstance(other, _defaultdict) and
                self.default_factory == other.default_factory)


class TestDefaultDict(TestDict):
    """Test defaultdict as input and factory

    Class attributes:
    D: callable that inputs a dict and creates or returns a MutableMapping
    kw: kwargs dict to specify "factory" keyword (if applicable)
    """
    @staticmethod
    def D(dict_):
        return defaultdict(int, dict_)

    kw = {'factory': lambda: defaultdict(int)}


class CustomMapping(object):
    """Define methods of the MutableMapping protocol required by dicttoolz"""
    def __init__(self, *args, **kwargs):
        self._d = dict(*args, **kwargs)

    def __getitem__(self, key):
        return self._d[key]

    def __setitem__(self, key, val):
        self._d[key] = val

    def __delitem__(self, key):
        del self._d[key]

    def __iter__(self):
        return iter(self._d)

    def __len__(self):
        return len(self._d)

    def __contains__(self, key):
        return key in self._d

    def __eq__(self, other):
        return isinstance(other, CustomMapping) and self._d == other._d

    def __ne__(self, other):
        return not isinstance(other, CustomMapping) or self._d != other._d

    def keys(self):
        return self._d.keys()

    def values(self):
        return self._d.values()

    def items(self):
        return self._d.items()

    def update(self, *args, **kwargs):
        self._d.update(*args, **kwargs)

    # Should we require these to be defined for Python 2?
    if not PY3:
        def iterkeys(self):
            return self._d.iterkeys()

        def itervalues(self):
            return self._d.itervalues()

        def iteritems(self):
            return self._d.iteritems()

    # Unused methods that are part of the MutableMapping protocol
    #def get(self, key, *args):
    #    return self._d.get(key, *args)

    #def pop(self, key, *args):
    #    return self._d.pop(key, *args)

    #def popitem(self, key):
    #    return self._d.popitem()

    #def clear(self):
    #    self._d.clear()

    #def setdefault(self, key, *args):
    #    return self._d.setdefault(self, key, *args)


class TestCustomMapping(TestDict):
    """Test CustomMapping as input and factory

    Class attributes:
    D: callable that inputs a dict and creates or returns a MutableMapping
    kw: kwargs dict to specify "factory" keyword (if applicable)
    """
    D = CustomMapping
    kw = {'factory': lambda: CustomMapping()}
# --- toolz/tests/test_functoolz.py ---
# NOTE(review): reconstructed from repository-dump text ("NN |" residue).
import platform


from toolz.functoolz import (thread_first, thread_last, memoize, curry,
                             compose, pipe, complement, do, juxt, flip)
from toolz.functoolz import _num_required_args
from operator import add, mul, itemgetter
from toolz.utils import raises
from functools import partial


def iseven(x):
    return x % 2 == 0


def isodd(x):
    return x % 2 == 1


def inc(x):
    return x + 1


def double(x):
    return 2 * x


def test_thread_first():
    assert thread_first(2) == 2
    assert thread_first(2, inc) == 3
    assert thread_first(2, inc, inc) == 4
    assert thread_first(2, double, inc) == 5
    assert thread_first(2, (add, 5), double) == 14


def test_thread_last():
    assert list(thread_last([1, 2, 3], (map, inc), (filter, iseven))) == [2, 4]
    assert list(thread_last([1, 2, 3], (map, inc), (filter, isodd))) == [3]
    assert thread_last(2, (add, 5), double) == 14


def test_memoize():
    fn_calls = [0]  # Storage for side effects

    def f(x, y):
        """ A docstring """
        fn_calls[0] += 1
        return x + y
    mf = memoize(f)

    assert mf(2, 3) == mf(2, 3)
    assert fn_calls == [1]  # function was only called once
    assert mf.__doc__ == f.__doc__
    assert raises(TypeError, lambda: mf(1, {}))


def test_memoize_kwargs():
    fn_calls = [0]  # Storage for side effects

    def f(x, y=0):
        return x + y

    mf = memoize(f)

    assert mf(1) == f(1)
    assert mf(1, 2) == f(1, 2)
    assert mf(1, y=2) == f(1, y=2)
    assert mf(1, y=3) == f(1, y=3)


def test_memoize_curried():
    @curry
    def f(x, y=0):
        return x + y

    f2 = f(y=1)
    fm2 = memoize(f2)

    assert fm2(3) == f2(3)
    assert fm2(3) == f2(3)


def test_memoize_partial():
    def f(x, y=0):
        return x + y

    f2 = partial(f, y=1)
    fm2 = memoize(f2)

    assert fm2(3) == f2(3)
    assert fm2(3) == f2(3)


def test_memoize_key_signature():
    # Single argument should not be tupled as a key.  No keywords.
    mf = memoize(lambda x: False, cache={1: True})
    assert mf(1) is True
    assert mf(2) is False

    # Single argument must be tupled if signature has varargs.  No keywords.
    mf = memoize(lambda x, *args: False, cache={(1,): True, (1, 2): 2})
    assert mf(1) is True
    assert mf(2) is False
    assert mf(1, 1) is False
    assert mf(1, 2) == 2
    assert mf((1, 2)) is False

    # More than one argument is always tupled.  No keywords.
    mf = memoize(lambda x, y: False, cache={(1, 2): True})
    assert mf(1, 2) is True
    assert mf(1, 3) is False
    assert raises(TypeError, lambda: mf((1, 2)))

    # Nullary function (no inputs) uses empty tuple as the key
    mf = memoize(lambda: False, cache={(): True})
    assert mf() is True

    # Single argument must be tupled if there are keyword arguments, because
    # keyword arguments may be passed as unnamed args.
    mf = memoize(lambda x, y=0: False,
                 cache={((1,), frozenset((('y', 2),))): 2,
                        ((1, 2), None): 3})
    assert mf(1, y=2) == 2
    assert mf(1, 2) == 3
    assert mf(2, y=2) is False
    assert mf(2, 2) is False
    assert mf(1) is False
    assert mf((1, 2)) is False

    # Keyword-only signatures must still have an "args" tuple.
    mf = memoize(lambda x=0: False, cache={(None, frozenset((('x', 1),))): 1,
                                           ((1,), None): 2})
    assert mf() is False
    assert mf(x=1) == 1
    assert mf(1) == 2


def test_memoize_curry_cache():
    @memoize(cache={1: True})
    def f(x):
        return False

    assert f(1) is True
    assert f(2) is False


def test_memoize_key():
    @memoize(key=lambda args, kwargs: args[0])
    def f(x, y, *args, **kwargs):
        return x + y

    assert f(1, 2) == 3
    assert f(1, 3) == 3  # cache hit on args[0] == 1 despite different y


def test_curry_simple():
    cmul = curry(mul)
    double = cmul(2)
    assert callable(double)
    assert double(10) == 20
    assert repr(cmul) == repr(mul)

    cmap = curry(map)
    assert list(cmap(inc)([1, 2, 3])) == [2, 3, 4]

    assert raises(TypeError, lambda: curry())
    assert raises(TypeError, lambda: curry({1: 2}))


def test_curry_kwargs():
    def f(a, b, c=10):
        return (a + b) * c

    f = curry(f)
    assert f(1, 2, 3) == 9
    assert f(1)(2, 3) == 9
    assert f(1, 2) == 30
    assert f(1, c=3)(2) == 9
    assert f(c=3)(1, 2) == 9

    def g(a=1, b=10, c=0):
        return a + b + c

    cg = curry(g, b=2)
    assert cg() == 3
    assert cg(b=3) == 4
    assert cg(a=0) == 2
    assert cg(a=0, b=1) == 1
    assert cg(0) == 2  # pass "a" as arg, not kwarg
    assert raises(TypeError, lambda: cg(1, 2))  # pass "b" as arg AND kwarg

    def h(x, func=int):
        return func(x)

    if platform.python_implementation() != 'PyPy'\
            or platform.python_version_tuple()[0] != '3':  # Bug on PyPy3<2.5
        # __init__ must not pick func as positional arg
        assert curry(h)(0.0) == 0
        assert curry(h)(func=str)(0.0) == '0.0'
        assert curry(h, func=str)(0.0) == '0.0'


def test_curry_passes_errors():
    @curry
    def f(a, b):
        if not isinstance(a, int):
            raise TypeError()
        return a + b

    assert f(1, 2) == 3
    assert raises(TypeError, lambda: f('1', 2))
    assert raises(TypeError, lambda: f('1')(2))
    assert raises(TypeError, lambda: f(1, 2, 3))


def test_curry_docstring():
    def f(x, y):
        """ A docstring """
        return x

    g = curry(f)
    assert g.__doc__ == f.__doc__
    assert str(g) == str(f)
    assert f(1, 2) == g(1, 2)


def test_curry_is_like_partial():
    def foo(a, b, c=1):
        return a + b + c

    p, c = partial(foo, 1, c=2), curry(foo)(1, c=2)
    assert p.keywords == c.keywords
    assert p.args == c.args
    assert p(3) == c(3)

    p, c = partial(foo, 1), curry(foo)(1)
    assert p.keywords == c.keywords
    assert p.args == c.args
    assert p(3) == c(3)
    assert p(3, c=2) == c(3, c=2)

    p, c = partial(foo, c=1), curry(foo)(c=1)
    assert p.keywords == c.keywords
    assert p.args == c.args
    assert p(1, 2) == c(1, 2)


def test_curry_is_idempotent():
    def foo(a, b, c=1):
        return a + b + c

    f = curry(foo, 1, c=2)
    g = curry(f)
    assert isinstance(f, curry)
    assert isinstance(g, curry)
    # currying a curry must not nest: g wraps foo directly.
    assert not isinstance(g.func, curry)
    assert not hasattr(g.func, 'func')
    assert f.func == g.func
    assert f.args == g.args
    assert f.keywords == g.keywords


def test_curry_attributes_readonly():
    def foo(a, b, c=1):
        return a + b + c

    f = curry(foo, 1, c=2)
    assert raises(AttributeError, lambda: setattr(f, 'args', (2,)))
    assert raises(AttributeError, lambda: setattr(f, 'keywords', {'c': 3}))
    assert raises(AttributeError, lambda: setattr(f, 'func', f))


def test_curry_attributes_writable():
    def foo(a, b, c=1):
        return a + b + c

    f = curry(foo, 1, c=2)
    f.__name__ = 'newname'
    f.__doc__ = 'newdoc'
    assert f.__name__ == 'newname'
    assert f.__doc__ == 'newdoc'
    if hasattr(f, 'func_name'):
        assert f.__name__ == f.func_name


def test_curry_comparable():
    def foo(a, b, c=1):
        return a + b + c
    f1 = curry(foo, 1, c=2)
    f2 = curry(foo, 1, c=2)
    g1 = curry(foo, 1, c=3)
    h1 = curry(foo, c=2)
    h2 = h1(c=2)
    h3 = h1()
    assert f1 == f2
    assert not (f1 != f2)
    assert f1 != g1
    assert not (f1 == g1)
    assert f1 != h1
    assert h1 == h2
    assert h1 == h3

    # test function comparison works
    def bar(a, b, c=1):
        return a + b + c
    b1 = curry(bar, 1, c=2)
    assert b1 != f1

    assert set([f1, f2, g1, h1, h2, h3, b1, b1()]) == set([f1, g1, h1, b1])

    # test unhashable input
    unhash1 = curry(foo, [])
    assert raises(TypeError, lambda: hash(unhash1))
    unhash2 = curry(foo, c=[])
    assert raises(TypeError, lambda: hash(unhash2))


def test_curry_doesnot_transmogrify():
    # Early versions of `curry` transmogrified to `partial` objects if
    # only one positional argument remained even if keyword arguments
    # were present.  Now, `curry` should always remain `curry`.
    def f(x, y=0):
        return x + y

    cf = curry(f)
    assert cf(y=1)(y=2)(y=3)(1) == f(1, 3)


def test_curry_on_classmethods():
    class A(object):
        BASE = 10

        def __init__(self, base):
            self.BASE = base

        @curry
        def addmethod(self, x, y):
            return self.BASE + x + y

        @classmethod
        @curry
        def addclass(cls, x, y):
            return cls.BASE + x + y

        @staticmethod
        @curry
        def addstatic(x, y):
            return x + y

    a = A(100)
    assert a.addmethod(3, 4) == 107
    assert a.addmethod(3)(4) == 107
    assert A.addmethod(a, 3, 4) == 107
    assert A.addmethod(a)(3)(4) == 107

    assert a.addclass(3, 4) == 17
    assert a.addclass(3)(4) == 17
    assert A.addclass(3, 4) == 17
    assert A.addclass(3)(4) == 17

    assert a.addstatic(3, 4) == 7
    assert a.addstatic(3)(4) == 7
    assert A.addstatic(3, 4) == 7
    assert A.addstatic(3)(4) == 7

    # we want this to be of type curry
    assert isinstance(a.addmethod, curry)
    assert isinstance(A.addmethod, curry)


def test_memoize_on_classmethods():
    class A(object):
        BASE = 10
        HASH = 10

        def __init__(self, base):
            self.BASE = base

        @memoize
        def addmethod(self, x, y):
            return self.BASE + x + y

        @classmethod
        @memoize
        def addclass(cls, x, y):
            return cls.BASE + x + y

        @staticmethod
        @memoize
        def addstatic(x, y):
            return x + y

        def __hash__(self):
            return self.HASH

    a = A(100)
    assert a.addmethod(3, 4) == 107
    assert A.addmethod(a, 3, 4) == 107

    # Cache keys on the instance's hash, so changing BASE alone is invisible
    # until HASH changes too.
    a.BASE = 200
    assert a.addmethod(3, 4) == 107
    a.HASH = 200
    assert a.addmethod(3, 4) == 207

    assert a.addclass(3, 4) == 17
    assert A.addclass(3, 4) == 17
    A.BASE = 20
    assert A.addclass(3, 4) == 17
    A.HASH = 20  # hashing of class is handled by metaclass
    assert A.addclass(3, 4) == 17  # hence, != 27

    assert a.addstatic(3, 4) == 7
    assert A.addstatic(3, 4) == 7


def test__num_required_args():
    assert _num_required_args(map) != 0
    assert _num_required_args(lambda x: x) == 1
    assert _num_required_args(lambda x, y: x) == 2

    def foo(x, y, z=2):
        pass
    assert _num_required_args(foo) == 2


def test_compose():
    assert compose()(0) == 0
    assert compose(inc)(0) == 1
    assert compose(double, inc)(0) == 2
    assert compose(str, iseven, inc, double)(3) == "False"
    assert compose(str, add)(1, 2) == '3'

    def f(a, b, c=10):
        return (a + b) * c

    assert compose(str, inc, f)(1, 2, c=3) == '10'

    # Define two functions with different names
    def f(a):
        return a

    def g(a):
        return a

    composed = compose(f, g)
    assert composed.__name__ == 'f_of_g'
    assert composed.__doc__ == 'lambda *args, **kwargs: f(g(*args, **kwargs))'

    # Create an object with no __name__.
    h = object()

    composed = compose(f, h)
    assert composed.__name__ == 'Compose'
    assert composed.__doc__ == 'A composition of functions'


def test_pipe():
    assert pipe(1, inc) == 2
    assert pipe(1, inc, inc) == 3
    assert pipe(1, double, inc, iseven) is False


def test_complement():
    # No args:
    assert complement(lambda: False)()
    assert not complement(lambda: True)()

    # Single arity:
    assert complement(iseven)(1)
    assert not complement(iseven)(2)
    assert complement(complement(iseven))(2)
    assert not complement(complement(isodd))(2)

    # Multiple arities:
    both_even = lambda a, b: iseven(a) and iseven(b)
    assert complement(both_even)(1, 2)
    assert not complement(both_even)(2, 2)

    # Generic truthiness:
    assert complement(lambda: "")()
    assert complement(lambda: 0)()
    assert complement(lambda: None)()
    assert complement(lambda: [])()

    assert not complement(lambda: "x")()
    assert not complement(lambda: 1)()
    assert not complement(lambda: [1])()


def test_do():
    inc = lambda x: x + 1
    assert do(inc, 1) == 1  # do returns its input, not f's result

    log = []
    assert do(log.append, 1) == 1
    assert log == [1]


def test_juxt_generator_input():
    data = list(range(10))
    juxtfunc = juxt(itemgetter(2*i) for i in range(5))
    assert juxtfunc(data) == (0, 2, 4, 6, 8)
    # A generator argument must be reusable across calls.
    assert juxtfunc(data) == (0, 2, 4, 6, 8)


def test_flip():
    def f(a, b):
        return a, b

    assert flip(f, 'a', 'b') == ('b', 'a')
# --- toolz/tests/test_itertoolz.py ---
# NOTE(review): reconstructed from repository-dump text ("NN |" residue).
import itertools
from itertools import starmap
from toolz.utils import raises
from functools import partial
from toolz.itertoolz import (remove, groupby, merge_sorted,
                             concat, concatv, interleave, unique,
                             isiterable, getter,
                             mapcat, isdistinct, first, second,
                             nth, take, tail, drop, interpose, get,
                             rest, last, cons, frequencies,
                             reduceby, iterate, accumulate,
                             sliding_window, count, partition,
                             partition_all, take_nth, pluck, join,
                             diff, topk, peek)
from toolz.compatibility import range, filter
from operator import add, mul


def identity(x):
    return x


def iseven(x):
    return x % 2 == 0


def isodd(x):
    return x % 2 == 1


def inc(x):
    return x + 1


def double(x):
    return 2 * x


def test_remove():
    r = remove(iseven, range(5))
    assert type(r) is not list  # remove must be lazy
    assert list(r) == list(filter(isodd, range(5)))


def test_groupby():
    assert groupby(iseven, [1, 2, 3, 4]) == {True: [2, 4], False: [1, 3]}


def test_groupby_non_callable():
    # A non-callable key is treated as an index (or list of indices).
    assert groupby(0, [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {1: [(1, 2), (1, 3)],
         2: [(2, 2), (2, 4)]}

    assert groupby([0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {(1,): [(1, 2), (1, 3)],
         (2,): [(2, 2), (2, 4)]}

    assert groupby([0, 0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {(1, 1): [(1, 2), (1, 3)],
         (2, 2): [(2, 2), (2, 4)]}


def test_merge_sorted():
    assert list(merge_sorted([1, 2, 3], [1, 2, 3])) == [1, 1, 2, 2, 3, 3]
    assert list(merge_sorted([1, 3, 5], [2, 4, 6])) == [1, 2, 3, 4, 5, 6]
    assert list(merge_sorted([1], [2, 4], [3], [])) == [1, 2, 3, 4]
    assert list(merge_sorted([5, 3, 1], [6, 4, 3], [],
                             key=lambda x: -x)) == [6, 5, 4, 3, 3, 1]
    assert list(merge_sorted([2, 1, 3], [1, 2, 3],
                             key=lambda x: x // 3)) == [2, 1, 1, 2, 3, 3]
    assert list(merge_sorted([2, 3], [1, 3],
                             key=lambda x: x // 3)) == [2, 1, 3, 3]
    assert ''.join(merge_sorted('abc', 'abc', 'abc')) == 'aaabbbccc'
    assert ''.join(merge_sorted('abc', 'abc', 'abc', key=ord)) == 'aaabbbccc'
    assert ''.join(merge_sorted('cba', 'cba', 'cba',
                                key=lambda x: -ord(x))) == 'cccbbbaaa'
    assert list(merge_sorted([1], [2, 3, 4], key=identity)) == [1, 2, 3, 4]

    data = [[(1, 2), (0, 4), (3, 6)], [(5, 3), (6, 5), (8, 8)],
            [(9, 1), (9, 8), (9, 9)]]
    assert list(merge_sorted(*data, key=lambda x: x[1])) == [
        (9, 1), (1, 2), (5, 3), (0, 4), (6, 5), (3, 6), (8, 8), (9, 8), (9, 9)]


def test_interleave():
    assert ''.join(interleave(('ABC', '123'))) == 'A1B2C3'
    assert ''.join(interleave(('ABC', '1'))) == 'A1BC'


def test_unique():
    assert tuple(unique((1, 2, 3))) == (1, 2, 3)
    assert tuple(unique((1, 2, 1, 3))) == (1, 2, 3)
    assert tuple(unique((1, 2, 3), key=iseven)) == (1, 2)


def test_isiterable():
    assert isiterable([1, 2, 3]) is True
    assert isiterable('abc') is True
    assert isiterable(5) is False


def test_isdistinct():
    assert isdistinct([1, 2, 3]) is True
    assert isdistinct([1, 2, 1]) is False

    assert isdistinct("Hello") is False
    assert isdistinct("World") is True

    assert isdistinct(iter([1, 2, 3])) is True
    assert isdistinct(iter([1, 2, 1])) is False


def test_nth():
    assert nth(2, 'ABCDE') == 'C'
    assert nth(2, iter('ABCDE')) == 'C'
    assert nth(1, (3, 2, 1)) == 2
    assert nth(0, {'foo': 'bar'}) == 'foo'
    assert raises(StopIteration, lambda: nth(10, {10: 'foo'}))
    assert nth(-2, 'ABCDE') == 'D'
    # Negative indices require a sequence; iterators must raise.
    assert raises(ValueError, lambda: nth(-2, iter('ABCDE')))


def test_first():
    assert first('ABCDE') == 'A'
    assert first((3, 2, 1)) == 3
    assert isinstance(first({0: 'zero', 1: 'one'}), int)


def test_second():
    assert second('ABCDE') == 'B'
    assert second((3, 2, 1)) == 2
    assert isinstance(second({0: 'zero', 1: 'one'}), int)


def test_last():
    assert last('ABCDE') == 'E'
    assert last((3, 2, 1)) == 1
    assert isinstance(last({0: 'zero', 1: 'one'}), int)


def test_rest():
    assert list(rest('ABCDE')) == list('BCDE')
    assert list(rest((3, 2, 1))) == list((2, 1))


def test_take():
    assert list(take(3, 'ABCDE')) == list('ABC')
    assert list(take(2, (3, 2, 1))) == list((3, 2))


def test_tail():
    assert list(tail(3, 'ABCDE')) == list('CDE')
    assert list(tail(3, iter('ABCDE'))) == list('CDE')
    assert list(tail(2, (3, 2, 1))) == list((2, 1))


def test_drop():
    assert list(drop(3, 'ABCDE')) == list('DE')
    assert list(drop(1, (3, 2, 1))) == list((2, 1))


def test_take_nth():
    assert list(take_nth(2, 'ABCDE')) == list('ACE')


def test_get():
    assert get(1, 'ABCDE') == 'B'
    assert list(get([1, 3], 'ABCDE')) == list('BD')
    assert get('a', {'a': 1, 'b': 2, 'c': 3}) == 1
    assert get(['a', 'b'], {'a': 1, 'b': 2, 'c': 3}) == (1, 2)

    assert get('foo', {}, default='bar') == 'bar'
    assert get({}, [1, 2, 3], default='bar') == 'bar'
    assert get([0, 2], 'AB', 'C') == ('A', 'C')

    assert get([0], 'AB') == ('A',)
    assert get([], 'AB') == ()

    assert raises(IndexError, lambda: get(10, 'ABC'))
    assert raises(KeyError, lambda: get(10, {'a': 1}))
    assert raises(TypeError, lambda: get({}, [1, 2, 3]))
    assert raises(TypeError, lambda: get([1, 2, 3], 1, None))


def test_mapcat():
    assert (list(mapcat(identity, [[1, 2, 3], [4, 5, 6]])) ==
            [1, 2, 3, 4, 5, 6])

    assert (list(mapcat(reversed, [[3, 2, 1, 0], [6, 5, 4], [9, 8, 7]])) ==
            list(range(10)))

    inc = lambda i: i + 1
    assert ([4, 5, 6, 7, 8, 9] ==
            list(mapcat(partial(map, inc), [[3, 4, 5], [6, 7, 8]])))


def test_cons():
    assert list(cons(1, [2, 3])) == [1, 2, 3]


def test_concat():
    assert list(concat([[], [], []])) == []
    # concat must be lazy: the huge range is never materialized.
    assert (list(take(5, concat([['a', 'b'], range(1000000000)]))) ==
            ['a', 'b', 0, 1, 2])


def test_concatv():
    assert list(concatv([], [], [])) == []
    assert (list(take(5, concatv(['a', 'b'], range(1000000000)))) ==
            ['a', 'b', 0, 1, 2])


def test_interpose():
    assert "a" == first(rest(interpose("a", range(1000000000))))
    assert "tXaXrXzXaXn" == "".join(interpose("X", "tarzan"))
    assert list(interpose(0, itertools.repeat(1, 4))) == [1, 0, 1, 0, 1, 0, 1]
    assert list(interpose('.', ['a', 'b', 'c'])) == ['a', '.', 'b', '.', 'c']


def test_frequencies():
    assert (frequencies(["cat", "pig", "cat", "eel",
                         "pig", "dog", "dog", "dog"]) ==
            {"cat": 2, "eel": 1, "pig": 2, "dog": 3})
    assert frequencies([]) == {}
    assert frequencies("onomatopoeia") == {"a": 2, "e": 1, "i": 1, "m": 1,
                                           "o": 4, "n": 1, "p": 1, "t": 1}


def test_reduceby():
    data = [1, 2, 3, 4, 5]
    iseven = lambda x: x % 2 == 0
    assert reduceby(iseven, add, data, 0) == {False: 9, True: 6}
    assert reduceby(iseven, mul, data, 1) == {False: 15, True: 8}

    projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000},
                {'name': 'fight crime', 'state': 'IL', 'cost': 100000},
                {'name': 'help farmers', 'state': 'IL', 'cost': 2000000},
                {'name': 'help farmers', 'state': 'CA', 'cost': 200000}]
    assert reduceby(lambda x: x['state'],
                    lambda acc, x: acc + x['cost'],
                    projects, 0) == {'CA': 1200000, 'IL': 2100000}

    # A non-callable key is treated as an index.
    assert reduceby('state',
                    lambda acc, x: acc + x['cost'],
                    projects, 0) == {'CA': 1200000, 'IL': 2100000}


def test_reduce_by_init():
    assert reduceby(iseven, add, [1, 2, 3, 4]) == {True: 2 + 4, False: 1 + 3}


def test_reduce_by_callable_default():
    def set_add(s, i):
        s.add(i)
        return s

    # A callable init is invoked per group, so groups don't share state.
    assert reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2], set) == \
        {True: set([2, 4]), False: set([1, 3])}


def test_iterate():
    assert list(itertools.islice(iterate(inc, 0), 0, 5)) == [0, 1, 2, 3, 4]
    assert list(take(4, iterate(double, 1))) == [1, 2, 4, 8]


def test_accumulate():
    assert list(accumulate(add, [1, 2, 3, 4, 5])) == [1, 3, 6, 10, 15]
    assert list(accumulate(mul, [1, 2, 3, 4, 5])) == [1, 2, 6, 24, 120]
    assert list(accumulate(add, [1, 2, 3, 4, 5], -1)) == [-1, 0, 2, 5, 9, 14]

    def binop(a, b):
        raise AssertionError('binop should not be called')

    start = object()
    assert list(accumulate(binop, [], start)) == [start]


def test_accumulate_works_on_consumable_iterables():
    assert list(accumulate(add, iter((1, 2, 3)))) == [1, 3, 6]


def test_sliding_window():
    assert list(sliding_window(2, [1, 2, 3, 4])) == [(1, 2), (2, 3), (3, 4)]
    assert list(sliding_window(3, [1, 2, 3, 4])) == [(1, 2, 3), (2, 3, 4)]


def test_sliding_window_of_short_iterator():
    assert list(sliding_window(3, [1, 2])) == []


def test_partition():
    assert list(partition(2, [1, 2, 3, 4])) == [(1, 2), (3, 4)]
    # Without a pad the trailing incomplete group is dropped.
    assert list(partition(3, range(7))) == [(0, 1, 2), (3, 4, 5)]
    assert list(partition(3, range(4), pad=-1)) == [(0, 1, 2),
                                                    (3, -1, -1)]
    assert list(partition(2, [])) == []


def test_partition_all():
    assert list(partition_all(2, [1, 2, 3, 4])) == [(1, 2), (3, 4)]
    assert list(partition_all(3, range(5))) == [(0, 1, 2), (3, 4)]
    assert list(partition_all(2, [])) == []


def test_count():
    assert count((1, 2, 3)) == 3
    assert count([]) == 0
    assert count(iter((1, 2, 3, 4))) == 4
310 | assert count('hello') == 5 311 | assert count(iter('hello')) == 5 312 | 313 | 314 | def test_pluck(): 315 | assert list(pluck(0, [[0, 1], [2, 3], [4, 5]])) == [0, 2, 4] 316 | assert list(pluck([0, 1], [[0, 1, 2], [3, 4, 5]])) == [(0, 1), (3, 4)] 317 | assert list(pluck(1, [[0], [0, 1]], None)) == [None, 1] 318 | 319 | data = [{'id': 1, 'name': 'cheese'}, {'id': 2, 'name': 'pies', 'price': 1}] 320 | assert list(pluck('id', data)) == [1, 2] 321 | assert list(pluck('price', data, None)) == [None, 1] 322 | assert list(pluck(['id', 'name'], data)) == [(1, 'cheese'), (2, 'pies')] 323 | assert list(pluck(['name'], data)) == [('cheese',), ('pies',)] 324 | assert list(pluck(['price', 'other'], data, None)) == [(None, None), 325 | (1, None)] 326 | 327 | assert raises(IndexError, lambda: list(pluck(1, [[0]]))) 328 | assert raises(KeyError, lambda: list(pluck('name', [{'id': 1}]))) 329 | 330 | 331 | def test_join(): 332 | names = [(1, 'one'), (2, 'two'), (3, 'three')] 333 | fruit = [('apple', 1), ('orange', 1), ('banana', 2), ('coconut', 2)] 334 | 335 | def addpair(pair): 336 | return pair[0] + pair[1] 337 | 338 | result = set(starmap(add, join(first, names, second, fruit))) 339 | 340 | expected = set([((1, 'one', 'apple', 1)), 341 | ((1, 'one', 'orange', 1)), 342 | ((2, 'two', 'banana', 2)), 343 | ((2, 'two', 'coconut', 2))]) 344 | 345 | assert result == expected 346 | 347 | 348 | def test_getter(): 349 | assert getter(0)('Alice') == 'A' 350 | assert getter([0])('Alice') == ('A',) 351 | assert getter([])('Alice') == () 352 | 353 | 354 | def test_key_as_getter(): 355 | squares = [(i, i**2) for i in range(5)] 356 | pows = [(i, i**2, i**3) for i in range(5)] 357 | 358 | assert set(join(0, squares, 0, pows)) == set(join(lambda x: x[0], squares, 359 | lambda x: x[0], pows)) 360 | 361 | get = lambda x: (x[0], x[1]) 362 | assert set(join([0, 1], squares, [0, 1], pows)) == set(join(get, squares, 363 | get, pows)) 364 | 365 | get = lambda x: (x[0],) 366 | assert set(join([0], 
squares, [0], pows)) == set(join(get, squares, 367 | get, pows)) 368 | 369 | 370 | def test_join_double_repeats(): 371 | names = [(1, 'one'), (2, 'two'), (3, 'three'), (1, 'uno'), (2, 'dos')] 372 | fruit = [('apple', 1), ('orange', 1), ('banana', 2), ('coconut', 2)] 373 | 374 | result = set(starmap(add, join(first, names, second, fruit))) 375 | 376 | expected = set([((1, 'one', 'apple', 1)), 377 | ((1, 'one', 'orange', 1)), 378 | ((2, 'two', 'banana', 2)), 379 | ((2, 'two', 'coconut', 2)), 380 | ((1, 'uno', 'apple', 1)), 381 | ((1, 'uno', 'orange', 1)), 382 | ((2, 'dos', 'banana', 2)), 383 | ((2, 'dos', 'coconut', 2))]) 384 | 385 | assert result == expected 386 | 387 | 388 | def test_join_missing_element(): 389 | names = [(1, 'one'), (2, 'two'), (3, 'three')] 390 | fruit = [('apple', 5), ('orange', 1)] 391 | 392 | result = set(starmap(add, join(first, names, second, fruit))) 393 | 394 | expected = set([((1, 'one', 'orange', 1))]) 395 | 396 | assert result == expected 397 | 398 | 399 | def test_left_outer_join(): 400 | result = set(join(identity, [1, 2], identity, [2, 3], left_default=None)) 401 | expected = set([(2, 2), (None, 3)]) 402 | 403 | assert result == expected 404 | 405 | 406 | def test_right_outer_join(): 407 | result = set(join(identity, [1, 2], identity, [2, 3], right_default=None)) 408 | expected = set([(2, 2), (1, None)]) 409 | 410 | assert result == expected 411 | 412 | 413 | def test_outer_join(): 414 | result = set(join(identity, [1, 2], identity, [2, 3], 415 | left_default=None, right_default=None)) 416 | expected = set([(2, 2), (1, None), (None, 3)]) 417 | 418 | assert result == expected 419 | 420 | 421 | def test_diff(): 422 | assert raises(TypeError, lambda: list(diff())) 423 | assert raises(TypeError, lambda: list(diff([1, 2]))) 424 | assert raises(TypeError, lambda: list(diff([1, 2], 3))) 425 | assert list(diff([1, 2], (1, 2), iter([1, 2]))) == [] 426 | assert list(diff([1, 2, 3], (1, 10, 3), iter([1, 2, 10]))) == [ 427 | (2, 10, 2), (3, 3, 
10)] 428 | assert list(diff([1, 2], [10])) == [(1, 10)] 429 | assert list(diff([1, 2], [10], default=None)) == [(1, 10), (2, None)] 430 | # non-variadic usage 431 | assert raises(TypeError, lambda: list(diff([]))) 432 | assert raises(TypeError, lambda: list(diff([[]]))) 433 | assert raises(TypeError, lambda: list(diff([[1, 2]]))) 434 | assert raises(TypeError, lambda: list(diff([[1, 2], 3]))) 435 | assert list(diff([(1, 2), (1, 3)])) == [(2, 3)] 436 | 437 | data1 = [{'cost': 1, 'currency': 'dollar'}, 438 | {'cost': 2, 'currency': 'dollar'}] 439 | 440 | data2 = [{'cost': 100, 'currency': 'yen'}, 441 | {'cost': 300, 'currency': 'yen'}] 442 | 443 | conversions = {'dollar': 1, 'yen': 0.01} 444 | 445 | def indollars(item): 446 | return conversions[item['currency']] * item['cost'] 447 | 448 | list(diff(data1, data2, key=indollars)) == [ 449 | ({'cost': 2, 'currency': 'dollar'}, {'cost': 300, 'currency': 'yen'})] 450 | 451 | 452 | def test_topk(): 453 | assert topk(2, [4, 1, 5, 2]) == (5, 4) 454 | assert topk(2, [4, 1, 5, 2], key=lambda x: -x) == (1, 2) 455 | assert topk(2, iter([5, 1, 4, 2]), key=lambda x: -x) == (1, 2) 456 | 457 | assert topk(2, [{'a': 1, 'b': 10}, {'a': 2, 'b': 9}, 458 | {'a': 10, 'b': 1}, {'a': 9, 'b': 2}], key='a') == \ 459 | ({'a': 10, 'b': 1}, {'a': 9, 'b': 2}) 460 | 461 | assert topk(2, [{'a': 1, 'b': 10}, {'a': 2, 'b': 9}, 462 | {'a': 10, 'b': 1}, {'a': 9, 'b': 2}], key='b') == \ 463 | ({'a': 1, 'b': 10}, {'a': 2, 'b': 9}) 464 | assert topk(2, [(0, 4), (1, 3), (2, 2), (3, 1), (4, 0)], 0) == \ 465 | ((4, 0), (3, 1)) 466 | 467 | 468 | def test_topk_is_stable(): 469 | assert topk(4, [5, 9, 2, 1, 5, 3], key=lambda x: 1) == (5, 9, 2, 1) 470 | 471 | 472 | def test_peek(): 473 | alist = ["Alice", "Bob", "Carol"] 474 | element, blist = peek(alist) 475 | element == alist[0] 476 | assert list(blist) == alist 477 | 478 | assert raises(StopIteration, lambda: peek([])) 479 | -------------------------------------------------------------------------------- 
# --------------------------------------------------------------------------
# toolz/tests/test_recipes.py
# --------------------------------------------------------------------------
from toolz import first, identity, countby, partitionby


def iseven(x):
    """Predicate: True when *x* is an even integer."""
    return x % 2 == 0


def test_countby():
    """countby tallies elements by key function (or positional getter)."""
    assert countby(iseven, [1, 2, 3]) == {False: 2, True: 1}
    assert countby(len, ['cat', 'dog', 'mouse']) == {3: 2, 5: 1}
    assert countby(0, ('ab', 'ac', 'bc')) == {'a': 2, 'b': 1}


def test_partitionby():
    """partitionby groups consecutive elements sharing a key value."""
    # Empty input yields no partitions at all.
    assert list(partitionby(identity, [])) == []

    is_vowel = "aeiou".__contains__
    parts = list(partitionby(is_vowel, "abcdefghi"))
    assert parts == [("a",), ("b", "c", "d"), ("e",), ("f", "g", "h"), ("i",)]

    heads = [first(group) for group in
             partitionby(identity, [1, 1, 1, 2, 3, 3, 2, 2, 3])]
    assert heads == [1, 2, 3, 2, 3]

    collapsed = ''.join(first(group) for group in
                        partitionby(identity, "Khhhaaaaannnnn!!!!"))
    assert collapsed == 'Khan!'


# --------------------------------------------------------------------------
# toolz/tests/test_serialization.py
# --------------------------------------------------------------------------
from toolz import *
import pickle


def _roundtrip(obj):
    """Serialize *obj* with pickle and deserialize it again."""
    return pickle.loads(pickle.dumps(obj))


def test_compose():
    f = compose(str, sum)
    g = _roundtrip(f)
    assert f((1, 2)) == g((1, 2))


def test_curry():
    f = curry(map)(str)
    g = _roundtrip(f)
    assert list(f((1, 2, 3))) == list(g((1, 2, 3)))


def test_juxt():
    f = juxt(str, int, bool)
    g = _roundtrip(f)
    assert f(1) == g(1)
    assert f.funcs == g.funcs


def test_complement():
    f = complement(bool)
    assert f(True) is False
    assert f(False) is True
    g = _roundtrip(f)
    assert g(True) == f(True)
    assert g(False) == f(False)


# --------------------------------------------------------------------------
# toolz/tests/test_utils.py
# --------------------------------------------------------------------------
from toolz.utils import raises


def test_raises():
    """raises() reports whether the thunk raised the expected exception."""
    assert raises(ZeroDivisionError, lambda: 1 / 0)
    assert not raises(ZeroDivisionError, lambda: 1)


# --------------------------------------------------------------------------
# toolz/utils.py
# --------------------------------------------------------------------------
def raises(err, lamda):
    """Return True if calling ``lamda`` (a zero-argument callable) raises
    ``err``, and False if it returns normally.

    Exceptions other than ``err`` propagate to the caller.  The parameter
    is spelled ``lamda`` (sic) because ``lambda`` is a reserved word; the
    name is kept as-is since it is part of the public signature.
    """
    try:
        lamda()
    except err:
        return True
    return False


# Sentinel string meaning "no default value was supplied".
no_default = '__no__default__'