├── .binstar.yml ├── .coveragerc ├── .gitignore ├── .travis.yml ├── AUTHORS.md ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── bench ├── test_curry.py ├── test_curry_baseline.py ├── test_first.py ├── test_first_iter.py ├── test_frequencies.py ├── test_get.py ├── test_get_list.py ├── test_groupby.py ├── test_join.py ├── test_memoize.py ├── test_memoize_kwargs.py ├── test_pluck.py ├── test_sliding_window.py └── test_wordcount.py ├── conda.recipe ├── bld.bat ├── build.sh └── meta.yaml ├── doc ├── Makefile ├── make.bat └── source │ ├── api.rst │ ├── composition.rst │ ├── conf.py │ ├── control.rst │ ├── curry.rst │ ├── heritage.rst │ ├── index.rst │ ├── install.rst │ ├── laziness.rst │ ├── parallelism.rst │ ├── purity.rst │ ├── references.rst │ ├── streaming-analytics.rst │ └── tips-and-tricks.rst ├── examples ├── fib.py ├── graph.py └── wordcount.py ├── release-notes ├── setup.py └── toolz ├── __init__.py ├── compatibility.py ├── curried ├── __init__.py ├── exceptions.py └── operator.py ├── dicttoolz.py ├── functoolz.py ├── itertoolz.py ├── recipes.py ├── sandbox ├── __init__.py ├── core.py ├── parallel.py └── tests │ ├── test_core.py │ └── test_parallel.py ├── tests ├── test_compatibility.py ├── test_curried.py ├── test_dicttoolz.py ├── test_functoolz.py ├── test_itertoolz.py ├── test_recipes.py ├── test_serialization.py └── test_utils.py └── utils.py /.binstar.yml: -------------------------------------------------------------------------------- 1 | package: toolz 2 | platform: 3 | - linux-64 4 | - linux-32 5 | - osx-64 6 | - win-64 7 | - win-32 8 | engine: 9 | - python=2.6 10 | - python=2.7 11 | - python=3.3 12 | - python=3.4 13 | script: 14 | - conda build conda.recipe 15 | build_targets: 16 | files: conda 17 | channels: main 18 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | toolz/tests/test* 4 | 
toolz/*/tests/test* 5 | toolz/compatibility.py 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | build/ 3 | dist/ 4 | *.egg-info/ 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | python: 4 | - "2.6" 5 | - "2.7" 6 | - "3.3" 7 | - "3.4" 8 | - "3.5" 9 | - "pypy" 10 | - "pypy3" 11 | 12 | env: 13 | - PEP8_IGNORE="E731,W503" 14 | 15 | # command to install dependencies 16 | install: 17 | - pip install coverage pep8 18 | 19 | # command to run tests 20 | # require 100% coverage (not including test files) to pass Travis CI test 21 | # To skip pypy: - if [[ $TRAVIS_PYTHON_VERSION != 'pypy' ]]; then DOSTUFF ; fi 22 | script: 23 | - coverage run --source=toolz $(which nosetests) 24 | --with-doctest 25 | - if [[ $TRAVIS_PYTHON_VERSION != pypy* ]]; then coverage report --show-missing --fail-under=100 ; fi 26 | - if [[ $TRAVIS_PYTHON_VERSION != pypy* ]]; then pep8 --ignore=$PEP8_IGNORE --exclude=conf.py,tests,examples,bench -r --show-source . 
; fi 27 | 28 | # load coverage status to https://coveralls.io 29 | after_success: 30 | - if [[ $TRAVIS_PYTHON_VERSION != pypy* ]]; then pip install coveralls --use-mirrors ; coveralls ; fi 31 | 32 | notifications: 33 | email: false 34 | -------------------------------------------------------------------------------- /AUTHORS.md: -------------------------------------------------------------------------------- 1 | [Matthew Rocklin](http://matthewrocklin.com) [@mrocklin](http://github.com/mrocklin/) 2 | 3 | [John Jacobsen](http://eigenhombre.com) [@eigenhombre](http://github.com/eigenhombre/) 4 | 5 | Erik Welch [@eriknw](https://github.com/eriknw/) 6 | 7 | John Crichton [@jcrichton](https://github.com/jcrichton/) 8 | 9 | Han Semaj [@microamp](https://github.com/microamp/) 10 | 11 | [Graeme Coupar](https://twitter.com/obmarg) [@obmarg](https://github.com/obmarg/) 12 | 13 | [Leonid Shvechikov](http://brainstorage.me/shvechikov) [@shvechikov](https://github.com/shvechikov) 14 | 15 | Lars Buitinck [@larsmans](http://github.com/larsmans) 16 | 17 | José Ricardo [@josericardo](https://github.com/josericardo) 18 | 19 | Tom Prince [@tomprince](https://github.com/tomprince) 20 | 21 | Bart van Merriënboer [@bartvm](https://github.com/bartvm) 22 | 23 | Nikolaos-Digenis Karagiannis [@digenis](https://github.com/digenis/) 24 | 25 | [Antonio Lima](https://twitter.com/themiurgo) [@themiurgo](https://github.com/themiurgo/) 26 | 27 | Joe Jevnik [@llllllllll](https://github.com/llllllllll) 28 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Matthew Rocklin 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | a. 
Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | b. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | c. Neither the name of toolz nor the names of its contributors 14 | may be used to endorse or promote products derived from this software 15 | without specific prior written permission. 16 | 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 28 | DAMAGE. 29 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include toolz/tests/*.py 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Toolz 2 | ===== 3 | 4 | |Build Status| |Coverage Status| |Version Status| |Downloads| 5 | 6 | A set of utility functions for iterators, functions, and dictionaries. 
7 | 8 | See the PyToolz documentation at http://toolz.readthedocs.org 9 | 10 | LICENSE 11 | ------- 12 | 13 | New BSD. See `License File `__. 14 | 15 | Install 16 | ------- 17 | 18 | ``toolz`` is on the Python Package Index (PyPI): 19 | 20 | :: 21 | 22 | pip install toolz 23 | 24 | or 25 | 26 | :: 27 | 28 | easy_install toolz 29 | 30 | Structure and Heritage 31 | ---------------------- 32 | 33 | ``toolz`` is implemented in three parts: 34 | 35 | |literal itertoolz|_, for operations on iterables. Examples: ``groupby``, 36 | ``unique``, ``interpose``, 37 | 38 | |literal functoolz|_, for higher-order functions. Examples: ``memoize``, 39 | ``curry``, ``compose`` 40 | 41 | |literal dicttoolz|_, for operations on dictionaries. Examples: ``assoc``, 42 | ``update-in``, ``merge``. 43 | 44 | .. |literal itertoolz| replace:: ``itertoolz`` 45 | .. _literal itertoolz: https://github.com/pytoolz/toolz/blob/master/toolz/itertoolz.py 46 | 47 | .. |literal functoolz| replace:: ``functoolz`` 48 | .. _literal functoolz: https://github.com/pytoolz/toolz/blob/master/toolz/functoolz.py 49 | 50 | .. |literal dicttoolz| replace:: ``dicttoolz`` 51 | .. _literal dicttoolz: https://github.com/pytoolz/toolz/blob/master/toolz/dicttoolz.py 52 | 53 | These functions come from the legacy of functional languages for list 54 | processing. They interoperate well to accomplish common complex tasks. 55 | 56 | Read our `API 57 | Documentation `__ for 58 | more details. 59 | 60 | Example 61 | ------- 62 | 63 | This builds a standard wordcount function from pieces within ``toolz``: 64 | 65 | .. code:: python 66 | 67 | >>> def stem(word): 68 | ... """ Stem word to primitive form """ 69 | ... return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") 70 | 71 | >>> from toolz import compose, frequencies, partial 72 | >>> wordcount = compose(frequencies, partial(map, stem), str.split) 73 | 74 | >>> sentence = "This cat jumped over this other cat!" 
75 | >>> wordcount(sentence) 76 | {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1} 77 | 78 | Dependencies 79 | ------------ 80 | 81 | ``toolz`` supports Python 2.6+ and Python 3.3+ with a common codebase. 82 | It is pure Python and requires no dependencies beyond the standard 83 | library. 84 | 85 | It is, in short, a light weight dependency. 86 | 87 | 88 | CyToolz 89 | ------- 90 | 91 | The ``toolz`` project has been reimplemented in `Cython `__. 92 | The ``cytoolz`` project is a drop-in replacement for the Pure Python 93 | implementation. 94 | See `CyToolz Github Page `__ for more 95 | details. 96 | 97 | See Also 98 | -------- 99 | 100 | - `Underscore.js `__: A similar library for 101 | JavaScript 102 | - `Enumerable `__: A 103 | similar library for Ruby 104 | - `Clojure `__: A functional language whose 105 | standard library has several counterparts in ``toolz`` 106 | - `itertools `__: The 107 | Python standard library for iterator tools 108 | - `functools `__: The 109 | Python standard library for function tools 110 | 111 | Contributions Welcome 112 | --------------------- 113 | 114 | ``toolz`` aims to be a repository for utility functions, particularly 115 | those that come from the functional programming and list processing 116 | traditions. We welcome contributions that fall within this scope. 117 | 118 | We also try to keep the API small to keep ``toolz`` manageable. The ideal 119 | contribution is significantly different from existing functions and has 120 | precedent in a few other functional systems. 121 | 122 | Please take a look at our 123 | `issue page `__ 124 | for contribution ideas. 125 | 126 | Community 127 | --------- 128 | 129 | See our `mailing list `__. 130 | We're friendly. 131 | 132 | .. |Build Status| image:: https://travis-ci.org/pytoolz/toolz.svg 133 | :target: https://travis-ci.org/pytoolz/toolz 134 | .. 
|Coverage Status| image:: https://coveralls.io/repos/pytoolz/toolz/badge.svg 135 | :target: https://coveralls.io/r/pytoolz/toolz 136 | .. |Version Status| image:: https://badge.fury.io/py/toolz.svg 137 | :target: http://badge.fury.io/py/toolz 138 | .. |Downloads| image:: https://img.shields.io/pypi/dm/toolz.svg 139 | :target: https://pypi.python.org/pypi/toolz/ 140 | -------------------------------------------------------------------------------- /bench/test_curry.py: -------------------------------------------------------------------------------- 1 | from toolz.curried import get 2 | 3 | 4 | pairs = [(1, 2) for i in range(100000)] 5 | 6 | 7 | def test_get_curried(): 8 | first = get(0) 9 | for p in pairs: 10 | first(p) 11 | -------------------------------------------------------------------------------- /bench/test_curry_baseline.py: -------------------------------------------------------------------------------- 1 | from toolz import get 2 | from functools import partial 3 | 4 | 5 | pairs = [(1, 2) for i in range(100000)] 6 | 7 | 8 | def test_get(): 9 | first = partial(get, 0) 10 | for p in pairs: 11 | first(p) 12 | -------------------------------------------------------------------------------- /bench/test_first.py: -------------------------------------------------------------------------------- 1 | from toolz import first, second 2 | 3 | pairs = [(1, 2) for i in range(1000000)] 4 | 5 | 6 | def test_first(): 7 | for p in pairs: 8 | first(p) 9 | 10 | 11 | def test_second(): 12 | for p in pairs: 13 | second(p) 14 | -------------------------------------------------------------------------------- /bench/test_first_iter.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | from toolz import first, second 3 | 4 | 5 | def test_first_iter(): 6 | iters = map(iter, [(1, 2) for i in range(1000000)]) 7 | for p in iters: 8 | first(p) 9 | 10 | 11 | def test_second_iter(): 12 | iters = map(iter, [(1, 2) for i in 
range(1000000)]) 13 | for p in iters: 14 | second(p) 15 | -------------------------------------------------------------------------------- /bench/test_frequencies.py: -------------------------------------------------------------------------------- 1 | from toolz import frequencies, identity 2 | 3 | 4 | big_data = list(range(1000)) * 1000 5 | small_data = list(range(100)) 6 | 7 | 8 | def test_frequencies(): 9 | frequencies(big_data) 10 | 11 | 12 | def test_frequencies_small(): 13 | for i in range(1000): 14 | frequencies(small_data) 15 | -------------------------------------------------------------------------------- /bench/test_get.py: -------------------------------------------------------------------------------- 1 | from toolz import get 2 | 3 | tuples = [(1, 2, 3) for i in range(100000)] 4 | 5 | 6 | def test_get(): 7 | for tup in tuples: 8 | get(1, tup) 9 | -------------------------------------------------------------------------------- /bench/test_get_list.py: -------------------------------------------------------------------------------- 1 | from toolz import get 2 | 3 | tuples = [(1, 2, 3) for i in range(100000)] 4 | 5 | 6 | def test_get(): 7 | for tup in tuples: 8 | get([1, 2], tup) 9 | -------------------------------------------------------------------------------- /bench/test_groupby.py: -------------------------------------------------------------------------------- 1 | from toolz import groupby, identity 2 | 3 | 4 | data = list(range(1000)) * 1000 5 | 6 | 7 | def test_groupby(): 8 | groupby(identity, data) 9 | -------------------------------------------------------------------------------- /bench/test_join.py: -------------------------------------------------------------------------------- 1 | from toolz.curried import * 2 | import random 3 | 4 | try: 5 | xrange 6 | except NameError: 7 | xrange = range 8 | 9 | def burn(seq): 10 | for item in seq: 11 | pass 12 | 13 | 14 | small = [(i, str(i)) for i in range(100)] * 10 15 | big = pipe([110]*10000, 
map(range), concat, list) 16 | 17 | 18 | def test_many_to_many_large(): 19 | burn(join(get(0), small, identity, big)) 20 | 21 | 22 | def test_one_to_one_tiny(): 23 | A = list(range(20)) 24 | B = A[::2] + A[1::2][::-1] 25 | 26 | for i in xrange(50000): 27 | burn(join(identity, A, identity, B)) 28 | 29 | 30 | def test_one_to_many(): 31 | A = list(range(20)) 32 | B = pipe([20]*1000, map(range), concat, list) 33 | 34 | for i in xrange(100): 35 | burn(join(identity, A, identity, B)) 36 | -------------------------------------------------------------------------------- /bench/test_memoize.py: -------------------------------------------------------------------------------- 1 | from toolz import memoize 2 | 3 | 4 | def test_memoize_no_kwargs(): 5 | @memoize 6 | def f(x): 7 | return x 8 | 9 | for i in range(100000): 10 | f(3) 11 | -------------------------------------------------------------------------------- /bench/test_memoize_kwargs.py: -------------------------------------------------------------------------------- 1 | from toolz import memoize 2 | 3 | 4 | def test_memoize_kwargs(): 5 | @memoize 6 | def f(x, y=3): 7 | return x 8 | 9 | for i in range(100000): 10 | f(3) 11 | -------------------------------------------------------------------------------- /bench/test_pluck.py: -------------------------------------------------------------------------------- 1 | from toolz import pluck 2 | 3 | tuples = [(1, 2, 3) for i in range(100000)] 4 | less_tuples = [(1, 2, 3) for i in range(100)] 5 | 6 | 7 | def test_pluck(): 8 | for i in pluck(2, tuples): 9 | pass 10 | 11 | for i in range(1000): 12 | tuple(pluck(2, less_tuples)) 13 | -------------------------------------------------------------------------------- /bench/test_sliding_window.py: -------------------------------------------------------------------------------- 1 | from toolz import sliding_window 2 | 3 | seq = range(1000000) 4 | 5 | 6 | def test_sliding_window(): 7 | list(sliding_window(3, seq)) 8 | 
-------------------------------------------------------------------------------- /bench/test_wordcount.py: -------------------------------------------------------------------------------- 1 | from toolz.curried import * 2 | import os 3 | 4 | if not os.path.exists('bench/shakespeare.txt'): 5 | os.system('wget http://www.gutenberg.org/ebooks/100.txt.utf-8' 6 | ' -O bench/shakespeare.txt') 7 | 8 | 9 | def stem(word): 10 | """ Stem word to primitive form """ 11 | return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") 12 | 13 | wordcount = comp(frequencies, map(stem), concat, map(str.split)) 14 | 15 | 16 | def test_shakespeare(): 17 | with open('bench/shakespeare.txt') as f: 18 | counts = wordcount(f) 19 | -------------------------------------------------------------------------------- /conda.recipe/bld.bat: -------------------------------------------------------------------------------- 1 | cd %RECIPE_DIR%\.. 2 | %PYTHON% setup.py install 3 | -------------------------------------------------------------------------------- /conda.recipe/build.sh: -------------------------------------------------------------------------------- 1 | cd $RECIPE_DIR/.. 
2 | $PYTHON setup.py install 3 | -------------------------------------------------------------------------------- /conda.recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: toolz 3 | version: "0.7.4" 4 | 5 | build: 6 | number: {{environ.get('BINSTAR_BUILD', 1)}} 7 | 8 | requirements: 9 | build: 10 | - setuptools 11 | - python 12 | 13 | run: 14 | - python 15 | 16 | test: 17 | requires: 18 | - pytest 19 | imports: 20 | - toolz 21 | commands: 22 | - py.test -x --doctest-modules --pyargs toolz 23 | 24 | about: 25 | home: http://toolz.readthedocs.org/ 26 | license: BSD 27 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # Internal variables. 
11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. 
The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Toolz.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Toolz.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/Toolz" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Toolz" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 
104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 
154 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. 
The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Toolz.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Toolz.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 
113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 
178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /doc/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | This page contains a comprehensive list of all functions within ``toolz``. 5 | Docstrings should provide sufficient understanding for any individual function. 6 | 7 | Itertoolz 8 | --------- 9 | 10 | .. currentmodule:: toolz.itertoolz 11 | 12 | .. autosummary:: 13 | accumulate 14 | concat 15 | concatv 16 | cons 17 | count 18 | diff 19 | drop 20 | first 21 | frequencies 22 | get 23 | groupby 24 | interleave 25 | interpose 26 | isdistinct 27 | isiterable 28 | iterate 29 | join 30 | last 31 | mapcat 32 | merge_sorted 33 | nth 34 | partition 35 | partition_all 36 | peek 37 | pluck 38 | reduceby 39 | remove 40 | second 41 | sliding_window 42 | take 43 | tail 44 | take_nth 45 | topk 46 | unique 47 | 48 | .. currentmodule:: toolz.recipes 49 | 50 | .. autosummary:: 51 | countby 52 | partitionby 53 | 54 | Functoolz 55 | --------- 56 | 57 | .. currentmodule:: toolz.functoolz 58 | 59 | .. autosummary:: 60 | complement 61 | compose 62 | curry 63 | do 64 | identity 65 | juxt 66 | memoize 67 | pipe 68 | thread_first 69 | thread_last 70 | 71 | Dicttoolz 72 | --------- 73 | 74 | .. currentmodule:: toolz.dicttoolz 75 | 76 | .. autosummary:: 77 | assoc 78 | dissoc 79 | get_in 80 | keyfilter 81 | keymap 82 | itemfilter 83 | itemmap 84 | merge 85 | merge_with 86 | update_in 87 | valfilter 88 | valmap 89 | 90 | Sandbox 91 | ------- 92 | 93 | .. currentmodule:: toolz.sandbox 94 | 95 | .. 
autosummary:: 96 | parallel.fold 97 | core.EqualityHashKey 98 | core.unzip 99 | 100 | 101 | Definitions 102 | ----------- 103 | 104 | .. automodule:: toolz.itertoolz 105 | :members: 106 | 107 | .. automodule:: toolz.recipes 108 | :members: 109 | 110 | .. automodule:: toolz.functoolz 111 | :members: 112 | 113 | .. automodule:: toolz.dicttoolz 114 | :members: 115 | 116 | .. automodule:: toolz.sandbox.core 117 | :members: 118 | 119 | .. automodule:: toolz.sandbox.parallel 120 | :members: 121 | -------------------------------------------------------------------------------- /doc/source/composition.rst: -------------------------------------------------------------------------------- 1 | Composability 2 | ============= 3 | 4 | Toolz functions interoperate because they consume and produce only a small 5 | set of common, core data structures. Each ``toolz`` function consumes 6 | just iterables, dictionaries, and functions and each ``toolz`` function produces 7 | just iterables, dictionaries, and functions. This standardized interface 8 | enables us to compose several general purpose functions to solve custom 9 | problems. 10 | 11 | Standard interfaces enable us to use many tools together, even if those tools 12 | were not designed with each other in mind. We call this "using together" 13 | composition. 14 | 15 | 16 | Standard Interface 17 | ------------------ 18 | 19 | This is best explained by two examples; the automobile industry and LEGOs. 20 | 21 | Autos 22 | ^^^^^ 23 | 24 | Automobile pieces are not widely composable because they do not adhere to a 25 | standard interface. You can't connect a Porsche engine to the body of a 26 | Volkswagen Beetle but include the safety features of your favorite luxury car. 27 | As a result when something breaks you need to find a specialist who understands 28 | exactly your collection of components and, depending on the popularity of your 29 | model, replacement parts may be difficult to find. 
While the customization 30 | provides a number of efficiencies important for automobiles, it limits the 31 | ability of downstream tinkerers. This ability for future developers to tinker 32 | is paramount in good software design. 33 | 34 | Lego 35 | ^^^^ 36 | 37 | Contrast this with Lego toys. With Lego you *can* connect a rocket engine and 38 | skis to a rowboat. This is a perfectly natural thing to do because every piece 39 | adheres to a simple interface - those simple and regular 5mm circular bumps. 40 | This freedom to connect pieces at will lets children unleash their imagination 41 | in such varied ways (like going arctic shark hunting with a rocket-ski-boat). 42 | 43 | The abstractions in programming make it far more like Lego than like building 44 | cars. This breaks down a little when we start to be constrained by performance 45 | or memory issues but this affects only a very small fraction of applications. 46 | Most of the time we have the freedom to operate in the Lego model if we choose 47 | to give up customization and embrace simple core standards. 48 | 49 | 50 | Other Standard Interfaces 51 | ------------------------- 52 | 53 | The Toolz project builds off of a standard interface -- this choice is not 54 | unique. Other standard interfaces exist and provide immeasurable benefit to 55 | their application areas. 56 | 57 | The NumPy array serves as a foundational object for numeric and scientific 58 | computing within Python. The ability of any project to consume and produce 59 | NumPy arrays is largely responsible for the broad success of the 60 | various SciPy projects. We see similar development today with the Pandas 61 | DataFrame. 62 | 63 | The UNIX toolset relies on files and streams of text. 64 | 65 | JSON emerged as the standard interface for communication over the web. The 66 | virtues of standardization become glaringly apparent when we contrast JSON with 67 | its predecessor, XML. 
XML was designed to be extensible/customizable, allowing 68 | each application to design its own interface. This resulted in a sea of 69 | difficult to understand custom data languages that failed to develop a common 70 | analytic and data processing infrastructure. In contrast JSON is very 71 | restrictive and allows only a fixed set of data structures, namely lists, 72 | dictionaries, numbers, strings. Fortunately this set is common to most modern 73 | languages and so JSON is extremely widely supported, perhaps falling second 74 | only to CSV. 75 | 76 | Standard interfaces permeate physical reality as well. Examples range 77 | from supra-national currencies to drill bits and electrical circuitry. In all 78 | cases the interoperation that results becomes a defining and invaluable feature 79 | of each solution. 80 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Toolz documentation build configuration file, created by 4 | # sphinx-quickstart on Sun Sep 22 18:06:00 2013. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
19 | #sys.path.insert(0, os.path.abspath('.')) 20 | sys.path.insert(0, os.path.abspath('.')) 21 | 22 | # -- General configuration ----------------------------------------------------- 23 | 24 | # If your documentation needs a minimal Sphinx version, state it here. 25 | #needs_sphinx = '1.0' 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be extensions 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 29 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', 'sphinx.ext.autosummary'] 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ['_templates'] 33 | 34 | # The suffix of source filenames. 35 | source_suffix = '.rst' 36 | 37 | # The encoding of source files. 38 | #source_encoding = 'utf-8-sig' 39 | 40 | # The master toctree document. 41 | master_doc = 'index' 42 | 43 | # General information about the project. 44 | project = u'Toolz' 45 | copyright = u'2013, Matthew Rocklin, John Jacobsen' 46 | 47 | # The version info for the project you're documenting, acts as replacement for 48 | # |version| and |release|, also used in various other places throughout the 49 | # built documents. 50 | # 51 | # The short X.Y version. 52 | import toolz 53 | version = toolz.__version__ 54 | # The full version, including alpha/beta/rc tags. 55 | release = toolz.__version__ 56 | 57 | # The language for content autogenerated by Sphinx. Refer to documentation 58 | # for a list of supported languages. 59 | #language = None 60 | 61 | # There are two options for replacing |today|: either, you set today to some 62 | # non-false value, then it is used: 63 | #today = '' 64 | # Else, today_fmt is used as the format for a strftime call. 65 | #today_fmt = '%B %d, %Y' 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 
69 | exclude_patterns = [] 70 | 71 | # The reST default role (used for this markup: `text`) to use for all documents. 72 | #default_role = None 73 | 74 | # If true, '()' will be appended to :func: etc. cross-reference text. 75 | #add_function_parentheses = True 76 | 77 | # If true, the current module name will be prepended to all description 78 | # unit titles (such as .. function::). 79 | #add_module_names = True 80 | 81 | # If true, sectionauthor and moduleauthor directives will be shown in the 82 | # output. They are ignored by default. 83 | #show_authors = False 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | pygments_style = 'sphinx' 87 | 88 | # A list of ignored prefixes for module index sorting. 89 | #modindex_common_prefix = [] 90 | 91 | 92 | # -- Options for HTML output --------------------------------------------------- 93 | 94 | # The theme to use for HTML and HTML Help pages. See the documentation for 95 | # a list of builtin themes. 96 | html_theme = 'default' 97 | 98 | # Theme options are theme-specific and customize the look and feel of a theme 99 | # further. For a list of options available for each theme, see the 100 | # documentation. 101 | #html_theme_options = {} 102 | 103 | # Add any paths that contain custom themes here, relative to this directory. 104 | #html_theme_path = [] 105 | 106 | # The name for this set of Sphinx documents. If None, it defaults to 107 | # " v documentation". 108 | #html_title = None 109 | 110 | # A shorter title for the navigation bar. Default is the same as html_title. 111 | #html_short_title = None 112 | 113 | # The name of an image file (relative to this directory) to place at the top 114 | # of the sidebar. 115 | #html_logo = None 116 | 117 | # The name of an image file (within the static path) to use as favicon of the 118 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 119 | # pixels large. 
120 | #html_favicon = None 121 | 122 | # Add any paths that contain custom static files (such as style sheets) here, 123 | # relative to this directory. They are copied after the builtin static files, 124 | # so a file named "default.css" will overwrite the builtin "default.css". 125 | html_static_path = ['_static'] 126 | 127 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 128 | # using the given strftime format. 129 | #html_last_updated_fmt = '%b %d, %Y' 130 | 131 | # If true, SmartyPants will be used to convert quotes and dashes to 132 | # typographically correct entities. 133 | #html_use_smartypants = True 134 | 135 | # Custom sidebar templates, maps document names to template names. 136 | #html_sidebars = {} 137 | 138 | # Additional templates that should be rendered to pages, maps page names to 139 | # template names. 140 | #html_additional_pages = {} 141 | 142 | # If false, no module index is generated. 143 | #html_domain_indices = True 144 | 145 | # If false, no index is generated. 146 | #html_use_index = True 147 | 148 | # If true, the index is split into individual pages for each letter. 149 | #html_split_index = False 150 | 151 | # If true, links to the reST sources are added to the pages. 152 | #html_show_sourcelink = True 153 | 154 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 155 | #html_show_sphinx = True 156 | 157 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 158 | #html_show_copyright = True 159 | 160 | # If true, an OpenSearch description file will be output, and all pages will 161 | # contain a tag referring to it. The value of this option must be the 162 | # base URL from which the finished HTML is served. 163 | #html_use_opensearch = '' 164 | 165 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 166 | #html_file_suffix = None 167 | 168 | # Output file base name for HTML help builder. 
169 | htmlhelp_basename = 'Toolzdoc' 170 | 171 | 172 | # -- Options for LaTeX output -------------------------------------------------- 173 | 174 | latex_elements = { 175 | # The paper size ('letterpaper' or 'a4paper'). 176 | #'papersize': 'letterpaper', 177 | 178 | # The font size ('10pt', '11pt' or '12pt'). 179 | #'pointsize': '10pt', 180 | 181 | # Additional stuff for the LaTeX preamble. 182 | #'preamble': '', 183 | } 184 | 185 | # Grouping the document tree into LaTeX files. List of tuples 186 | # (source start file, target name, title, author, documentclass [howto/manual]). 187 | latex_documents = [ 188 | ('index', 'Toolz.tex', u'Toolz Documentation', 189 | u'Matthew Rocklin, John Jacobsen', 'manual'), 190 | ] 191 | 192 | # The name of an image file (relative to this directory) to place at the top of 193 | # the title page. 194 | #latex_logo = None 195 | 196 | # For "manual" documents, if this is true, then toplevel headings are parts, 197 | # not chapters. 198 | #latex_use_parts = False 199 | 200 | # If true, show page references after internal links. 201 | #latex_show_pagerefs = False 202 | 203 | # If true, show URL addresses after external links. 204 | #latex_show_urls = False 205 | 206 | # Documents to append as an appendix to all manuals. 207 | #latex_appendices = [] 208 | 209 | # If false, no module index is generated. 210 | #latex_domain_indices = True 211 | 212 | 213 | # -- Options for manual page output -------------------------------------------- 214 | 215 | # One entry per manual page. List of tuples 216 | # (source start file, name, description, authors, manual section). 217 | man_pages = [ 218 | ('index', 'toolz', u'Toolz Documentation', 219 | [u'Matthew Rocklin, John Jacobsen'], 1) 220 | ] 221 | 222 | # If true, show URL addresses after external links. 223 | #man_show_urls = False 224 | 225 | 226 | # -- Options for Texinfo output ------------------------------------------------ 227 | 228 | # Grouping the document tree into Texinfo files. 
List of tuples 229 | # (source start file, target name, title, author, 230 | # dir menu entry, description, category) 231 | texinfo_documents = [ 232 | ('index', 'Toolz', u'Toolz Documentation', 233 | u'Matthew Rocklin, John Jacobsen', 'Toolz', 'One line description of project.', 234 | 'Miscellaneous'), 235 | ] 236 | 237 | # Documents to append as an appendix to all manuals. 238 | #texinfo_appendices = [] 239 | 240 | # If false, no module index is generated. 241 | #texinfo_domain_indices = True 242 | 243 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 244 | #texinfo_show_urls = 'footnote' 245 | 246 | 247 | # -- Options for Epub output --------------------------------------------------- 248 | 249 | # Bibliographic Dublin Core info. 250 | epub_title = u'Toolz' 251 | epub_author = u'Matthew Rocklin, John Jacobsen' 252 | epub_publisher = u'Matthew Rocklin, John Jacobsen' 253 | epub_copyright = u'2013, Matthew Rocklin, John Jacobsen' 254 | 255 | # The language of the text. It defaults to the language option 256 | # or en if the language is not set. 257 | #epub_language = '' 258 | 259 | # The scheme of the identifier. Typical schemes are ISBN or URL. 260 | #epub_scheme = '' 261 | 262 | # The unique identifier of the text. This can be a ISBN number 263 | # or the project homepage. 264 | #epub_identifier = '' 265 | 266 | # A unique identification for the text. 267 | #epub_uid = '' 268 | 269 | # A tuple containing the cover image and cover page html template filenames. 270 | #epub_cover = () 271 | 272 | # HTML files that should be inserted before the pages created by sphinx. 273 | # The format is a list of tuples containing the path and title. 274 | #epub_pre_files = [] 275 | 276 | # HTML files shat should be inserted after the pages created by sphinx. 277 | # The format is a list of tuples containing the path and title. 278 | #epub_post_files = [] 279 | 280 | # A list of files that should not be packed into the epub file. 
281 | #epub_exclude_files = []
282 | 
283 | # The depth of the table of contents in toc.ncx.
284 | #epub_tocdepth = 3
285 | 
286 | # Allow duplicate toc entries.
287 | #epub_tocdup = True
288 | 
--------------------------------------------------------------------------------
/doc/source/control.rst:
--------------------------------------------------------------------------------
1 | Control Flow
2 | ============
3 | 
4 | Programming is hard when we think simultaneously about several concepts. Good
5 | programming breaks down big problems into small problems and
6 | builds up small solutions into big solutions. By this practice the
7 | need for simultaneous thought is restricted to only a few elements at a time.
8 | 
9 | All modern languages provide mechanisms to build data into data structures and
10 | to build functions out of other functions. The third element of
11 | programming, besides data and functions, is control flow. Building
12 | complex control flow out of simple control flow presents deeper
13 | challenges.
14 | 
15 | 
16 | What?
17 | -----
18 | 
19 | Each element in a computer program is either
20 | 
21 | - A variable or value literal like ``x``, ``total``, or ``5``
22 | - A function or computation like the ``+`` in ``x + 1``, the function ``fib``
23 |   in ``fib(3)``, the method ``split`` in ``line.split(',')``, or the ``=`` in
24 |   ``x = 0``
25 | - Control flow like ``if``, ``for``, or ``return``
26 | 
27 | Here is a piece of code; see if you can label each term as either
28 | variable/value, function/computation, or control flow
29 | 
30 | .. code::
31 | 
32 |     def fib(n):
33 |         a, b = 0, 1
34 |         for i in range(n):
35 |             a, b = b, a + b
36 |         return b
37 | 
38 | Programming is hard when we have to juggle many code elements of each type at 
Good programming is about managing these three elements so that 40 | the developer is only required to think about a handful of them at a time. For 41 | example we might collect many integer variables into a list of integers or 42 | build a big function out of smaller ones. While we have natural ways to manage 43 | data and functions, control flow presents more of a challenge. 44 | 45 | We organize our data into **data structures** like lists, dictionaries, or objects 46 | in order to group related data together -- this allows us to manipulate large 47 | collections of related data as if we were only manipulating a single entity. 48 | 49 | We **build large functions out of smaller ones**; enabling us to break up a 50 | complex task like doing laundry into a sequence of simpler tasks. 51 | 52 | .. code:: 53 | 54 | def do_laundry(clothes): 55 | wet_clothes = wash(clothes, coins) 56 | dry_clothes = dry(wet_clothes, coins) 57 | return fold(dry_clothes) 58 | 59 | **Control flow is more challenging**; how do we break down complex control flow 60 | into simpler pieces that fit in our brain? How do we encapsulate commonly 61 | recurring patterns? 62 | 63 | Lets motivate this with an example of a common control structure, applying a 64 | function to each element in a list. Imagine we want to download the HTML 65 | source for a number of webpages. 66 | 67 | .. code:: 68 | 69 | from urllib import urlopen 70 | 71 | urls = ['http://www.google.com', 'http://www.wikipedia.com', 'http://www.apple.com'] 72 | html_texts = [] 73 | for item in urls: 74 | html_texts.append(urlopen(item)) 75 | return html_texts 76 | 77 | Or maybe we want to compute the Fibonacci numbers on a particular set of 78 | integers 79 | 80 | .. code:: 81 | 82 | integers = [1, 2, 3, 4, 5] 83 | fib_integers = [] 84 | for item in integers: 85 | fib_integers.append(fib(item)) 86 | return fib_integers 87 | 88 | These two unrelated applications share an identical control flow pattern. 
They
89 | apply a function (``urlopen`` or ``fib``) onto each element of an input list
90 | (``urls``, or ``integers``), appending the result onto an output list. Because
91 | this control flow pattern is so common we give it a name, ``map``, and say that
92 | we map a function (like ``urlopen``) onto a list (like ``urls``).
93 | 
94 | Because Python can treat functions like variables we can encode this control
95 | pattern into a higher-order-function as follows:
96 | 
97 | .. code::
98 | 
99 |     def map(function, sequence):
100 |         output = []
101 |         for item in sequence:
102 |             output.append(function(item))
103 |         return output
104 | 
105 | This allows us to simplify our code above to the following, pithy solutions
106 | 
107 | .. code::
108 | 
109 |     html_texts = map(urlopen, urls)
110 |     fib_integers = map(fib, integers)
111 | 
112 | Experienced Python programmers know that this control pattern is so popular
113 | that it has been elevated to the status of **syntax** with the popular list
114 | comprehension
115 | 
116 | .. code::
117 | 
118 |     html_texts = [urlopen(url) for url in urls]
119 | 
120 | 
121 | Why?
122 | ----
123 | 
124 | So maybe you already knew about ``map`` and don't use it or maybe you just
125 | prefer list comprehensions. Why should you keep reading?
126 | 
127 | Managing Complexity
128 | ^^^^^^^^^^^^^^^^^^^
129 | 
130 | The higher order function ``map`` gives us a name to call a particular control
131 | pattern. Regardless of whether or not you use a for loop, a list
132 | comprehension, or ``map`` itself, it is useful to recognize the operation
133 | and to give it a name. Naming control patterns lets us tackle
134 | complex problems at a larger scale without burdening our mind with rote details.
135 | It is just as important as bundling data into data structures or building
136 | complex functions out of simple ones.
137 | 138 | *Naming control flow patterns enables programmers to manipulate increasingly 139 | complex operations.* 140 | 141 | Other Patterns 142 | ^^^^^^^^^^^^^^ 143 | 144 | The function ``map`` has friends. Advanced programmers may know about 145 | ``map``'s siblings, ``filter`` and ``reduce``. The ``filter`` control pattern 146 | is also handled by list comprehension syntax and ``reduce`` is often replaced 147 | by straight for loops, so if you don't want to use them there is no immediately 148 | practical reason why you would care. 149 | 150 | Most programmers however don't know about the many cousins of 151 | ``map``/``filter``/``reduce``. Consider for example the unsung heroine, 152 | ``groupby``. A brief example grouping names by their length follows: 153 | 154 | .. code:: 155 | 156 | >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank'] 157 | >>> groupby(len, names) 158 | {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']} 159 | 160 | Groupby collects each element of a list into sublists determined by the value 161 | of a function. Lets see ``groupby`` in action again, grouping numbers by 162 | evenness. 163 | 164 | .. code:: 165 | 166 | >>> def iseven(n): 167 | ... return n % 2 == 0 168 | 169 | >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7]) 170 | {True: [2, 4, 6], False: [1, 3, 5, 7]} 171 | 172 | If we were to write this second operation out by hand it might look something 173 | like the following: 174 | 175 | .. code:: 176 | 177 | evens = [] 178 | odds = [] 179 | for item in numbers: 180 | if iseven(item): 181 | evens.append(item) 182 | else: 183 | odds.append(item) 184 | 185 | Most programmers have written code exactly like this over and over again, just 186 | like they may have repeated the ``map`` control pattern. When we identify code 187 | as a ``groupby`` operation we mentally collapse the detailed manipulation into 188 | a single concept. 189 | 190 | The Toolz library contains dozens of patterns like ``map`` and ``groupby``. 
191 | Learning a core set (maybe a dozen) covers the vast majority of common 192 | programming tasks often done by hand. 193 | 194 | *A rich vocabulary of core control functions conveys the following benefits:* 195 | 196 | - You identify new patterns 197 | - You make fewer errors in rote coding 198 | - You can depend on well tested and benchmarked implementations 199 | 200 | But this does not come for free. As in spoken language the use of a rich 201 | vocabulary can alienate new practitioners. Most functional languages have 202 | fallen into this trap and are seen as unapproachable and smug. Python 203 | maintains a low-brow reputation and benefits from it. Just as with spoken 204 | language the value of using just-the-right-word must be moderated with the 205 | comprehension of the intended audience. 206 | -------------------------------------------------------------------------------- /doc/source/curry.rst: -------------------------------------------------------------------------------- 1 | 2 | Curry 3 | ===== 4 | 5 | Traditionally partial evaluation of functions is handled with the ``partial`` 6 | higher order function from ``functools``. Currying provides syntactic sugar. 7 | 8 | .. code:: 9 | 10 | >>> double = partial(mul, 2) # Partial evaluation 11 | >>> double = mul(2) # Currying 12 | 13 | This syntactic sugar is valuable when developers chain several higher order 14 | functions together. 15 | 16 | Partial Evaluation 17 | ------------------ 18 | 19 | Often when composing smaller functions to form big ones we need partial 20 | evaluation. We do this in the word counting example: 21 | 22 | .. code:: 23 | 24 | >>> def stem(word): 25 | ... """ Stem word to primitive form """ 26 | ... return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") 27 | 28 | >>> wordcount = compose(frequencies, partial(map, stem), str.split) 29 | 30 | Here we want to map the ``stem`` function onto each of the words produced by 31 | ``str.split``. 
We want a ``stem_many`` function that takes a list of words, 32 | stems them, and returns a list back. In full form this would look like the 33 | following: 34 | 35 | .. code:: 36 | 37 | >>> def stem_many(words): 38 | ... return map(stem, words) 39 | 40 | The ``partial`` function lets us create this function more naturally. 41 | 42 | .. code:: 43 | 44 | >>> stem_many = partial(map, stem) 45 | 46 | In general 47 | 48 | .. code:: 49 | 50 | >>> def f(x, y, z): 51 | ... # Do stuff with x, y, and z 52 | 53 | >>> # partially evaluate f with known values a and b 54 | >>> def g(z): 55 | ... return f(a, b, z) 56 | 57 | >>> # partially evaluate f with known values a and b 58 | >>> g = partial(f, a, b) 59 | 60 | Curry 61 | ----- 62 | 63 | In this context currying is just syntactic sugar for partial evaluation. A 64 | curried function partially evaluates if it does not receive enough arguments to 65 | compute a result. 66 | 67 | .. code:: 68 | 69 | >>> from toolz import curry 70 | 71 | >>> @curry # We can use curry as a decorator 72 | ... def mul(x, y): 73 | ... return x * y 74 | 75 | >>> double = mul(2) # mul didn't receive enough arguments to evaluate 76 | ... # so it holds onto the 2 and waits, returning a 77 | ... # partially evaluated function, double 78 | 79 | >>> double(5) 80 | 10 81 | 82 | So if ``map`` was curried... 83 | 84 | .. code:: 85 | 86 | >>> map = curry(map) 87 | 88 | Then we could replace the ``partial`` with a function evaluation 89 | 90 | .. code:: 91 | 92 | >>> # wordcount = compose(frequencies, partial(map, stem), str.split) 93 | >>> wordcount = compose(frequencies, map(stem), str.split) 94 | 95 | In this particular example it's probably simpler to stick with ``partial``. 96 | Once ``partial`` starts occurring several times in your code it may be time to 97 | switch to the ``curried`` namespace. 
98 | 99 | The Curried Namespace 100 | --------------------- 101 | 102 | All functions present in the ``toolz`` namespace are curried in the 103 | ``toolz.curried`` namespace. 104 | 105 | So you can exchange an import line like the following 106 | 107 | .. code:: 108 | 109 | >>> from toolz import * 110 | 111 | For the following 112 | 113 | .. code:: 114 | 115 | >>> from toolz.curried import * 116 | 117 | And all of your favorite ``toolz`` functions will curry automatically. We've 118 | also included curried versions of the standard Python higher order functions 119 | like ``map``, ``filter``, ``reduce`` so you'll get them too (whether you like 120 | it or not.) 121 | -------------------------------------------------------------------------------- /doc/source/heritage.rst: -------------------------------------------------------------------------------- 1 | Heritage 2 | ======== 3 | 4 | While Python was originally intended as an imperative language 5 | [Guido_], it contains all elements necessary to support a rich set of features 6 | from the functional paradigm. In particular its core data structures, lazy 7 | iterators, and functions as first class objects can be combined to implement a 8 | common standard library of functions shared among many functional languages. 9 | 10 | This was first recognized and supported through the standard libraries 11 | itertools_ and functools_ which contain functions like ``permutations``, 12 | ``chain`` and ``partial`` to complement the standard ``map``, ``filter``, 13 | ``reduce`` already found in the core language. While these libraries contain 14 | substantial functionality they do not achieve the same level of adoption found 15 | in similar projects in other languages. This may be because they are 16 | incomplete and lack a number of commonly related functions like ``compose`` and 17 | ``groupby`` which often complement these core operations. 
18 | 
19 | A completion of this set of functions was first attempted in the projects
20 | itertoolz_ and functoolz_ (note the z). These libraries contained
21 | several functions that were absent in the standard itertools_/functools_
22 | libraries. The ``itertoolz``/``functoolz`` libraries were eventually merged
23 | into the monolithic ``toolz`` project described here.
24 | 
25 | Most contemporary functional languages (Haskell, Scala, Clojure, ...) contain
26 | some variation of the functions found in ``toolz``. The ``toolz`` project
27 | generally adheres closely to the API found in the Clojure standard library (see
28 | cheatsheet_) and where disagreements occur that API usually dominates. The
29 | ``toolz`` API is also strongly affected by the principles of the Python
30 | language itself, and often makes deviations in order to be more approachable to
31 | that community.
32 | 
33 | The development of a functional standard library within a popular imperative
34 | language is not unique. Similar projects have arisen in other
35 | imperative-by-design languages that contain the necessary elements to support a
36 | functional standard library. Underscore.js_ in JavaScript has attained
37 | notable popularity in the web community. ``LINQ`` in C# follows a similar
38 | philosophy but mimics declarative database languages rather than functional
39 | ones. Enumerable_ is the closest project in Ruby. Other excellent projects
40 | also exist within the Python ecosystem, most notably Fn.py_ and Funcy_.
41 | 
42 | .. [itertools] http://docs.python.org/2/library/itertools.html
43 | .. [functools] http://docs.python.org/2/library/functools.html
44 | .. [itertoolz] http://github.com/pytoolz/itertoolz
45 | .. [functoolz] http://github.com/pytoolz/functoolz
46 | .. [Underscore.js] http://underscorejs.org
47 | .. [cheatsheet] http://clojure.org/cheatsheet
48 | .. [Guido] http://python-history.blogspot.com/2009/04/origins-of-pythons-functional-features.html
49 | .. 
[Enumerable] http://ruby-doc.org/core-2.0.0/Enumerable.html 50 | .. [funcy] https://github.com/suor/funcy/ 51 | .. [fn.py] https://github.com/kachayev/fn.py 52 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | 2 | PyToolz API Documentation 3 | ========================= 4 | 5 | Toolz provides a set of utility functions for iterators, functions, 6 | and dictionaries. These functions interoperate well and form 7 | the building blocks of common data analytic operations. They extend the 8 | standard libraries `itertools` and `functools` and borrow heavily from the 9 | standard libraries of contemporary functional languages. 10 | 11 | Toolz provides a suite of functions which have the following functional virtues: 12 | 13 | - **Composable:** They interoperate due to their use of core data structures. 14 | - **Pure:** They don't change their inputs or rely on external state. 15 | - **Lazy:** They don't run until absolutely necessary, allowing them to support large streaming data sets. 16 | 17 | Toolz functions are *pragmatic*. They understand that most programmers 18 | have deadlines. 19 | 20 | - **Low Tech:** They're just functions, no syntax or magic tricks to learn 21 | - **Tuned:** They're profiled and optimized 22 | - **Serializable:** They support common solutions for parallel computing 23 | 24 | This gives developers the power to write *powerful* programs to solve *complex 25 | problems* with relatively *simple code*. This code can be *easy to understand* 26 | without sacrificing *performance*. Toolz enables this approach, commonly 27 | associated with functional programming, within a natural Pythonic style 28 | suitable for most developers. 29 | 30 | BSD licensed source code is available at http://github.com/pytoolz/toolz/ . 31 | 32 | 33 | Contents 34 | ^^^^^^^^ 35 | 36 | .. 
3. Toolz simultaneously supports Python versions 2.6, 2.7, 3.3, 3.4, 3.5 and PyPy
code:: 24 | 25 | >>> next(book) 26 | "It was the best of times," 27 | 28 | >>> next(book) 29 | "it was the worst of times," 30 | 31 | and so on. Each time we call ``next`` on ``book`` we burn through another line 32 | of the text and the ``book`` iterator marches slowly onwards through the text. 33 | 34 | 35 | Computation 36 | ----------- 37 | 38 | We can lazily operate on lazy iterators without doing any actual computation. 39 | For example lets read the book in upper case 40 | 41 | .. code:: 42 | 43 | >>> from toolz import map # toolz' map is lazy by default 44 | 45 | >>> loud_book = map(str.upper, book) 46 | 47 | >>> next(loud_book) 48 | "IT WAS THE AGE OF WISDOM," 49 | >>> next(loud_book) 50 | "IT WAS THE AGE OF FOOLISHNESS," 51 | 52 | It is as if we applied the function ``str.upper`` onto every line of the book; 53 | yet the first line completes instantaneously. Instead Python does the 54 | uppercasing work only when it becomes necessary, i.e. when you call ``next`` 55 | to ask for another line. 56 | 57 | 58 | Reductions 59 | ---------- 60 | 61 | You can operate on lazy iterators just as you would with lists, tuples, or 62 | sets. You can use them in for loops as in 63 | 64 | 65 | .. code:: 66 | 67 | for line in loud_book: 68 | ... 69 | 70 | You can instantiate them all into memory by calling them with the constructors 71 | ``list``, or ``tuple``. 72 | 73 | .. code:: 74 | 75 | loud_book = list(loud_book) 76 | 77 | Of course if they are very large then this might be unwise. Often we use 78 | laziness to avoid loading large datasets into memory at once. Many 79 | computations on large datasets don't require access to all of the data at a 80 | single time. In particular *reductions* (like sum) often take large amounts of 81 | sequential data (like [1, 2, 3, 4]) and produce much more manageable results 82 | (like 10) and can do so just by viewing the data a little bit at a time. 
We could just 99 | as easily have done this computation on the entire Gutenberg collection or on
return ( 51 |             word.lower().rstrip(",.!)-*_?:;$'-\"").lstrip("-*'\"(_$'"))
This fold can work equally well with ``multiprocessing.Pool.map``, 95 | ``threading.Pool.map``, or ``IPython.parallel``'s ``map_async``.
code:: 44 | 45 | >>> data = [1, 2, 3] 46 | >>> result = powers(data) 47 | 48 | >>> print result 49 | [1, 4, 9] 50 | >>> print data 51 | [1, 4, 9] 52 | 53 | We see that ``powers`` affected the variable ``data``. Users of our function 54 | might be surprised by this. Usually we expect our inputs to be unchanged. 55 | 56 | Another problem occurs when we run this code in a different context: 57 | 58 | .. code:: 59 | 60 | >>> data = [1, 2, 3] 61 | >>> result = powers(data) 62 | >>> print result 63 | [1, 8, 27] 64 | 65 | When we give ``powers`` the same inputs we receive different outputs; how could 66 | this be? Someone must have changed the value of ``exponent`` to be ``3``, 67 | producing cubes rather than squares. At first this flexibility may seem like a 68 | feature and indeed in many cases it may be. The cost for this flexibility is 69 | that we need to keep track of the ``exponent`` variable separately whenever we 70 | use ``powers``. As we use more functions these extra variables become a 71 | burden. 72 | 73 | 74 | State 75 | ----- 76 | 77 | Impure functions are often more efficient but also require that the programmer 78 | "keep track" of the state of several variables. Keeping track of this state 79 | becomes increasingly difficult as programs grow in size. By eschewing state 80 | programmers are able to conceptually scale out to solve much larger problems. 81 | The loss of performance is often negligible compared to the freedom to trust 82 | that your functions work as expected on your inputs. 83 | 84 | Maintaining state provides efficiency at the cost of surprises. Pure 85 | functions produce no surprises and so lighten the mental load of the 86 | programmer. 87 | 88 | 89 | Testing 90 | ------- 91 | 92 | As an added bonus, testing pure functions is substantially simpler than testing 93 | impure ones. A programmer who has tried to test functions that include 94 | randomness will know this first-hand. 
- `Underscore.js <http://underscorejs.org>`__: A similar library for 5 |   JavaScript 6 | - `Enumerable <http://ruby-doc.org/core-2.0.0/Enumerable.html>`__: A 7 |   similar library for Ruby 8 | - `Clojure <http://clojure.org>`__: A functional language whose 9 |   standard library has several counterparts in ``toolz`` 10 | - `itertools <http://docs.python.org/2/library/itertools.html>`__: The 11 |   Python standard library for iterator tools 12 | - `functools <http://docs.python.org/2/library/functools.html>`__: The 13 |   Python standard library for function tools 14 | - `Functional Programming HOWTO <http://docs.python.org/dev/howto/functional.html>`__: 15 |   The description of functional programming features from the official 16 |   Python docs. 17 | 18 | Contemporary Projects 19 | --------------------- 20 | 21 | These projects also provide iterator and functional utilities within 22 | Python.  Their functionality overlaps substantially with that of PyToolz. 23 | 24 | - `funcy <https://github.com/suor/funcy/>`__ 25 | - `fn.py <https://github.com/kachayev/fn.py>`__ 26 | - `more\_itertools <https://github.com/erikrose/more-itertools>`__
(5, 'Edith', 300, 'F')] 19 | 20 | Selecting with ``map`` and ``filter`` 21 | ------------------------------------- 22 | 23 | Simple projection and linear selection from a sequence is achieved through the 24 | standard functions ``map`` and ``filter``. 25 | 26 | .. code:: 27 | 28 | SELECT name, balance 29 | FROM accounts 30 | WHERE balance > 150; 31 | 32 | These functions correspond to the SQL commands ``SELECT`` and ``WHERE``. 33 | 34 | .. code:: 35 | 36 | >>> from toolz.curried import pipe, map, filter, get 37 | >>> pipe(accounts, filter(lambda (id, name, balance, gender): balance > 150), 38 | ... map(get([1, 2])), 39 | ... list) 40 | 41 | *note: this uses the curried_ versions of ``map`` and ``filter``.* 42 | 43 | Of course, these operations are also well supported with standard 44 | list/generator comprehension syntax. This syntax is more often used and 45 | generally considered to be more Pythonic. 46 | 47 | .. code:: 48 | 49 | >>> [(name, balance) for (id, name, balance, gender) in accounts 50 | ... if balance > 150] 51 | 52 | 53 | Split-apply-combine with ``groupby`` and ``reduceby`` 54 | ----------------------------------------------------- 55 | 56 | We separate split-apply-combine operations into the following two concepts 57 | 58 | 1. Split the dataset into groups by some property 59 | 2. Reduce each of the groups with some synopsis function 60 | 61 | Toolz supports this common workflow with 62 | 63 | 1. a simple in-memory solution 64 | 2. a more sophisticated streaming solution. 65 | 66 | 67 | In Memory Split-Apply-Combine 68 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 69 | 70 | The in-memory solution depends on the functions `groupby`_ to split, and 71 | `valmap`_ to apply/combine. 72 | 73 | .. code:: 74 | 75 | SELECT gender, SUM(balance) 76 | FROM accounts 77 | GROUP BY gender; 78 | 79 | We first show these two functions piece by piece to show the intermediate 80 | groups. 81 | 82 | .. 
It applies the key function to each item in 120 | succession,
return x + y 134 | 135 | >>> reduceby(iseven, add, [1, 2, 3, 4]) 136 | {True: 6, False: 4} 137 | 138 | The even numbers are added together ``(2 + 4 = 6)`` into group ``True``, and 139 | the odd numbers are added together ``(1 + 3 = 4)`` into group ``False``. 140 | 141 | 142 | Note that we have to replace the reduction ``sum`` with the binary operator 143 | ``add``. The incremental nature of ``add`` allows us to do the summation work as 144 | new data comes in. The use of binary operators like ``add`` over full reductions 145 | like ``sum`` enables computation on very large streaming datasets. 146 | 147 | The challenge to using ``reduceby`` often lies in the construction of a 148 | suitable binary operator. Here is the solution for our accounts example 149 | that adds up the balances for each group: 150 | 151 | .. code:: 152 | 153 | >>> binop = lambda total, (id, name, bal, gend): total + bal 154 | 155 | >>> reduceby(get(3), binop, accounts) 156 | {'F': 400, 'M': 400} 157 | 158 | 159 | This construction supports datasets that are much larger than available memory. 160 | Only the output must be able to fit comfortably in memory and this is rarely an 161 | issue, even for very large split-apply-combine computations. 162 | 163 | 164 | Semi-Streaming ``join`` 165 | ----------------------- 166 | 167 | We register multiple datasets together with `join`_. Consider a second 168 | dataset storing addresses by ID 169 | 170 | .. code:: 171 | 172 | >>> addresses = [(1, '123 Main Street'), # id, address 173 | ... (2, '5 Adams Way'), 174 | ... (5, '34 Rue St Michel')] 175 | 176 | We can join this dataset against our accounts dataset by specifying attributes 177 | which register different elements with each other; in this case they share a 178 | common first column, id. 179 | 180 | .. code:: 181 | 182 | SELECT accounts.name, addresses.address 183 | FROM accounts, addresses 184 | WHERE accounts.id = addresses.id; 185 | 186 | 187 | .. 
>>> from toolz import join, first, second, unique
('Dan', 'Sydney'),
print((name, city)) 284 | ('Alice', 'Berlin') 285 | ('Alice', 'Paris') 286 | ('Alice', 'Shanghai') 287 | ('Edith', 'Chicago') 288 | ('Edith', 'NYC') 289 | ('Zhao', 'Chicago') 290 | ('Zhao', 'NYC') 291 | ('Zhao', 'Berlin') 292 | ('Zhao', 'Paris') 293 | 294 | Join is computationally powerful: 295 | 296 | * It is expressive enough to cover a wide set of analytics operations 297 | * It runs in linear time relative to the size of the input and output 298 | * Only the left sequence must fit in memory 299 | 300 | 301 | Disclaimer 302 | ---------- 303 | 304 | Toolz is a general purpose functional standard library, not a library 305 | specifically for data analytics. While there are obvious benefits (streaming, 306 | composition, ...) users interested in data analytics might be better served by 307 | using projects specific to data analytics like Pandas_ or SQLAlchemy. 308 | 309 | 310 | .. _groupby: http://toolz.readthedocs.org/en/latest/api.html#toolz.itertoolz.groupby 311 | .. _join: http://toolz.readthedocs.org/en/latest/api.html#toolz.itertoolz.join 312 | .. _reduceby: http://toolz.readthedocs.org/en/latest/api.html#toolz.itertoolz.reduceby 313 | .. _valmap: http://toolz.readthedocs.org/en/latest/api.html#toolz.itertoolz.valmap 314 | .. _Pandas: http://pandas.pydata.org/pandas-docs/stable/groupby.html 315 | .. _curried: http://toolz.readthedocs.org/en/latest/curry.html 316 | -------------------------------------------------------------------------------- /doc/source/tips-and-tricks.rst: -------------------------------------------------------------------------------- 1 | Tips and Tricks 2 | =============== 3 | 4 | Toolz functions can be combined to make functions that, while common, aren't 5 | a part of toolz's standard library. This section presents 6 | a few of these recipes. 7 | 8 | 9 | * .. function:: pick(whitelist, dictionary) 10 | 11 | Return a subset of the provided dictionary with keys contained in the 12 | whitelist. 
13 | 14 | :: 15 | 16 | from toolz import keyfilter 17 | 18 | def pick(whitelist, d): 19 | return keyfilter(lambda k: k in whitelist, d) 20 | 21 | 22 | Example: 23 | 24 | >>> alphabet = {'a': 1, 'b': 2, 'c': 3, 'd': 4} 25 | >>> pick(['a', 'b'], alphabet) 26 | {'a': 1, 'b': 2} 27 | 28 | 29 | * .. function:: omit(blacklist, dictionary) 30 | 31 | Return a subset of the provided dictionary with keys *not* contained in the 32 | blacklist. 33 | 34 | :: 35 | 36 | from toolz import keyfilter 37 | 38 | def omit(blacklist, d): 39 | return keyfilter(lambda k: k not in blacklist, d) 40 | 41 | 42 | Example: 43 | 44 | >>> alphabet = {'a': 1, 'b': 2, 'c': 3, 'd': 4} 45 | >>> omit(['a', 'b'], alphabet) 46 | {'c': 3, 'd': 4} 47 | 48 | 49 | * .. function:: compact(iterable) 50 | 51 | Filter an iterable on "truthy" values. 52 | 53 | :: 54 | 55 | from toolz import filter 56 | 57 | def compact(iter): 58 | return filter(None, iter) 59 | 60 | 61 | Example: 62 | 63 | >>> results = [0, 1, 2, None, 3, False] 64 | >>> list(compact(results)) 65 | [1, 2, 3] 66 | 67 | * .. function:: keyjoin(leftkey, leftseq, rightkey, rightseq) 68 | 69 | Inner join two sequences of dictionaries on specified keys, merging matches with right value 70 | precedence. 
71 | 72 | :: 73 | 74 | from itertools import starmap 75 | from toolz import join, merge 76 | 77 | def keyjoin(leftkey, leftseq, rightkey, rightseq): 78 | return starmap(merge, join(leftkey, leftseq, rightkey, rightseq)) 79 | 80 | 81 | Example: 82 | 83 | >>> people = [{'id': 0, 'name': 'Anonymous Guy', 'location': 'Unknown'}, 84 | {'id': 1, 'name': 'Karan', 'location': 'San Francisco'}, 85 | {'id': 2, 'name': 'Matthew', 'location': 'Oakland'}] 86 | >>> hobbies = [{'person_id': 1, 'hobby': 'Tennis'}, 87 | {'person_id': 1, 'hobby': 'Acting'}, 88 | {'person_id': 2, 'hobby': 'Biking'}] 89 | >>> list(keyjoin('id', people, 'person_id', hobbies)) 90 | [{'hobby': 'Tennis', 91 | 'id': 1, 92 | 'location': 'San Francisco', 93 | 'name': 'Karan', 94 | 'person_id': 1}, 95 | {'hobby': 'Acting', 96 | 'id': 1, 97 | 'location': 'San Francisco', 98 | 'name': 'Karan', 99 | 'person_id': 1}, 100 | {'hobby': 'Biking', 101 | 'id': 2, 102 | 'location': 'Oakland', 103 | 'name': 'Matthew', 104 | 'person_id': 2}] 105 | 106 | * .. function:: areidentical(\*seqs) 107 | 108 | Determine if sequences are identical element-wise. 109 | This lazily evaluates the sequences and stops as soon as the result 110 | is determined. 
111 | 112 | :: 113 | 114 | from toolz import diff 115 | 116 | def areidentical(*seqs): 117 | return not any(diff(*seqs, default=object())) 118 | 119 | 120 | Example: 121 | 122 | >>> areidentical([1, 2, 3], (1, 2, 3)) 123 | True 124 | 125 | >>> areidentical([1, 2, 3], [1, 2]) 126 | False 127 | -------------------------------------------------------------------------------- /examples/fib.py: -------------------------------------------------------------------------------- 1 | # / 0 if i is 0 2 | # fib(i) = | 1 if i is 1 3 | # \ fib(i - 1) + fib(i - 2) otherwise 4 | 5 | 6 | def fib(n): 7 | """ Imperative definition of Fibonacci numbers """ 8 | a, b = 0, 1 9 | for i in range(n): 10 | a, b = b, a + b 11 | return a 12 | 13 | 14 | # This is intuitive but VERY slow 15 | def fib(n): 16 | """ Functional definition of Fibonacci numbers """ 17 | if n == 0 or n == 1: 18 | return n 19 | else: 20 | return fib(n - 1) + fib(n - 2) 21 | 22 | from toolz import memoize 23 | 24 | # Oh wait, it's fast again 25 | fib = memoize(fib) 26 | 27 | 28 | # Provide a cache with initial values to `memoize` 29 | @memoize(cache={0: 0, 1: 1}) 30 | def fib(n): 31 | """ Functional definition of Fibonacci numbers with initial terms cached. 32 | 33 | fib(0) == 0 34 | fib(1) == 1 35 | ... 
36 | fib(n) == fib(n - 1) + fib(n - 2) 37 | """ 38 | return fib(n - 1) + fib(n - 2) 39 | -------------------------------------------------------------------------------- /examples/graph.py: -------------------------------------------------------------------------------- 1 | from toolz.curried import * 2 | a, b, c, d, e, f, g = 'abcdefg' 3 | 4 | edges = [(a, b), (b, a), (a, c), (a, d), (d, a), (d, e), (e, f), (d, f), 5 | (f, d), (d, g), (e, g)] 6 | 7 | 8 | out_degrees = countby(first, edges) 9 | # {'a': 3, 'b': 1, 'd': 4, 'e': 2, 'f': 1} 10 | 11 | in_degrees = countby(second, edges) 12 | # {'a': 2, 'b': 1, 'c': 1, 'd': 2, 'e': 1, 'f': 2, 'g': 2} 13 | 14 | 15 | out_neighbors = valmap(comp(tuple, map(second)), 16 | groupby(first, edges)) 17 | # {'a': ('b', 'c', 'd'), 18 | # 'b': ('a',), 19 | # 'd': ('a', 'e', 'f', 'g'), 20 | # 'e': ('f', 'g'), 21 | # 'f': ('d',)} 22 | 23 | in_neighbors = valmap(comp(tuple, map(first)), 24 | groupby(second, edges)) 25 | # {'a': ('b', 'd'), 26 | # 'b': ('a',), 27 | # 'c': ('a',), 28 | # 'd': ('a', 'f'), 29 | # 'e': ('d',), 30 | # 'f': ('e', 'd'), 31 | # 'g': ('d', 'e')} 32 | -------------------------------------------------------------------------------- /examples/wordcount.py: -------------------------------------------------------------------------------- 1 | from toolz import * 2 | 3 | 4 | def stem(word): 5 | """ Stem word to primitive form """ 6 | return word.lower().rstrip(",.!:;'-\"").lstrip("'\"") 7 | 8 | wordcount = comp(frequencies, partial(map, stem), str.split) 9 | 10 | if __name__ == '__main__': 11 | print(wordcount("This cat jumped over this other cat!")) 12 | # prints {'this': 2, 'cat': 2, 'jumped': 1, 'over': 1, 'other': 1} 13 | -------------------------------------------------------------------------------- /release-notes: -------------------------------------------------------------------------------- 1 | New in 0.4.2 2 | 3 | Removed intersection 4 | 5 | 6 | New in 0.5.3 7 | 8 | * get_in function 9 | * add itervalues, 
iterkeys, iteritems to compatibility 10 | * Add do function, remove side_effects from sandbox 11 | * Add juxt, partner to map 12 | * Performance improvements to merge_with 13 | * Errors from curried functions propagate upwards 14 | * keyfilter, valfilter 15 | * do 16 | 17 | New Authors: 18 | 19 | Graeme Coupar, @obmarg 20 | 21 | 22 | New in 0.6.0 23 | 24 | * memoize is curried by default 25 | * memoize support `key` keyword argument 26 | * Cleaned up issues in curried namespace 27 | * Unary functions memoize with just the single argument, not a tuple 28 | * Flattened directory structure 29 | * Add `pluck` function from underscore.js 30 | * Remove `sandbox.jackknife` 31 | 32 | 33 | New in 0.6.1 34 | 35 | 36 | * Python 3.4 support 37 | * New `join` operation 38 | * `join`, `groupby`, ... accept non-callable key functions. 39 | * Many speed improvements: 40 | * Cache method lookup 41 | * Faster `merge_sorted` without key 42 | * An additional round of tuning on `groupby` 43 | * Toolz builds on binstar build under mrocklin channel 44 | * Avoid generators, favor map. Assists in debugging. 
45 | * Cleaner `curry` implementation 46 | * Fix serialization issues for `juxt`, `complement` 47 | * `reduceby` no longer requires `default` keyword argument 48 | * Fix bug in `get` where `get([1], coll)` used to return element rather than 49 | length-one tuple 50 | * `EqualityHashKey` added to sandbox 51 | * `juxt` returns a tuple, not a generator 52 | 53 | 54 | New Authors: 55 | 56 | Leonid Shvechikov, José Ricardo, Lars Buitinck, Tom Prince 57 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from os.path import exists 4 | from setuptools import setup 5 | import toolz 6 | 7 | setup(name='toolz', 8 | version=toolz.__version__, 9 | description='List processing tools and functional utilities', 10 | url='http://github.com/pytoolz/toolz/', 11 | author='https://raw.github.com/pytoolz/toolz/master/AUTHORS.md', 12 | maintainer='Matthew Rocklin', 13 | maintainer_email='mrocklin@gmail.com', 14 | license='BSD', 15 | keywords='functional utility itertools functools', 16 | packages=['toolz', 17 | 'toolz.sandbox', 18 | 'toolz.curried'], 19 | package_data={'toolz': ['tests/*.py']}, 20 | long_description=(open('README.rst').read() if exists('README.rst') 21 | else ''), 22 | zip_safe=False) 23 | -------------------------------------------------------------------------------- /toolz/__init__.py: -------------------------------------------------------------------------------- 1 | from .itertoolz import * 2 | 3 | from .functoolz import * 4 | 5 | from .dicttoolz import * 6 | 7 | from .recipes import * 8 | 9 | from .compatibility import map, filter 10 | 11 | from . 
import operator
import sys
# Single flag used throughout toolz to select Python 2 vs Python 3 code paths.
PY3 = sys.version_info[0] > 2

__all__ = ('PY3', 'map', 'filter', 'range', 'zip', 'reduce', 'zip_longest',
           'iteritems', 'iterkeys', 'itervalues', 'filterfalse')

if PY3:
    # On Python 3 the builtins are already lazy; re-bind them here so every
    # name in __all__ can be imported from this module on both major versions.
    map = map
    filter = filter
    range = range
    zip = zip
    from functools import reduce
    from itertools import zip_longest
    from itertools import filterfalse
    # methodcaller-based shims that mirror the Python 2 dict methods
    # iteritems/iterkeys/itervalues on Python 3 dicts (which only have
    # items/keys/values views).
    iteritems = operator.methodcaller('items')
    iterkeys = operator.methodcaller('keys')
    itervalues = operator.methodcaller('values')
else:
    # Python 2: prefer the lazy itertools variants over the list-building
    # builtins, aliased to the Python 3 names.
    range = xrange
    reduce = reduce
    from itertools import imap as map
    from itertools import ifilter as filter
    from itertools import ifilterfalse as filterfalse
    from itertools import izip as zip
    from itertools import izip_longest as zip_longest
    iteritems = operator.methodcaller('iteritems')
    iterkeys = operator.methodcaller('iterkeys')
    itervalues = operator.methodcaller('itervalues')
222)] 16 | >>> list(map(lambda seq: get(0, seq), data)) 17 | [1, 11, 111] 18 | 19 | The curried version allows simple expression of partial evaluation 20 | >>> list(map(get(0), data)) 21 | [1, 11, 111] 22 | 23 | See Also: 24 | toolz.functoolz.curry 25 | """ 26 | import inspect 27 | 28 | from . import exceptions 29 | from . import operator 30 | import toolz 31 | 32 | 33 | def _nargs(f): 34 | try: 35 | return len(inspect.getargspec(f).args) 36 | except TypeError: 37 | return 0 38 | 39 | 40 | def _should_curry(f): 41 | do_curry = frozenset((toolz.map, toolz.filter, toolz.sorted, toolz.reduce)) 42 | return (callable(f) and _nargs(f) > 1 or f in do_curry) 43 | 44 | 45 | def _curry_namespace(ns): 46 | return dict( 47 | (name, toolz.curry(f) if _should_curry(f) else f) 48 | for name, f in ns.items() if '__' not in name 49 | ) 50 | 51 | 52 | locals().update(toolz.merge( 53 | _curry_namespace(vars(toolz)), 54 | _curry_namespace(vars(exceptions)), 55 | )) 56 | 57 | # Clean up the namespace. 58 | del _nargs 59 | del _should_curry 60 | del exceptions 61 | del toolz 62 | -------------------------------------------------------------------------------- /toolz/curried/exceptions.py: -------------------------------------------------------------------------------- 1 | import toolz 2 | 3 | 4 | __all__ = ['merge_with', 'merge'] 5 | 6 | 7 | @toolz.curry 8 | def merge_with(fn, *dicts, **kwargs): 9 | if len(dicts) == 0: 10 | raise TypeError() 11 | else: 12 | return toolz.merge_with(fn, *dicts, **kwargs) 13 | 14 | 15 | @toolz.curry 16 | def merge(*dicts, **kwargs): 17 | if len(dicts) == 0: 18 | raise TypeError() 19 | else: 20 | return toolz.merge(*dicts, **kwargs) 21 | 22 | merge_with.__doc__ = toolz.merge_with.__doc__ 23 | merge.__doc__ = toolz.merge.__doc__ 24 | -------------------------------------------------------------------------------- /toolz/curried/operator.py: -------------------------------------------------------------------------------- 1 | from __future__ import 
def _get_factory(f, kwargs):
    """ Pop and return the ``factory`` callable from ``kwargs``.

    Any other leftover keyword triggers the usual unexpected-keyword
    TypeError on behalf of function ``f``.
    """
    factory = kwargs.pop('factory', dict)
    if kwargs:
        raise TypeError("{0}() got an unexpected keyword argument "
                        "'{1}'".format(f.__name__, kwargs.popitem()[0]))
    return factory


def merge(*dicts, **kwargs):
    """ Combine several dictionaries into a single new one

    >>> merge({1: 'one'}, {2: 'two'})
    {1: 'one', 2: 'two'}

    When a key appears more than once, the value from the later
    dictionary wins

    >>> merge({1: 2, 3: 4}, {3: 3, 4: 4})
    {1: 2, 3: 3, 4: 4}

    See Also:
        merge_with
    """
    # A single non-dict argument is interpreted as an iterable of dicts.
    if len(dicts) == 1 and not isinstance(dicts[0], dict):
        dicts = dicts[0]
    factory = _get_factory(merge, kwargs)

    merged = factory()
    for mapping in dicts:
        merged.update(mapping)
    return merged
def valmap(func, d, factory=dict):
    """ Apply ``func`` to every value of dictionary ``d``

    >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
    >>> valmap(sum, bills)  # doctest: +SKIP
    {'Alice': 65, 'Bob': 45}

    See Also:
        keymap
        itemmap
    """
    out = factory()
    out.update((k, func(v)) for k, v in iteritems(d))
    return out


def keymap(func, d, factory=dict):
    """ Apply ``func`` to every key of dictionary ``d``

    >>> bills = {"Alice": [20, 15, 30], "Bob": [10, 35]}
    >>> keymap(str.lower, bills)  # doctest: +SKIP
    {'alice': [20, 15, 30], 'bob': [10, 35]}

    See Also:
        valmap
        itemmap
    """
    out = factory()
    out.update((func(k), v) for k, v in iteritems(d))
    return out


def itemmap(func, d, factory=dict):
    """ Apply ``func`` to every (key, value) pair of dictionary ``d``

    >>> accountids = {"Alice": 10, "Bob": 20}
    >>> itemmap(reversed, accountids)  # doctest: +SKIP
    {10: "Alice", 20: "Bob"}

    See Also:
        keymap
        valmap
    """
    out = factory()
    out.update(func(item) for item in iteritems(d))
    return out
def keyfilter(predicate, d, factory=dict):
    """ Keep only the items of ``d`` whose key satisfies ``predicate``

    >>> iseven = lambda x: x % 2 == 0
    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> keyfilter(iseven, d)
    {2: 3, 4: 5}

    See Also:
        valfilter
        itemfilter
        keymap
    """
    out = factory()
    for key, val in iteritems(d):
        if predicate(key):
            out[key] = val
    return out


def itemfilter(predicate, d, factory=dict):
    """ Keep only the (key, value) items of ``d`` that satisfy ``predicate``

    >>> def isvalid(item):
    ...     k, v = item
    ...     return k % 2 == 0 and v < 4

    >>> d = {1: 2, 2: 3, 3: 4, 4: 5}
    >>> itemfilter(isvalid, d)
    {2: 3}

    See Also:
        keyfilter
        valfilter
        itemmap
    """
    out = factory()
    for kv in iteritems(d):
        if predicate(kv):
            key, val = kv
            out[key] = val
    return out


def assoc(d, key, value, factory=dict):
    """ Return a copy of ``d`` with ``d[key]`` set to ``value``

    The input dictionary itself is never modified.

    >>> assoc({'x': 1}, 'x', 2)
    {'x': 2}
    >>> assoc({'x': 1}, 'y', 3)  # doctest: +SKIP
    {'x': 1, 'y': 3}
    """
    extra = factory()
    extra[key] = value
    return merge(d, extra, factory=factory)
def update_in(d, keys, func, default=None, factory=dict):
    """ Update a value inside a (potentially) nested dictionary

    Returns a copy of ``d`` in which the value found by following the key
    path ``keys`` has been replaced with ``func`` applied to the old value.
    The original dictionary is never mutated.

    If the path does not exist, intermediate dictionaries are created with
    ``factory`` and the innermost value becomes ``func(default)``.

    >>> inc = lambda x: x + 1
    >>> update_in({'a': 0}, ['a'], inc)
    {'a': 1}

    >>> # updating a value when the first key is absent
    >>> update_in({}, [1, 2, 3], str, default="bar")
    {1: {2: {3: 'bar'}}}
    >>> update_in({1: 'foo'}, [2, 3, 4], inc, 0)
    {1: 'foo', 2: {3: {4: 1}}}
    """
    assert len(keys) > 0
    key, rest = keys[0], keys[1:]
    if not rest:
        # Base case: rewrite the value stored at the final key.
        replacement = func(d[key]) if key in d else func(default)
        return assoc(d, key, replacement, factory)
    # Recurse into the existing sub-dict, or into a fresh one if absent.
    child = d[key] if key in d else factory()
    return assoc(d, key, update_in(child, rest, func, default, factory),
                 factory)
def identity(x):
    """ Return the argument unchanged

    >>> identity(3)
    3
    """
    return x
def thread_last(val, *forms):
    """ Thread ``val`` through a sequence of functions/forms

    Each form is either a callable, which is applied directly, or a tuple
    ``(func, a, b, ...)``, which is called as ``func(a, b, ..., acc)`` with
    the accumulated value placed in the *last* position.

    >>> def double(x): return 2*x
    >>> def inc(x): return x + 1
    >>> thread_last(1, inc, double)
    4

    >>> def add(x, y): return x + y
    >>> def pow(x, y): return x**y
    >>> thread_last(1, (add, 4), (pow, 2))  # pow(2, add(4, 1))
    32

    So in general
        thread_last(x, f, (g, y, z))
    expands to
        g(y, z, f(x))

    >>> def iseven(x):
    ...     return x % 2 == 0
    >>> list(thread_last([1, 2, 3], (map, inc), (filter, iseven)))
    [2, 4]

    See Also:
        thread_first
    """
    def step(acc, form):
        if callable(form):
            return form(acc)
        if isinstance(form, tuple):
            fn, extra = form[0], form[1:]
            return fn(*(extra + (acc,)))
    return reduce(step, forms, val)
class curry(object):
    """ Curry a callable function

    Enables partial application of arguments through calling a function with an
    incomplete set of arguments.

    >>> def mul(x, y):
    ...     return x * y
    >>> mul = curry(mul)

    >>> double = mul(2)
    >>> double(10)
    20

    Also supports keyword arguments

    >>> @curry  # Can use curry as a decorator
    ... def f(x, y, a=10):
    ...     return a * (x + y)

    >>> add = f(a=1)
    >>> add(2, 3)
    5

    See Also:
        toolz.curried - namespace of curried functions
        http://toolz.readthedocs.org/en/latest/curry.html
    """
    def __init__(self, *args, **kwargs):
        # Signature is (func, *args, **kwargs); func is pulled out of *args
        # manually so that a keyword literally named 'func' still works.
        if not args:
            raise TypeError('__init__() takes at least 2 arguments (1 given)')
        func, args = args[0], args[1:]
        if not callable(func):
            raise TypeError("Input must be callable")

        # curry- or functools.partial-like object?  Unpack and merge arguments
        # so nested curries/partials collapse into a single flat partial.
        if (hasattr(func, 'func')
                and hasattr(func, 'args')
                and hasattr(func, 'keywords')
                and isinstance(func.args, tuple)):
            _kwargs = {}
            if func.keywords:
                _kwargs.update(func.keywords)
            # Outer keywords take precedence over the wrapped object's.
            _kwargs.update(kwargs)
            kwargs = _kwargs
            args = func.args + args

        # All call state lives in a single functools.partial object.
        if kwargs:
            self._partial = partial(func, *args, **kwargs)
        else:
            self._partial = partial(func, *args)

        self.__doc__ = getattr(func, '__doc__', None)
        self.__name__ = getattr(func, '__name__', '')

    # The func/args/keywords properties mirror functools.partial's interface
    # so curry objects can themselves be unpacked by __init__ above.
    @property
    def func(self):
        return self._partial.func

    @property
    def args(self):
        return self._partial.args

    @property
    def keywords(self):
        return self._partial.keywords

    @property
    def func_name(self):
        # Python 2 style alias for __name__.
        return self.__name__

    def __str__(self):
        return str(self.func)

    def __repr__(self):
        return repr(self.func)

    def __hash__(self):
        # keywords is a dict (unhashable); fold it in as a frozenset of items.
        return hash((self.func, self.args,
                     frozenset(self.keywords.items()) if self.keywords
                     else None))

    def __eq__(self, other):
        return (isinstance(other, curry) and self.func == other.func and
                self.args == other.args and self.keywords == other.keywords)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __call__(self, *args, **kwargs):
        # Try a real call first; on TypeError decide whether the error is
        # genuine (enough args were supplied) or means "keep currying".
        try:
            return self._partial(*args, **kwargs)
        except TypeError:
            # If there was a genuine TypeError
            required_args = _num_required_args(self.func)
            if (required_args is not None and
                    len(args) + len(self.args) >= required_args):
                raise

        # Not enough arguments yet: return a new curry with these bound.
        return curry(self._partial, *args, **kwargs)

    def __get__(self, instance, owner):
        # Descriptor protocol: makes curried functions usable as methods by
        # binding the instance as the first argument.
        if instance is None:
            return self
        return curry(self, instance)

    # pickle protocol because functools.partial objects can't be pickled
    def __getstate__(self):
        # dictoolz.keyfilter, I miss you!
        userdict = tuple((k, v) for k, v in self.__dict__.items()
                         if k != '_partial')
        return self.func, self.args, self.keywords, userdict

    def __setstate__(self, state):
        func, args, kwargs, userdict = state
        self.__init__(func, *args, **(kwargs or {}))
        self.__dict__.update(userdict)
return x + y 302 | 303 | Use the ``cache`` keyword to provide a dict-like object as an initial cache 304 | 305 | >>> @memoize(cache={(1, 2): 3}) 306 | ... def add(x, y): 307 | ... return x + y 308 | 309 | Note that the above works as a decorator because ``memoize`` is curried. 310 | 311 | It is also possible to provide a ``key(args, kwargs)`` function that 312 | calculates keys used for the cache, which receives an ``args`` tuple and 313 | ``kwargs`` dict as input, and must return a hashable value. However, 314 | the default key function should be sufficient most of the time. 315 | 316 | >>> # Use key function that ignores extraneous keyword arguments 317 | >>> @memoize(key=lambda args, kwargs: args) 318 | ... def add(x, y, verbose=False): 319 | ... if verbose: 320 | ... print('Calculating %s + %s' % (x, y)) 321 | ... return x + y 322 | """ 323 | if cache is None: 324 | cache = {} 325 | 326 | try: 327 | may_have_kwargs = has_kwargs(func) 328 | # Is unary function (single arg, no variadic argument or keywords)? 
class Compose(object):
    """ A composition of functions

    Calling a ``Compose`` instance applies the stored functions in sequence:
    ``Compose((f, g, h))(x)`` computes ``f(g(h(x)))``.

    See Also:
        compose
    """
    __slots__ = 'first', 'funcs'

    def __init__(self, funcs):
        # Store in application order: ``first`` receives the raw arguments,
        # then each function in ``funcs`` is applied to the running result.
        funcs = tuple(reversed(funcs))
        self.first = funcs[0]
        self.funcs = funcs[1:]

    def __call__(self, *args, **kwargs):
        # Only the innermost function may take multiple args/kwargs; the
        # remaining functions are unary.
        ret = self.first(*args, **kwargs)
        for f in self.funcs:
            ret = f(ret)
        return ret

    def __getstate__(self):
        return self.first, self.funcs

    def __setstate__(self, state):
        self.first, self.funcs = state

    @property
    def __doc__(self):
        def composed_doc(*fs):
            """Generate a docstring for the composition of fs.
            """
            if not fs:
                # Argument name for the docstring.
                return '*args, **kwargs'

            return '{f}({g})'.format(f=fs[0].__name__, g=composed_doc(*fs[1:]))

        try:
            return (
                'lambda *args, **kwargs: ' +
                composed_doc(*reversed((self.first,) + self.funcs))
            )
        except AttributeError:
            # One of our callables does not have a `__name__`, whatever.
            return 'A composition of functions'

    @property
    def __name__(self):
        try:
            # BUG FIX: the original had a trailing comma after the bare
            # generator expression ("...self.funcs),"), which is a
            # SyntaxError on Python >= 3.7 (bpo-32012).  Removing the stray
            # comma restores the intended behavior.
            return '_of_'.join(
                f.__name__ for f in reversed((self.first,) + self.funcs)
            )
        except AttributeError:
            return type(self).__name__
class juxt(object):
    """ Apply several functions to the same arguments at once.

    ``juxt(f, g)(x)`` returns the tuple ``(f(x), g(x))``.  The functions may
    be passed either as separate arguments or as a single iterable.

    Name comes from juxtaposition: the fact of two things being seen or placed
    close together with contrasting effect.

    >>> inc = lambda x: x + 1
    >>> double = lambda x: x * 2
    >>> juxt(inc, double)(10)
    (11, 20)
    >>> juxt([inc, double])(10)
    (11, 20)
    """
    __slots__ = ['funcs']

    def __init__(self, *funcs):
        # A single non-callable argument is treated as an iterable of funcs.
        if len(funcs) == 1 and not callable(funcs[0]):
            funcs = funcs[0]
        self.funcs = tuple(funcs)

    def __call__(self, *args, **kwargs):
        results = []
        for fn in self.funcs:
            results.append(fn(*args, **kwargs))
        return tuple(results)

    def __getstate__(self):
        return self.funcs

    def __setstate__(self, state):
        self.funcs = state


def do(func, x):
    """ Call ``func(x)`` for its side effect, then return ``x`` unchanged.

    Useful for inserting logging or other side effects into a pipeline of
    pure transformations:

    >>> from toolz import compose
    >>> from toolz.curried import do

    >>> log = []
    >>> inc = lambda x: x + 1
    >>> inc = compose(inc, do(log.append))
    >>> inc(1)
    2
    >>> inc(11)
    12
    >>> log
    [1, 11]

    """
    func(x)
    return x
549 | 550 | This function is curried. 551 | 552 | >>> def div(a, b): 553 | ... return a / b 554 | ... 555 | >>> flip(div, 2, 1) 556 | 0.5 557 | >>> div_by_two = flip(div, 2) 558 | >>> div_by_two(4) 559 | 2.0 560 | 561 | This is particularly useful for built in functions and functions defined 562 | in C extensions that accept positional only arguments. For example: 563 | isinstance, issubclass. 564 | 565 | >>> data = [1, 'a', 'b', 2, 1.5, object(), 3] 566 | >>> only_ints = list(filter(flip(isinstance, int), data)) 567 | >>> only_ints 568 | [1, 2, 3] 569 | """ 570 | return func(b, a) 571 | -------------------------------------------------------------------------------- /toolz/itertoolz.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import heapq 3 | import collections 4 | import operator 5 | from functools import partial 6 | from toolz.compatibility import (map, filterfalse, zip, zip_longest, iteritems) 7 | from toolz.utils import no_default 8 | 9 | 10 | __all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave', 11 | 'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth', 12 | 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv', 13 | 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', 14 | 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', 15 | 'join', 'tail', 'diff', 'topk', 'peek') 16 | 17 | 18 | def remove(predicate, seq): 19 | """ Return those items of sequence for which predicate(item) is False 20 | 21 | >>> def iseven(x): 22 | ... 
def accumulate(binop, seq, initial=no_default):
    """ Repeatedly apply a binary function to a sequence, yielding every
    intermediate result

    >>> from operator import add, mul
    >>> list(accumulate(add, [1, 2, 3, 4, 5]))
    [1, 3, 6, 10, 15]
    >>> list(accumulate(mul, [1, 2, 3, 4, 5]))
    [1, 2, 6, 24, 120]

    Accumulate is similar to ``reduce`` and is good for making functions like
    cumulative sum:

    >>> from functools import partial, reduce
    >>> sum = partial(reduce, add)
    >>> cumsum = partial(accumulate, add)

    An optional ``initial`` value seeds the accumulation, mirroring the
    optional initializer of ``reduce``:

    >>> list(accumulate(add, [1, 2, 3], -1))
    [-1, 0, 2, 5]
    >>> list(accumulate(add, [], 1))
    [1]

    See Also:
        itertools.accumulate : In standard itertools for Python 3.2+
    """
    it = iter(seq)
    if initial is no_default:
        # No seed given: the first element starts the accumulation.
        total = next(it)
    else:
        total = initial
    yield total
    for value in it:
        total = binop(total, value)
        yield total
def merge_sorted(*seqs, **kwargs):
    """ Merge and sort a collection of sorted collections

    Lazy: at most one value from each input iterable is held in memory.

    >>> list(merge_sorted([1, 3, 5], [2, 4, 6]))
    [1, 2, 3, 4, 5, 6]

    >>> ''.join(merge_sorted('abc', 'abc', 'abc'))
    'aaabbbccc'

    The "key" function used to sort the input may be passed as a keyword.

    >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3))
    [2, 1, 3, 3]
    """
    sort_key = kwargs.get('key', None)
    if sort_key is not None:
        return _merge_sorted_key(seqs, sort_key)
    # Without a key function, heapq.merge already performs exactly this lazy
    # k-way merge (by value rather than by key(value)).
    return heapq.merge(*seqs)
def interleave(seqs, pass_exceptions=()):
    """ Interleave a sequence of sequences

    >>> list(interleave([[1, 2], [3, 4]]))
    [1, 3, 2, 4]

    >>> ''.join(interleave(('ABC', 'XY')))
    'AXBYC'

    Both the individual sequences and the sequence of sequences may be infinite

    Returns a lazy iterator
    """
    # Round-robin over the iterators, dropping each one once it is
    # exhausted (or once it raises one of ``pass_exceptions``).
    pending = map(iter, seqs)
    while pending:
        survivors = []
        for source in pending:
            try:
                yield next(source)
            except (StopIteration,) + tuple(pass_exceptions):
                continue
            survivors.append(source)
        pending = survivors
def isdistinct(seq):
    """ All values in sequence are distinct

    >>> isdistinct([1, 2, 3])
    True
    >>> isdistinct([1, 2, 1])
    False

    >>> isdistinct("Hello")
    False
    >>> isdistinct("World")
    True
    """
    if iter(seq) is seq:
        # ``seq`` is an iterator and can only be traversed once: check
        # incrementally so we can stop at the first duplicate.
        observed = set()
        for item in seq:
            if item in observed:
                return False
            observed.add(item)
        return True
    # Reusable container: duplicates shrink the deduplicated size.
    return len(seq) == len(set(seq))
| """ 300 | return itertools.islice(seq, n, None) 301 | 302 | 303 | def take_nth(n, seq): 304 | """ Every nth item in seq 305 | 306 | >>> list(take_nth(2, [10, 20, 30, 40, 50])) 307 | [10, 30, 50] 308 | """ 309 | return itertools.islice(seq, 0, None, n) 310 | 311 | 312 | def first(seq): 313 | """ The first element in a sequence 314 | 315 | >>> first('ABC') 316 | 'A' 317 | """ 318 | return next(iter(seq)) 319 | 320 | 321 | def second(seq): 322 | """ The second element in a sequence 323 | 324 | >>> second('ABC') 325 | 'B' 326 | """ 327 | return next(itertools.islice(seq, 1, None)) 328 | 329 | 330 | def nth(n, seq): 331 | """ The nth element in a sequence 332 | 333 | >>> nth(1, 'ABC') 334 | 'B' 335 | """ 336 | if isinstance(seq, (tuple, list, collections.Sequence)): 337 | return seq[n] 338 | else: 339 | return next(itertools.islice(seq, n, None)) 340 | 341 | 342 | def last(seq): 343 | """ The last element in a sequence 344 | 345 | >>> last('ABC') 346 | 'C' 347 | """ 348 | return tail(1, seq)[0] 349 | 350 | 351 | rest = partial(drop, 1) 352 | 353 | 354 | def _get(ind, seq, default): 355 | try: 356 | return seq[ind] 357 | except (KeyError, IndexError): 358 | return default 359 | 360 | 361 | def get(ind, seq, default=no_default): 362 | """ Get element in a sequence or dict 363 | 364 | Provides standard indexing 365 | 366 | >>> get(1, 'ABC') # Same as 'ABC'[1] 367 | 'B' 368 | 369 | Pass a list to get multiple values 370 | 371 | >>> get([1, 2], 'ABC') # ('ABC'[1], 'ABC'[2]) 372 | ('B', 'C') 373 | 374 | Works on any value that supports indexing/getitem 375 | For example here we see that it works with dictionaries 376 | 377 | >>> phonebook = {'Alice': '555-1234', 378 | ... 'Bob': '555-5678', 379 | ... 
def concat(seqs):
    """ Concatenate zero or more iterables, any of which may be infinite.

    An infinite sequence will prevent the rest of the arguments from
    being included.

    ``chain.from_iterable`` (rather than ``chain(*seqs)``) is used so that
    ``seqs`` itself may be a lazy generator.

    >>> list(concat([[], [1], [2, 3]]))
    [1, 2, 3]

    See also:
        itertools.chain.from_iterable  equivalent
    """
    return itertools.chain.from_iterable(seqs)
[["a", "b"], ["c", "d", "e"]])) 453 | ['A', 'B', 'C', 'D', 'E'] 454 | """ 455 | return concat(map(func, seqs)) 456 | 457 | 458 | def cons(el, seq): 459 | """ Add el to beginning of (possibly infinite) sequence seq. 460 | 461 | >>> list(cons(1, [2, 3])) 462 | [1, 2, 3] 463 | """ 464 | yield el 465 | for s in seq: 466 | yield s 467 | 468 | 469 | def interpose(el, seq): 470 | """ Introduce element between each pair of elements in seq 471 | 472 | >>> list(interpose("a", [1, 2, 3])) 473 | [1, 'a', 2, 'a', 3] 474 | """ 475 | combined = zip(itertools.repeat(el), seq) 476 | return drop(1, concat(combined)) 477 | 478 | 479 | def frequencies(seq): 480 | """ Find number of occurrences of each value in seq 481 | 482 | >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat']) #doctest: +SKIP 483 | {'cat': 3, 'ox': 1, 'pig': 2} 484 | 485 | See Also: 486 | countby 487 | groupby 488 | """ 489 | d = collections.defaultdict(int) 490 | for item in seq: 491 | d[item] += 1 492 | return dict(d) 493 | 494 | 495 | def reduceby(key, binop, seq, init=no_default): 496 | """ Perform a simultaneous groupby and reduction 497 | 498 | The computation: 499 | 500 | >>> result = reduceby(key, binop, seq, init) # doctest: +SKIP 501 | 502 | is equivalent to the following: 503 | 504 | >>> def reduction(group): # doctest: +SKIP 505 | ... return reduce(binop, group, init) # doctest: +SKIP 506 | 507 | >>> groups = groupby(key, seq) # doctest: +SKIP 508 | >>> result = valmap(reduction, groups) # doctest: +SKIP 509 | 510 | But the former does not build the intermediate groups, allowing it to 511 | operate in much less space. This makes it suitable for larger datasets 512 | that do not fit comfortably in memory 513 | 514 | The ``init`` keyword argument is the default initialization of the 515 | reduction. This can be either a constant value like ``0`` or a callable 516 | like ``lambda : 0`` as might be used in ``defaultdict``. 
def reduceby(key, binop, seq, init=no_default):
    """ Perform a simultaneous groupby and reduction

    The computation:

    >>> result = reduceby(key, binop, seq, init)      # doctest: +SKIP

    is equivalent to the following:

    >>> def reduction(group):                           # doctest: +SKIP
    ...     return reduce(binop, group, init)           # doctest: +SKIP

    >>> groups = groupby(key, seq)                    # doctest: +SKIP
    >>> result = valmap(reduction, groups)            # doctest: +SKIP

    But the former does not build the intermediate groups, allowing it to
    operate in much less space.  This makes it suitable for larger datasets
    that do not fit comfortably in memory

    The ``init`` keyword argument is the default initialization of the
    reduction.  This can be either a constant value like ``0`` or a callable
    like ``lambda : 0`` as might be used in ``defaultdict``.

    >>> from operator import add, mul
    >>> iseven = lambda x: x % 2 == 0
    >>> data = [1, 2, 3, 4, 5]

    >>> reduceby(iseven, add, data)  # doctest: +SKIP
    {False: 9, True: 6}

    >>> reduceby(iseven, mul, data)  # doctest: +SKIP
    {False: 15, True: 8}

    >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000},
    ...             {'name': 'fight crime', 'state': 'IL', 'cost': 100000},
    ...             {'name': 'help farmers', 'state': 'IL', 'cost': 2000000},
    ...             {'name': 'help farmers', 'state': 'CA', 'cost': 200000}]

    >>> reduceby('state',                        # doctest: +SKIP
    ...          lambda acc, x: acc + x['cost'],
    ...          projects, 0)
    {'CA': 1200000, 'IL': 2100000}

    >>> def set_add(s, i):
    ...     s.add(i)
    ...     return s

    >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set)  # doctest: +SKIP
    {True: set([2, 4]),
     False: set([1, 3])}
    """
    # Normalize a constant ``init`` into a factory so every new group
    # starts from a fresh value (important for mutable accumulators).
    if init is not no_default and not callable(init):
        init_value = init
        init = lambda: init_value
    if not callable(key):
        key = getter(key)
    totals = {}
    for item in seq:
        k = key(item)
        if k in totals:
            totals[k] = binop(totals[k], item)
        elif init is no_default:
            # No initializer: the first element of a group seeds its total.
            totals[k] = item
        else:
            totals[k] = binop(init(), item)
    return totals
def sliding_window(n, seq):
    """ A sequence of overlapping subsequences

    >>> list(sliding_window(2, [1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4)]

    This function creates a sliding window suitable for transformations like
    sliding means / smoothing

    >>> mean = lambda seq: float(sum(seq)) / len(seq)
    >>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))
    [1.5, 2.5, 3.5]

    If ``seq`` has fewer than ``n`` elements no windows are produced.
    """
    it = iter(seq)
    # An efficient FIFO data structure with maximum length
    window = collections.deque(itertools.islice(it, n), n)
    if len(window) != n:
        # Too few elements for even one window.  Returning (rather than
        # raising StopIteration) keeps the generator valid under PEP 479,
        # where a StopIteration leaking from a generator body becomes a
        # RuntimeError on Python 3.7+.
        return
    window_append = window.append
    for item in it:
        yield tuple(window)
        window_append(item)
    yield tuple(window)
def partition_all(n, seq):
    """ Partition all elements of sequence into tuples of length at most n

    The final tuple may be shorter to accommodate extra elements.

    >>> list(partition_all(2, [1, 2, 3, 4]))
    [(1, 2), (3, 4)]

    >>> list(partition_all(2, [1, 2, 3, 4, 5]))
    [(1, 2), (3, 4), (5,)]

    See Also:
        partition
    """
    args = [iter(seq)] * n
    it = zip_longest(*args, fillvalue=no_pad)
    try:
        prev = next(it)
    except StopIteration:
        # Empty input: yield nothing.  Letting the StopIteration escape the
        # generator body would become a RuntimeError on Python 3.7+
        # (PEP 479).
        return
    for item in it:
        yield prev
        prev = item
    if prev[-1] is no_pad:
        # Trim the padding off the final, partial tuple.
        yield prev[:prev.index(no_pad)]
    else:
        yield prev
def getter(index):
    # Build an itemgetter-like callable.  A list index means "fetch several
    # fields and return them as a tuple", mirroring ``operator.itemgetter``
    # with multiple arguments -- including the one-element and zero-element
    # cases that itemgetter itself cannot express.
    if not isinstance(index, list):
        return operator.itemgetter(index)
    if len(index) == 1:
        only = index[0]
        return lambda x: (x[only],)
    if index:
        return operator.itemgetter(*index)
    return lambda x: ()
def join(leftkey, leftseq, rightkey, rightseq,
         left_default=no_default, right_default=no_default):
    """ Join two sequences on common attributes

    This is a semi-streaming operation.  The LEFT sequence is fully evaluated
    and placed into memory.  The RIGHT sequence is evaluated lazily and so can
    be arbitrarily large.

    >>> friends = [('Alice', 'Edith'),
    ...            ('Alice', 'Zhao'),
    ...            ('Edith', 'Alice'),
    ...            ('Zhao', 'Alice'),
    ...            ('Zhao', 'Edith')]

    >>> cities = [('Alice', 'NYC'),
    ...           ('Alice', 'Chicago'),
    ...           ('Dan', 'Syndey'),
    ...           ('Edith', 'Paris'),
    ...           ('Edith', 'Berlin'),
    ...           ('Zhao', 'Shanghai')]

    >>> # Vacation opportunities
    >>> # In what cities do people have friends?
    >>> result = join(second, friends,
    ...               first, cities)
    >>> for ((a, b), (c, d)) in sorted(unique(result)):
    ...     print((a, d))
    ('Alice', 'Berlin')
    ('Alice', 'Paris')
    ('Alice', 'Shanghai')
    ('Edith', 'Chicago')
    ('Edith', 'NYC')
    ('Zhao', 'Chicago')
    ('Zhao', 'NYC')
    ('Zhao', 'Berlin')
    ('Zhao', 'Paris')

    Specify outer joins with keyword arguments ``left_default`` and/or
    ``right_default``.  Here is a full outer join in which unmatched elements
    are paired with None.

    >>> identity = lambda x: x
    >>> list(join(identity, [1, 2, 3],
    ...           identity, [2, 3, 4],
    ...           left_default=None, right_default=None))
    [(2, 2), (3, 3), (None, 4), (1, None)]

    Usually the key arguments are callables to be applied to the sequences.  If
    the keys are not obviously callable then it is assumed that indexing was
    intended, e.g. the following is a legal change

    >>> # result = join(second, friends, first, cities)
    >>> result = join(1, friends, 0, cities)  # doctest: +SKIP
    """
    if not callable(leftkey):
        leftkey = getter(leftkey)
    if not callable(rightkey):
        rightkey = getter(rightkey)

    # The whole left side is grouped in memory; the right side streams.
    left_groups = groupby(leftkey, leftseq)
    matched_keys = set()

    for right_item in rightseq:
        k = rightkey(right_item)
        matched_keys.add(k)
        if k in left_groups:
            for left_item in left_groups[k]:
                yield (left_item, right_item)
        elif left_default is not no_default:
            # Right-item with no left partner: emit with the left default.
            yield (left_default, right_item)

    if right_default is not no_default:
        # Emit left items that never found a right partner.
        for k, left_items in left_groups.items():
            if k not in matched_keys:
                for left_item in left_items:
                    yield (left_item, right_default)
def topk(k, seq, key=None):
    """
    Find the k largest elements of a sequence

    Operates lazily in ``n*log(k)`` time

    >>> topk(2, [1, 100, 10, 1000])
    (1000, 100)

    Use a key function to change sorted order

    >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len)
    ('Charlie', 'Alice')

    See also:
        heapq.nlargest
    """
    # A non-callable key is interpreted as an index/member to fetch.
    if key is None or callable(key):
        keyfunc = key
    else:
        keyfunc = getter(key)
    return tuple(heapq.nlargest(k, seq, key=keyfunc))
def peek(seq):
    """ Retrieve the next element of a sequence

    Returns the first element and an iterable equivalent to the original
    sequence, still having the element retrieved.

    >>> seq = [0, 1, 2, 3, 4]
    >>> first, seq = peek(seq)
    >>> first
    0
    >>> list(seq)
    [0, 1, 2, 3, 4]

    """
    it = iter(seq)
    head = next(it)
    # Re-attach the consumed element so callers still see the full sequence.
    return head, itertools.chain((head,), it)
def partitionby(func, seq):
    """ Partition a sequence according to a function

    Partition `s` into a sequence of lists such that, when traversing
    `s`, every time the output of `func` changes a new list is started
    and that and subsequent items are collected into that list.

    >>> is_space = lambda c: c == " "
    >>> list(partitionby(is_space, "I have space"))
    [('I',), (' ',), ('h', 'a', 'v', 'e'), (' ',), ('s', 'p', 'a', 'c', 'e')]

    >>> is_large = lambda x: x > 10
    >>> list(partitionby(is_large, [1, 2, 1, 99, 88, 33, 99, -1, 5]))
    [(1, 2, 1), (99, 88, 33, 99), (-1, 5)]

    See also:
        partition
        groupby
        itertools.groupby
    """
    # itertools.groupby already batches consecutive items with equal keys;
    # we only need to materialize each run as a tuple.
    runs = itertools.groupby(seq, key=func)
    return (tuple(run) for _, run in runs)
class EqualityHashKey(object):
    """ Create a hash key that uses equality comparisons between items.

    This may be used to create hash keys for otherwise unhashable types:

    >>> from toolz import curry
    >>> EqualityHashDefault = curry(EqualityHashKey, None)
    >>> set(map(EqualityHashDefault, [[], (), [1], [1]]))  # doctest: +SKIP
    {=[]=, =()=, =[1]=}

    **Caution:** adding N ``EqualityHashKey`` items to a hash container
    may require O(N**2) operations, not O(N) as for typical hashable types,
    since items sharing the default hash are compared pairwise by equality.
    Therefore, a suitable key function such as ``tuple`` or ``frozenset``
    is usually preferred over using ``EqualityHashKey`` if possible.

    The ``key`` argument to ``EqualityHashKey`` should be a function or
    index that returns a hashable object that effectively distinguishes
    unequal items, which avoids the poor scaling of the default key:

    >>> EqualityHashLen = curry(EqualityHashKey, len)
    >>> EqualityHashType = curry(EqualityHashKey, type)  # this works too
    >>> set(map(EqualityHashLen, [[], (), [1], [1]]))  # doctest: +SKIP
    {=[]=, =()=, =[1]=}

    ``EqualityHashKey`` is convenient when a suitable key function is
    complicated or unavailable.  For example, the following returns all
    unique values based on equality:

    >>> from toolz import unique
    >>> vals = [[], [], (), [1], [1], [2], {}, {}, {}]
    >>> list(unique(vals, key=EqualityHashDefault))
    [[], (), [1], [2], {}]

    **Warning:** don't change the equality value of an item already in a hash
    container.  Unhashable types are unhashable for a reason.  For example:

    >>> L1 = [1] ; L2 = [2]
    >>> s = set(map(EqualityHashDefault, [L1, L2]))
    >>> s  # doctest: +SKIP
    {=[1]=, =[2]=}

    >>> L1[0] = 2  # Don't do this!  ``s`` now has duplicate items!
    >>> s  # doctest: +SKIP
    {=[2]=, =[2]=}

    Although this may appear problematic, immutable data types is a common
    idiom in functional programming, and ``EqualityHashKey`` easily allows
    the same idiom to be used by convention rather than strict requirement.

    See Also:
        identity
    """
    __slots__ = ['item', 'key']
    _default_hashkey = '__default__hashkey__'

    def __init__(self, key, item):
        # Normalize ``key``: None means "hash everything alike" (the
        # default sentinel); a non-callable means "index into the item".
        if key is None:
            key = self._default_hashkey
        elif not callable(key):
            key = getter(key)
        self.key = key
        self.item = item

    def __hash__(self):
        hashable = self.key
        if hashable != self._default_hashkey:
            # A real key function: hash the derived value instead.
            hashable = hashable(self.item)
        return hash(hashable)

    def __eq__(self, other):
        # Accessing ``_default_hashkey`` doubles as a duck-type check that
        # ``other`` is an EqualityHashKey-like wrapper.
        try:
            return (self._default_hashkey == other._default_hashkey and
                    self.item == other.item)
        except AttributeError:
            return False

    def __ne__(self, other):
        return not (self == other)

    def __str__(self):
        return '=%s=' % str(self.item)

    def __repr__(self):
        return '=%s=' % repr(self.item)
def unzip(seq):
    """Inverse of ``zip``

    >>> a, b = unzip([('a', 1), ('b', 2)])
    >>> list(a)
    ['a', 'b']
    >>> list(b)
    [1, 2]

    Unlike the naive implementation ``def unzip(seq): zip(*seq)`` this
    implementation can handle a finite sequence of infinite sequences.

    Caveats:

    * The implementation uses ``tee``, and so can use a significant amount
      of auxiliary storage if the resulting iterators are consumed at
      different times.

    * The top level sequence cannot be infinite.

    """
    source = iter(seq)

    # Peek at the first tuple to learn how many output iterators we need.
    try:
        head = tuple(next(source))
    except StopIteration:
        return tuple()

    # Duplicate the (re-assembled) stream once per column, then project
    # column ``col`` out of copy ``col``.
    copies = tee(cons(head, source), len(head))
    return tuple(pluck(col, copy) for col, copy in enumerate(copies))
def fold(binop, seq, default=no_default, map=map, chunksize=128, combine=None):
    """
    Reduce without guarantee of ordered reduction.

    inputs:

    ``binop``     - associative operator. The associative property allows us to
                    leverage a parallel map to perform reductions in parallel.
    ``seq``       - a sequence to be aggregated
    ``default``   - an identity element like 0 for ``add`` or 1 for mul

    ``map``       - an implementation of ``map``. This may be parallel and
                    determines how work is distributed.
    ``chunksize`` - Number of elements of ``seq`` that should be handled
                    within a single function call
    ``combine``   - Binary operator to combine two intermediate results.
                    If ``binop`` is of type (total, item) -> total
                    then ``combine`` is of type (total, total) -> total
                    Defaults to ``binop`` for common case of operators like add

    Fold chunks up the collection into blocks of size ``chunksize`` and then
    feeds each of these to calls to ``reduce``. This work is distributed
    with a call to ``map``, gathered back and then refolded to finish the
    computation. In this way ``fold`` specifies only how to chunk up data but
    leaves the distribution of this work to an externally provided ``map``
    function. This function can be sequential or rely on multithreading,
    multiprocessing, or even distributed solutions.

    If ``map`` intends to serialize functions it should be prepared to accept
    and serialize lambdas. Note that the standard ``pickle`` module fails
    here.

    Example
    -------

    >>> # Provide a parallel map to accomplish a parallel sum
    >>> from operator import add
    >>> fold(add, [1, 2, 3, 4], chunksize=2, map=map)
    10
    """
    if combine is None:
        combine = binop

    chunks = partition_all(chunksize, seq)

    # Reduce each chunk independently (possibly in parallel via ``map``).
    if default is no_default:
        reducer = lambda chunk: reduce(binop, chunk)
    else:
        reducer = lambda chunk: reduce(binop, chunk, default)
    partials = list(map(reducer, chunks))  # TODO: Support complete laziness

    if len(partials) == 1:
        # A single intermediate result: the fold is complete.
        return partials[0]
    # Otherwise recurse to combine the intermediate results.
    return fold(combine, partials, map=map, chunksize=chunksize)
def test_EqualityHashKey_callable_key():
    # Common simple hash key functions applied to mixed container data.
    EqualityHashLen = curry(EqualityHashKey, len)
    EqualityHashType = curry(EqualityHashKey, type)
    EqualityHashId = curry(EqualityHashKey, id)
    EqualityHashFirst = curry(EqualityHashKey, first)
    data1 = [[], [1], (), (1,), {}, {1: 2}]
    data2 = [[1, 2], (1, 2), (1, 3), [1, 3], [2, 1], {1: 2}]
    # len, type, and id keys all deduplicate triplicated data back to
    # the originals; ``first`` only applies to the non-empty data2.
    for hashkey in (EqualityHashLen, EqualityHashType, EqualityHashId):
        assert list(unique(data1 * 3, key=hashkey)) == data1
        assert list(unique(data2 * 3, key=hashkey)) == data2
    assert list(unique(data2 * 3, key=EqualityHashFirst)) == data2
| assert list(unique(3*[d1, d2, d3a, d3b], 69 | key=EqualityHashFirstnameAge)) == [d1, d2, d3a] 70 | list1 = [0] * 10 71 | list2 = [0] * 100 72 | list3a = [1] * 10 73 | list3b = [1] * 10 74 | EqualityHash0 = curry(EqualityHashKey, 0) 75 | assert list(unique(3*[list1, list2, list3a, list3b], 76 | key=EqualityHash0)) == [list1, list2, list3a] 77 | 78 | 79 | def test_unzip(): 80 | def _to_lists(seq, n=10): 81 | """iter of iters -> finite list of finite lists 82 | """ 83 | def initial(s): 84 | return list(take(n, s)) 85 | 86 | return initial(map(initial, seq)) 87 | 88 | def _assert_initial_matches(a, b, n=10): 89 | assert list(take(n, a)) == list(take(n, b)) 90 | 91 | # Unzips a simple list correctly 92 | assert _to_lists(unzip([('a', 1), ('b', 2), ('c', 3)])) \ 93 | == [['a', 'b', 'c'], [1, 2, 3]] 94 | 95 | # Can handle a finite number of infinite iterators (the naive unzip 96 | # implementation `zip(*args)` impelementation fails on this example). 97 | a, b, c = unzip(zip(count(1), repeat(0), repeat(1))) 98 | _assert_initial_matches(a, count(1)) 99 | _assert_initial_matches(b, repeat(0)) 100 | _assert_initial_matches(c, repeat(1)) 101 | 102 | # Sensibly handles empty input 103 | assert list(unzip(zip([]))) == [] 104 | -------------------------------------------------------------------------------- /toolz/sandbox/tests/test_parallel.py: -------------------------------------------------------------------------------- 1 | from toolz.sandbox.parallel import fold 2 | from toolz import reduce 3 | from operator import add 4 | 5 | 6 | def test_fold(): 7 | assert fold(add, range(10), 0) == reduce(add, range(10), 0) 8 | assert fold(add, range(10), 0, chunksize=2) == reduce(add, range(10), 0) 9 | assert fold(add, range(10)) == fold(add, range(10), 0) 10 | 11 | def setadd(s, item): 12 | s = s.copy() 13 | s.add(item) 14 | return s 15 | 16 | assert fold(setadd, [1, 2, 3], set()) == set((1, 2, 3)) 17 | assert (fold(setadd, [1, 2, 3], set(), chunksize=2, combine=set.union) 18 | == 
# --- toolz/sandbox/tests/test_parallel.py ---
# NOTE(review): reconstructed from repository-dump text ("NN |" residue).
from toolz.sandbox.parallel import fold
from toolz import reduce
from operator import add


def test_fold():
    # fold must agree with a plain sequential reduce for associative ops.
    assert fold(add, range(10), 0) == reduce(add, range(10), 0)
    assert fold(add, range(10), 0, chunksize=2) == reduce(add, range(10), 0)
    # Default initial value behaves like an explicit zero.
    assert fold(add, range(10)) == fold(add, range(10), 0)

    def setadd(s, item):
        s = s.copy()
        s.add(item)
        return s

    assert fold(setadd, [1, 2, 3], set()) == set((1, 2, 3))
    # A custom `combine` merges the per-chunk results.
    assert (fold(setadd, [1, 2, 3], set(), chunksize=2, combine=set.union)
            == set((1, 2, 3)))


# --- toolz/tests/test_compatibility.py ---
from toolz.compatibility import map, filter, iteritems, iterkeys, itervalues


def test_map_filter_are_lazy():
    # Neither map nor filter may call the function until iterated.
    def bad(x):
        raise Exception()
    map(bad, [1, 2, 3])
    filter(bad, [1, 2, 3])


def test_dict_iteration():
    d = {'a': 1, 'b': 2, 'c': 3}
    # The iter* helpers must return iterators, not materialized lists.
    assert not isinstance(iteritems(d), list)
    assert not isinstance(iterkeys(d), list)
    assert not isinstance(itervalues(d), list)
    assert set(iteritems(d)) == set(d.items())
    assert set(iterkeys(d)) == set(d.keys())
    assert set(itervalues(d)) == set(d.values())


# --- toolz/tests/test_curried.py ---
import toolz
import toolz.curried
from toolz.curried import (take, first, second, sorted, merge_with, reduce,
                           merge, operator as cop)
from collections import defaultdict
from operator import add


def test_take():
    assert list(take(2)([1, 2, 3])) == [1, 2]


def test_first():
    # first needs no currying, so the original function is re-exported.
    assert first is toolz.itertoolz.first


def test_merge():
    assert merge(factory=lambda: defaultdict(int))({1: 1}) == {1: 1}
    assert merge({1: 1}) == {1: 1}
    assert merge({1: 1}, factory=lambda: defaultdict(int)) == {1: 1}


def test_merge_with():
    assert merge_with(sum)({1: 1}, {1: 2}) == {1: 3}


def test_merge_with_list():
    assert merge_with(sum, [{'a': 1}, {'a': 2}]) == {'a': 3}


def test_sorted():
    assert sorted(key=second)([(1, 2), (2, 1)]) == [(2, 1), (1, 2)]


def test_reduce():
    assert reduce(add)((1, 2, 3)) == 6


def test_module_name():
    assert toolz.curried.__name__ == 'toolz.curried'
def test_curried_operator():
    # Every callable exported by toolz.curried.operator must be either
    # curried or already unary.
    for k, v in vars(cop).items():
        if not callable(v):
            continue

        if not isinstance(v, toolz.curry):
            try:
                # Make sure it is unary
                # We cannot use isunary because it might be defined in C.
                v(1)
            except TypeError:
                raise AssertionError(
                    'toolz.curried.operator.%s is not curried!' % k,
                )

    # Make sure this isn't totally empty.
    assert len(set(vars(cop)) & set(['add', 'sub', 'mul'])) == 3


# --- toolz/tests/test_dicttoolz.py ---
# NOTE(review): reconstructed from repository-dump text ("NN |" residue).
from collections import defaultdict as _defaultdict
from toolz.dicttoolz import (merge, merge_with, valmap, keymap, update_in,
                             assoc, dissoc, keyfilter, valfilter, itemmap,
                             itemfilter)
from toolz.utils import raises
from toolz.compatibility import PY3


def inc(x):
    return x + 1


def iseven(i):
    return i % 2 == 0


class TestDict(object):
    """Test typical usage: dict inputs, no factory keyword.

    Class attributes:
    D: callable that inputs a dict and creates or returns a MutableMapping
    kw: kwargs dict to specify "factory" keyword (if applicable)
    """
    D = dict
    kw = {}

    def test_merge(self):
        D, kw = self.D, self.kw
        assert merge(D({1: 1, 2: 2}), D({3: 4}), **kw) == D({1: 1, 2: 2, 3: 4})

    def test_merge_iterable_arg(self):
        D, kw = self.D, self.kw
        assert merge([D({1: 1, 2: 2}), D({3: 4})], **kw) == D({1: 1, 2: 2, 3: 4})

    def test_merge_with(self):
        D, kw = self.D, self.kw
        dicts = D({1: 1, 2: 2}), D({1: 10, 2: 20})
        assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22})
        assert merge_with(tuple, *dicts, **kw) == D({1: (1, 10), 2: (2, 20)})

        dicts = D({1: 1, 2: 2, 3: 3}), D({1: 10, 2: 20})
        assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22, 3: 3})
        assert merge_with(tuple, *dicts, **kw) == D({1: (1, 10), 2: (2, 20), 3: (3,)})

        assert not merge_with(sum)

    def test_merge_with_iterable_arg(self):
        D, kw = self.D, self.kw
        dicts = D({1: 1, 2: 2}), D({1: 10, 2: 20})
        assert merge_with(sum, *dicts, **kw) == D({1: 11, 2: 22})
        assert merge_with(sum, dicts, **kw) == D({1: 11, 2: 22})
        assert merge_with(sum, iter(dicts), **kw) == D({1: 11, 2: 22})

    def test_valmap(self):
        D, kw = self.D, self.kw
        assert valmap(inc, D({1: 1, 2: 2}), **kw) == D({1: 2, 2: 3})

    def test_keymap(self):
        D, kw = self.D, self.kw
        assert keymap(inc, D({1: 1, 2: 2}), **kw) == D({2: 1, 3: 2})

    def test_itemmap(self):
        D, kw = self.D, self.kw
        assert itemmap(reversed, D({1: 2, 2: 4}), **kw) == D({2: 1, 4: 2})

    def test_valfilter(self):
        D, kw = self.D, self.kw
        assert valfilter(iseven, D({1: 2, 2: 3}), **kw) == D({1: 2})

    def test_keyfilter(self):
        D, kw = self.D, self.kw
        assert keyfilter(iseven, D({1: 2, 2: 3}), **kw) == D({2: 3})

    def test_itemfilter(self):
        D, kw = self.D, self.kw
        assert itemfilter(lambda item: iseven(item[0]), D({1: 2, 2: 3}), **kw) == D({2: 3})
        assert itemfilter(lambda item: iseven(item[1]), D({1: 2, 2: 3}), **kw) == D({1: 2})

    def test_assoc(self):
        D, kw = self.D, self.kw
        assert assoc(D({}), "a", 1, **kw) == D({"a": 1})
        assert assoc(D({"a": 1}), "a", 3, **kw) == D({"a": 3})
        assert assoc(D({"a": 1}), "b", 3, **kw) == D({"a": 1, "b": 3})

        # Verify immutability:
        d = D({'x': 1})
        oldd = d
        assoc(d, 'x', 2, **kw)
        assert d is oldd

    def test_dissoc(self):
        D, kw = self.D, self.kw
        assert dissoc(D({"a": 1}), "a") == D({})
        assert dissoc(D({"a": 1, "b": 2}), "a") == D({"b": 2})
        assert dissoc(D({"a": 1, "b": 2}), "b") == D({"a": 1})
        assert dissoc(D({"a": 1, "b": 2}), "a", "b") == D({})
        assert dissoc(D({"a": 1}), "a") == dissoc(dissoc(D({"a": 1}), "a"), "a")

        # Verify immutability:
        d = D({'x': 1})
        oldd = d
        d2 = dissoc(d, 'x')
        assert d is oldd
        assert d2 is not oldd

    def test_update_in(self):
        D, kw = self.D, self.kw
        assert update_in(D({"a": 0}), ["a"], inc, **kw) == D({"a": 1})
        assert update_in(D({"a": 0, "b": 1}), ["b"], str, **kw) == D({"a": 0, "b": "1"})
        assert (update_in(D({"t": 1, "v": D({"a": 0})}), ["v", "a"], inc, **kw) ==
                D({"t": 1, "v": D({"a": 1})}))
        # Handle one missing key.
        assert update_in(D({}), ["z"], str, None, **kw) == D({"z": "None"})
        assert update_in(D({}), ["z"], inc, 0, **kw) == D({"z": 1})
        assert update_in(D({}), ["z"], lambda x: x+"ar", default="b", **kw) == D({"z": "bar"})
        # Same semantics as Clojure for multiple missing keys, ie. recursively
        # create nested empty dictionaries to the depth specified by the
        # keys with the innermost value set to f(default).
        assert update_in(D({}), [0, 1], inc, default=-1, **kw) == D({0: D({1: 0})})
        assert update_in(D({}), [0, 1], str, default=100, **kw) == D({0: D({1: "100"})})
        assert (update_in(D({"foo": "bar", 1: 50}), ["d", 1, 0], str, 20, **kw) ==
                D({"foo": "bar", 1: 50, "d": D({1: D({0: "20"})})}))
        # Verify immutability:
        d = D({'x': 1})
        oldd = d
        update_in(d, ['x'], inc, **kw)
        assert d is oldd

    def test_factory(self):
        D, kw = self.D, self.kw
        assert merge(defaultdict(int, D({1: 2})), D({2: 3})) == {1: 2, 2: 3}
        assert (merge(defaultdict(int, D({1: 2})), D({2: 3}),
                      factory=lambda: defaultdict(int)) ==
                defaultdict(int, D({1: 2, 2: 3})))
        assert not (merge(defaultdict(int, D({1: 2})), D({2: 3}),
                          factory=lambda: defaultdict(int)) == {1: 2, 2: 3})
        # A misspelled keyword must raise, not be silently ignored.
        assert raises(TypeError, lambda: merge(D({1: 2}), D({2: 3}), factoryy=dict))


class defaultdict(_defaultdict):
    # Stricter equality than the builtin: the default_factory must match too,
    # so tests can tell a defaultdict result apart from a plain dict.
    def __eq__(self, other):
        return (super(defaultdict, self).__eq__(other) and
                isinstance(other, _defaultdict) and
                self.default_factory == other.default_factory)


class TestDefaultDict(TestDict):
    """Test defaultdict as input and factory

    Class attributes:
    D: callable that inputs a dict and creates or returns a MutableMapping
    kw: kwargs dict to specify "factory" keyword (if applicable)
    """
    @staticmethod
    def D(dict_):
        return defaultdict(int, dict_)

    kw = {'factory': lambda: defaultdict(int)}


class CustomMapping(object):
    """Define methods of the MutableMapping protocol required by dicttoolz"""
    def __init__(self, *args, **kwargs):
        self._d = dict(*args, **kwargs)

    def __getitem__(self, key):
        return self._d[key]

    def __setitem__(self, key, val):
        self._d[key] = val

    def __delitem__(self, key):
        del self._d[key]

    def __iter__(self):
        return iter(self._d)

    def __len__(self):
        return len(self._d)

    def __contains__(self, key):
        return key in self._d

    def __eq__(self, other):
        return isinstance(other, CustomMapping) and self._d == other._d

    def __ne__(self, other):
        return not isinstance(other, CustomMapping) or self._d != other._d

    def keys(self):
        return self._d.keys()

    def values(self):
        return self._d.values()

    def items(self):
        return self._d.items()

    def update(self, *args, **kwargs):
        self._d.update(*args, **kwargs)

    # Should we require these to be defined for Python 2?
    if not PY3:
        def iterkeys(self):
            return self._d.iterkeys()

        def itervalues(self):
            return self._d.itervalues()

        def iteritems(self):
            return self._d.iteritems()

    # Unused methods that are part of the MutableMapping protocol
    #def get(self, key, *args):
    #    return self._d.get(key, *args)

    #def pop(self, key, *args):
    #    return self._d.pop(key, *args)

    #def popitem(self, key):
    #    return self._d.popitem()

    #def clear(self):
    #    self._d.clear()

    #def setdefault(self, key, *args):
    #    return self._d.setdefault(self, key, *args)


class TestCustomMapping(TestDict):
    """Test CustomMapping as input and factory

    Class attributes:
    D: callable that inputs a dict and creates or returns a MutableMapping
    kw: kwargs dict to specify "factory" keyword (if applicable)
    """
    D = CustomMapping
    kw = {'factory': lambda: CustomMapping()}
# --- toolz/tests/test_functoolz.py ---
# NOTE(review): reconstructed from repository-dump text ("NN |" residue).
import platform


from toolz.functoolz import (thread_first, thread_last, memoize, curry,
                             compose, pipe, complement, do, juxt, flip)
from toolz.functoolz import _num_required_args
from operator import add, mul, itemgetter
from toolz.utils import raises
from functools import partial


def iseven(x):
    return x % 2 == 0


def isodd(x):
    return x % 2 == 1


def inc(x):
    return x + 1


def double(x):
    return 2 * x


def test_thread_first():
    assert thread_first(2) == 2
    assert thread_first(2, inc) == 3
    assert thread_first(2, inc, inc) == 4
    assert thread_first(2, double, inc) == 5
    assert thread_first(2, (add, 5), double) == 14


def test_thread_last():
    assert list(thread_last([1, 2, 3], (map, inc), (filter, iseven))) == [2, 4]
    assert list(thread_last([1, 2, 3], (map, inc), (filter, isodd))) == [3]
    assert thread_last(2, (add, 5), double) == 14


def test_memoize():
    fn_calls = [0]  # Storage for side effects

    def f(x, y):
        """ A docstring """
        fn_calls[0] += 1
        return x + y
    mf = memoize(f)

    assert mf(2, 3) == mf(2, 3)
    assert fn_calls == [1]  # function was only called once
    assert mf.__doc__ == f.__doc__
    assert raises(TypeError, lambda: mf(1, {}))


def test_memoize_kwargs():
    fn_calls = [0]  # Storage for side effects

    def f(x, y=0):
        return x + y

    mf = memoize(f)

    assert mf(1) == f(1)
    assert mf(1, 2) == f(1, 2)
    assert mf(1, y=2) == f(1, y=2)
    assert mf(1, y=3) == f(1, y=3)


def test_memoize_curried():
    @curry
    def f(x, y=0):
        return x + y

    f2 = f(y=1)
    fm2 = memoize(f2)

    assert fm2(3) == f2(3)
    assert fm2(3) == f2(3)


def test_memoize_partial():
    def f(x, y=0):
        return x + y

    f2 = partial(f, y=1)
    fm2 = memoize(f2)

    assert fm2(3) == f2(3)
    assert fm2(3) == f2(3)


def test_memoize_key_signature():
    # Single argument should not be tupled as a key.  No keywords.
    mf = memoize(lambda x: False, cache={1: True})
    assert mf(1) is True
    assert mf(2) is False

    # Single argument must be tupled if signature has varargs.  No keywords.
    mf = memoize(lambda x, *args: False, cache={(1,): True, (1, 2): 2})
    assert mf(1) is True
    assert mf(2) is False
    assert mf(1, 1) is False
    assert mf(1, 2) == 2
    assert mf((1, 2)) is False

    # More than one argument is always tupled.  No keywords.
    mf = memoize(lambda x, y: False, cache={(1, 2): True})
    assert mf(1, 2) is True
    assert mf(1, 3) is False
    assert raises(TypeError, lambda: mf((1, 2)))

    # Nullary function (no inputs) uses empty tuple as the key
    mf = memoize(lambda: False, cache={(): True})
    assert mf() is True

    # Single argument must be tupled if there are keyword arguments, because
    # keyword arguments may be passed as unnamed args.
    mf = memoize(lambda x, y=0: False,
                 cache={((1,), frozenset((('y', 2),))): 2,
                        ((1, 2), None): 3})
    assert mf(1, y=2) == 2
    assert mf(1, 2) == 3
    assert mf(2, y=2) is False
    assert mf(2, 2) is False
    assert mf(1) is False
    assert mf((1, 2)) is False

    # Keyword-only signatures must still have an "args" tuple.
    mf = memoize(lambda x=0: False, cache={(None, frozenset((('x', 1),))): 1,
                                           ((1,), None): 2})
    assert mf() is False
    assert mf(x=1) == 1
    assert mf(1) == 2


def test_memoize_curry_cache():
    @memoize(cache={1: True})
    def f(x):
        return False

    assert f(1) is True
    assert f(2) is False


def test_memoize_key():
    @memoize(key=lambda args, kwargs: args[0])
    def f(x, y, *args, **kwargs):
        return x + y

    assert f(1, 2) == 3
    assert f(1, 3) == 3  # cache hit on args[0] == 1 despite different y


def test_curry_simple():
    cmul = curry(mul)
    double = cmul(2)
    assert callable(double)
    assert double(10) == 20
    assert repr(cmul) == repr(mul)

    cmap = curry(map)
    assert list(cmap(inc)([1, 2, 3])) == [2, 3, 4]

    assert raises(TypeError, lambda: curry())
    assert raises(TypeError, lambda: curry({1: 2}))


def test_curry_kwargs():
    def f(a, b, c=10):
        return (a + b) * c

    f = curry(f)
    assert f(1, 2, 3) == 9
    assert f(1)(2, 3) == 9
    assert f(1, 2) == 30
    assert f(1, c=3)(2) == 9
    assert f(c=3)(1, 2) == 9

    def g(a=1, b=10, c=0):
        return a + b + c

    cg = curry(g, b=2)
    assert cg() == 3
    assert cg(b=3) == 4
    assert cg(a=0) == 2
    assert cg(a=0, b=1) == 1
    assert cg(0) == 2  # pass "a" as arg, not kwarg
    assert raises(TypeError, lambda: cg(1, 2))  # pass "b" as arg AND kwarg

    def h(x, func=int):
        return func(x)

    if platform.python_implementation() != 'PyPy'\
            or platform.python_version_tuple()[0] != '3':  # Bug on PyPy3<2.5
        # __init__ must not pick func as positional arg
        assert curry(h)(0.0) == 0
        assert curry(h)(func=str)(0.0) == '0.0'
        assert curry(h, func=str)(0.0) == '0.0'


def test_curry_passes_errors():
    @curry
    def f(a, b):
        if not isinstance(a, int):
            raise TypeError()
        return a + b

    assert f(1, 2) == 3
    assert raises(TypeError, lambda: f('1', 2))
    assert raises(TypeError, lambda: f('1')(2))
    assert raises(TypeError, lambda: f(1, 2, 3))


def test_curry_docstring():
    def f(x, y):
        """ A docstring """
        return x

    g = curry(f)
    assert g.__doc__ == f.__doc__
    assert str(g) == str(f)
    assert f(1, 2) == g(1, 2)


def test_curry_is_like_partial():
    def foo(a, b, c=1):
        return a + b + c

    p, c = partial(foo, 1, c=2), curry(foo)(1, c=2)
    assert p.keywords == c.keywords
    assert p.args == c.args
    assert p(3) == c(3)

    p, c = partial(foo, 1), curry(foo)(1)
    assert p.keywords == c.keywords
    assert p.args == c.args
    assert p(3) == c(3)
    assert p(3, c=2) == c(3, c=2)

    p, c = partial(foo, c=1), curry(foo)(c=1)
    assert p.keywords == c.keywords
    assert p.args == c.args
    assert p(1, 2) == c(1, 2)


def test_curry_is_idempotent():
    def foo(a, b, c=1):
        return a + b + c

    f = curry(foo, 1, c=2)
    g = curry(f)
    assert isinstance(f, curry)
    assert isinstance(g, curry)
    # currying a curry must not nest: g wraps foo directly.
    assert not isinstance(g.func, curry)
    assert not hasattr(g.func, 'func')
    assert f.func == g.func
    assert f.args == g.args
    assert f.keywords == g.keywords


def test_curry_attributes_readonly():
    def foo(a, b, c=1):
        return a + b + c

    f = curry(foo, 1, c=2)
    assert raises(AttributeError, lambda: setattr(f, 'args', (2,)))
    assert raises(AttributeError, lambda: setattr(f, 'keywords', {'c': 3}))
    assert raises(AttributeError, lambda: setattr(f, 'func', f))


def test_curry_attributes_writable():
    def foo(a, b, c=1):
        return a + b + c

    f = curry(foo, 1, c=2)
    f.__name__ = 'newname'
    f.__doc__ = 'newdoc'
    assert f.__name__ == 'newname'
    assert f.__doc__ == 'newdoc'
    if hasattr(f, 'func_name'):
        assert f.__name__ == f.func_name


def test_curry_comparable():
    def foo(a, b, c=1):
        return a + b + c
    f1 = curry(foo, 1, c=2)
    f2 = curry(foo, 1, c=2)
    g1 = curry(foo, 1, c=3)
    h1 = curry(foo, c=2)
    h2 = h1(c=2)
    h3 = h1()
    assert f1 == f2
    assert not (f1 != f2)
    assert f1 != g1
    assert not (f1 == g1)
    assert f1 != h1
    assert h1 == h2
    assert h1 == h3

    # test function comparison works
    def bar(a, b, c=1):
        return a + b + c
    b1 = curry(bar, 1, c=2)
    assert b1 != f1

    assert set([f1, f2, g1, h1, h2, h3, b1, b1()]) == set([f1, g1, h1, b1])

    # test unhashable input
    unhash1 = curry(foo, [])
    assert raises(TypeError, lambda: hash(unhash1))
    unhash2 = curry(foo, c=[])
    assert raises(TypeError, lambda: hash(unhash2))


def test_curry_doesnot_transmogrify():
    # Early versions of `curry` transmogrified to `partial` objects if
    # only one positional argument remained even if keyword arguments
    # were present.  Now, `curry` should always remain `curry`.
    def f(x, y=0):
        return x + y

    cf = curry(f)
    assert cf(y=1)(y=2)(y=3)(1) == f(1, 3)


def test_curry_on_classmethods():
    class A(object):
        BASE = 10

        def __init__(self, base):
            self.BASE = base

        @curry
        def addmethod(self, x, y):
            return self.BASE + x + y

        @classmethod
        @curry
        def addclass(cls, x, y):
            return cls.BASE + x + y

        @staticmethod
        @curry
        def addstatic(x, y):
            return x + y

    a = A(100)
    assert a.addmethod(3, 4) == 107
    assert a.addmethod(3)(4) == 107
    assert A.addmethod(a, 3, 4) == 107
    assert A.addmethod(a)(3)(4) == 107

    assert a.addclass(3, 4) == 17
    assert a.addclass(3)(4) == 17
    assert A.addclass(3, 4) == 17
    assert A.addclass(3)(4) == 17

    assert a.addstatic(3, 4) == 7
    assert a.addstatic(3)(4) == 7
    assert A.addstatic(3, 4) == 7
    assert A.addstatic(3)(4) == 7

    # we want this to be of type curry
    assert isinstance(a.addmethod, curry)
    assert isinstance(A.addmethod, curry)


def test_memoize_on_classmethods():
    class A(object):
        BASE = 10
        HASH = 10

        def __init__(self, base):
            self.BASE = base

        @memoize
        def addmethod(self, x, y):
            return self.BASE + x + y

        @classmethod
        @memoize
        def addclass(cls, x, y):
            return cls.BASE + x + y

        @staticmethod
        @memoize
        def addstatic(x, y):
            return x + y

        def __hash__(self):
            return self.HASH

    a = A(100)
    assert a.addmethod(3, 4) == 107
    assert A.addmethod(a, 3, 4) == 107

    # Cache keys on the instance's hash, so changing BASE alone is invisible
    # until HASH changes too.
    a.BASE = 200
    assert a.addmethod(3, 4) == 107
    a.HASH = 200
    assert a.addmethod(3, 4) == 207

    assert a.addclass(3, 4) == 17
    assert A.addclass(3, 4) == 17
    A.BASE = 20
    assert A.addclass(3, 4) == 17
    A.HASH = 20  # hashing of class is handled by metaclass
    assert A.addclass(3, 4) == 17  # hence, != 27

    assert a.addstatic(3, 4) == 7
    assert A.addstatic(3, 4) == 7


def test__num_required_args():
    assert _num_required_args(map) != 0
    assert _num_required_args(lambda x: x) == 1
    assert _num_required_args(lambda x, y: x) == 2

    def foo(x, y, z=2):
        pass
    assert _num_required_args(foo) == 2


def test_compose():
    assert compose()(0) == 0
    assert compose(inc)(0) == 1
    assert compose(double, inc)(0) == 2
    assert compose(str, iseven, inc, double)(3) == "False"
    assert compose(str, add)(1, 2) == '3'

    def f(a, b, c=10):
        return (a + b) * c

    assert compose(str, inc, f)(1, 2, c=3) == '10'

    # Define two functions with different names
    def f(a):
        return a

    def g(a):
        return a

    composed = compose(f, g)
    assert composed.__name__ == 'f_of_g'
    assert composed.__doc__ == 'lambda *args, **kwargs: f(g(*args, **kwargs))'

    # Create an object with no __name__.
    h = object()

    composed = compose(f, h)
    assert composed.__name__ == 'Compose'
    assert composed.__doc__ == 'A composition of functions'


def test_pipe():
    assert pipe(1, inc) == 2
    assert pipe(1, inc, inc) == 3
    assert pipe(1, double, inc, iseven) is False


def test_complement():
    # No args:
    assert complement(lambda: False)()
    assert not complement(lambda: True)()

    # Single arity:
    assert complement(iseven)(1)
    assert not complement(iseven)(2)
    assert complement(complement(iseven))(2)
    assert not complement(complement(isodd))(2)

    # Multiple arities:
    both_even = lambda a, b: iseven(a) and iseven(b)
    assert complement(both_even)(1, 2)
    assert not complement(both_even)(2, 2)

    # Generic truthiness:
    assert complement(lambda: "")()
    assert complement(lambda: 0)()
    assert complement(lambda: None)()
    assert complement(lambda: [])()

    assert not complement(lambda: "x")()
    assert not complement(lambda: 1)()
    assert not complement(lambda: [1])()


def test_do():
    inc = lambda x: x + 1
    assert do(inc, 1) == 1  # do returns its input, not f's result

    log = []
    assert do(log.append, 1) == 1
    assert log == [1]


def test_juxt_generator_input():
    data = list(range(10))
    juxtfunc = juxt(itemgetter(2*i) for i in range(5))
    assert juxtfunc(data) == (0, 2, 4, 6, 8)
    # A generator argument must be reusable across calls.
    assert juxtfunc(data) == (0, 2, 4, 6, 8)


def test_flip():
    def f(a, b):
        return a, b

    assert flip(f, 'a', 'b') == ('b', 'a')
# --- toolz/tests/test_itertoolz.py ---
# NOTE(review): reconstructed from repository-dump text ("NN |" residue).
import itertools
from itertools import starmap
from toolz.utils import raises
from functools import partial
from toolz.itertoolz import (remove, groupby, merge_sorted,
                             concat, concatv, interleave, unique,
                             isiterable, getter,
                             mapcat, isdistinct, first, second,
                             nth, take, tail, drop, interpose, get,
                             rest, last, cons, frequencies,
                             reduceby, iterate, accumulate,
                             sliding_window, count, partition,
                             partition_all, take_nth, pluck, join,
                             diff, topk, peek)
from toolz.compatibility import range, filter
from operator import add, mul


def identity(x):
    return x


def iseven(x):
    return x % 2 == 0


def isodd(x):
    return x % 2 == 1


def inc(x):
    return x + 1


def double(x):
    return 2 * x


def test_remove():
    r = remove(iseven, range(5))
    assert type(r) is not list  # remove must be lazy
    assert list(r) == list(filter(isodd, range(5)))


def test_groupby():
    assert groupby(iseven, [1, 2, 3, 4]) == {True: [2, 4], False: [1, 3]}


def test_groupby_non_callable():
    # A non-callable key is treated as an index (or list of indices).
    assert groupby(0, [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {1: [(1, 2), (1, 3)],
         2: [(2, 2), (2, 4)]}

    assert groupby([0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {(1,): [(1, 2), (1, 3)],
         (2,): [(2, 2), (2, 4)]}

    assert groupby([0, 0], [(1, 2), (1, 3), (2, 2), (2, 4)]) == \
        {(1, 1): [(1, 2), (1, 3)],
         (2, 2): [(2, 2), (2, 4)]}


def test_merge_sorted():
    assert list(merge_sorted([1, 2, 3], [1, 2, 3])) == [1, 1, 2, 2, 3, 3]
    assert list(merge_sorted([1, 3, 5], [2, 4, 6])) == [1, 2, 3, 4, 5, 6]
    assert list(merge_sorted([1], [2, 4], [3], [])) == [1, 2, 3, 4]
    assert list(merge_sorted([5, 3, 1], [6, 4, 3], [],
                             key=lambda x: -x)) == [6, 5, 4, 3, 3, 1]
    assert list(merge_sorted([2, 1, 3], [1, 2, 3],
                             key=lambda x: x // 3)) == [2, 1, 1, 2, 3, 3]
    assert list(merge_sorted([2, 3], [1, 3],
                             key=lambda x: x // 3)) == [2, 1, 3, 3]
    assert ''.join(merge_sorted('abc', 'abc', 'abc')) == 'aaabbbccc'
    assert ''.join(merge_sorted('abc', 'abc', 'abc', key=ord)) == 'aaabbbccc'
    assert ''.join(merge_sorted('cba', 'cba', 'cba',
                                key=lambda x: -ord(x))) == 'cccbbbaaa'
    assert list(merge_sorted([1], [2, 3, 4], key=identity)) == [1, 2, 3, 4]

    data = [[(1, 2), (0, 4), (3, 6)], [(5, 3), (6, 5), (8, 8)],
            [(9, 1), (9, 8), (9, 9)]]
    assert list(merge_sorted(*data, key=lambda x: x[1])) == [
        (9, 1), (1, 2), (5, 3), (0, 4), (6, 5), (3, 6), (8, 8), (9, 8), (9, 9)]


def test_interleave():
    assert ''.join(interleave(('ABC', '123'))) == 'A1B2C3'
    assert ''.join(interleave(('ABC', '1'))) == 'A1BC'


def test_unique():
    assert tuple(unique((1, 2, 3))) == (1, 2, 3)
    assert tuple(unique((1, 2, 1, 3))) == (1, 2, 3)
    assert tuple(unique((1, 2, 3), key=iseven)) == (1, 2)


def test_isiterable():
    assert isiterable([1, 2, 3]) is True
    assert isiterable('abc') is True
    assert isiterable(5) is False


def test_isdistinct():
    assert isdistinct([1, 2, 3]) is True
    assert isdistinct([1, 2, 1]) is False

    assert isdistinct("Hello") is False
    assert isdistinct("World") is True

    assert isdistinct(iter([1, 2, 3])) is True
    assert isdistinct(iter([1, 2, 1])) is False


def test_nth():
    assert nth(2, 'ABCDE') == 'C'
    assert nth(2, iter('ABCDE')) == 'C'
    assert nth(1, (3, 2, 1)) == 2
    assert nth(0, {'foo': 'bar'}) == 'foo'
    assert raises(StopIteration, lambda: nth(10, {10: 'foo'}))
    assert nth(-2, 'ABCDE') == 'D'
    # Negative indices require a sequence; iterators must raise.
    assert raises(ValueError, lambda: nth(-2, iter('ABCDE')))


def test_first():
    assert first('ABCDE') == 'A'
    assert first((3, 2, 1)) == 3
    assert isinstance(first({0: 'zero', 1: 'one'}), int)


def test_second():
    assert second('ABCDE') == 'B'
    assert second((3, 2, 1)) == 2
    assert isinstance(second({0: 'zero', 1: 'one'}), int)


def test_last():
    assert last('ABCDE') == 'E'
    assert last((3, 2, 1)) == 1
    assert isinstance(last({0: 'zero', 1: 'one'}), int)


def test_rest():
    assert list(rest('ABCDE')) == list('BCDE')
    assert list(rest((3, 2, 1))) == list((2, 1))


def test_take():
    assert list(take(3, 'ABCDE')) == list('ABC')
    assert list(take(2, (3, 2, 1))) == list((3, 2))


def test_tail():
    assert list(tail(3, 'ABCDE')) == list('CDE')
    assert list(tail(3, iter('ABCDE'))) == list('CDE')
    assert list(tail(2, (3, 2, 1))) == list((2, 1))


def test_drop():
    assert list(drop(3, 'ABCDE')) == list('DE')
    assert list(drop(1, (3, 2, 1))) == list((2, 1))


def test_take_nth():
    assert list(take_nth(2, 'ABCDE')) == list('ACE')


def test_get():
    assert get(1, 'ABCDE') == 'B'
    assert list(get([1, 3], 'ABCDE')) == list('BD')
    assert get('a', {'a': 1, 'b': 2, 'c': 3}) == 1
    assert get(['a', 'b'], {'a': 1, 'b': 2, 'c': 3}) == (1, 2)

    assert get('foo', {}, default='bar') == 'bar'
    assert get({}, [1, 2, 3], default='bar') == 'bar'
    assert get([0, 2], 'AB', 'C') == ('A', 'C')

    assert get([0], 'AB') == ('A',)
    assert get([], 'AB') == ()

    assert raises(IndexError, lambda: get(10, 'ABC'))
    assert raises(KeyError, lambda: get(10, {'a': 1}))
    assert raises(TypeError, lambda: get({}, [1, 2, 3]))
    assert raises(TypeError, lambda: get([1, 2, 3], 1, None))


def test_mapcat():
    assert (list(mapcat(identity, [[1, 2, 3], [4, 5, 6]])) ==
            [1, 2, 3, 4, 5, 6])

    assert (list(mapcat(reversed, [[3, 2, 1, 0], [6, 5, 4], [9, 8, 7]])) ==
            list(range(10)))

    inc = lambda i: i + 1
    assert ([4, 5, 6, 7, 8, 9] ==
            list(mapcat(partial(map, inc), [[3, 4, 5], [6, 7, 8]])))


def test_cons():
    assert list(cons(1, [2, 3])) == [1, 2, 3]


def test_concat():
    assert list(concat([[], [], []])) == []
    # concat must be lazy: the huge range is never materialized.
    assert (list(take(5, concat([['a', 'b'], range(1000000000)]))) ==
            ['a', 'b', 0, 1, 2])


def test_concatv():
    assert list(concatv([], [], [])) == []
    assert (list(take(5, concatv(['a', 'b'], range(1000000000)))) ==
            ['a', 'b', 0, 1, 2])


def test_interpose():
    assert "a" == first(rest(interpose("a", range(1000000000))))
    assert "tXaXrXzXaXn" == "".join(interpose("X", "tarzan"))
    assert list(interpose(0, itertools.repeat(1, 4))) == [1, 0, 1, 0, 1, 0, 1]
    assert list(interpose('.', ['a', 'b', 'c'])) == ['a', '.', 'b', '.', 'c']


def test_frequencies():
    assert (frequencies(["cat", "pig", "cat", "eel",
                         "pig", "dog", "dog", "dog"]) ==
            {"cat": 2, "eel": 1, "pig": 2, "dog": 3})
    assert frequencies([]) == {}
    assert frequencies("onomatopoeia") == {"a": 2, "e": 1, "i": 1, "m": 1,
                                           "o": 4, "n": 1, "p": 1, "t": 1}


def test_reduceby():
    data = [1, 2, 3, 4, 5]
    iseven = lambda x: x % 2 == 0
    assert reduceby(iseven, add, data, 0) == {False: 9, True: 6}
    assert reduceby(iseven, mul, data, 1) == {False: 15, True: 8}

    projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000},
                {'name': 'fight crime', 'state': 'IL', 'cost': 100000},
                {'name': 'help farmers', 'state': 'IL', 'cost': 2000000},
                {'name': 'help farmers', 'state': 'CA', 'cost': 200000}]
    assert reduceby(lambda x: x['state'],
                    lambda acc, x: acc + x['cost'],
                    projects, 0) == {'CA': 1200000, 'IL': 2100000}

    # A non-callable key is treated as an index.
    assert reduceby('state',
                    lambda acc, x: acc + x['cost'],
                    projects, 0) == {'CA': 1200000, 'IL': 2100000}


def test_reduce_by_init():
    assert reduceby(iseven, add, [1, 2, 3, 4]) == {True: 2 + 4, False: 1 + 3}


def test_reduce_by_callable_default():
    def set_add(s, i):
        s.add(i)
        return s

    # A callable init is invoked per group, so groups don't share state.
    assert reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2], set) == \
        {True: set([2, 4]), False: set([1, 3])}


def test_iterate():
    assert list(itertools.islice(iterate(inc, 0), 0, 5)) == [0, 1, 2, 3, 4]
    assert list(take(4, iterate(double, 1))) == [1, 2, 4, 8]


def test_accumulate():
    assert list(accumulate(add, [1, 2, 3, 4, 5])) == [1, 3, 6, 10, 15]
    assert list(accumulate(mul, [1, 2, 3, 4, 5])) == [1, 2, 6, 24, 120]
    assert list(accumulate(add, [1, 2, 3, 4, 5], -1)) == [-1, 0, 2, 5, 9, 14]

    def binop(a, b):
        raise AssertionError('binop should not be called')

    start = object()
    assert list(accumulate(binop, [], start)) == [start]


def test_accumulate_works_on_consumable_iterables():
    assert list(accumulate(add, iter((1, 2, 3)))) == [1, 3, 6]


def test_sliding_window():
    assert list(sliding_window(2, [1, 2, 3, 4])) == [(1, 2), (2, 3), (3, 4)]
    assert list(sliding_window(3, [1, 2, 3, 4])) == [(1, 2, 3), (2, 3, 4)]


def test_sliding_window_of_short_iterator():
    assert list(sliding_window(3, [1, 2])) == []


def test_partition():
    assert list(partition(2, [1, 2, 3, 4])) == [(1, 2), (3, 4)]
    # Without a pad the trailing incomplete group is dropped.
    assert list(partition(3, range(7))) == [(0, 1, 2), (3, 4, 5)]
    assert list(partition(3, range(4), pad=-1)) == [(0, 1, 2),
                                                    (3, -1, -1)]
    assert list(partition(2, [])) == []


def test_partition_all():
    assert list(partition_all(2, [1, 2, 3, 4])) == [(1, 2), (3, 4)]
    assert list(partition_all(3, range(5))) == [(0, 1, 2), (3, 4)]
    assert list(partition_all(2, [])) == []


def test_count():
    assert count((1, 2, 3)) == 3
    assert count([]) == 0
    assert count(iter((1, 2, 3, 4))) == 4
310 | assert count('hello') == 5 311 | assert count(iter('hello')) == 5 312 | 313 | 314 | def test_pluck(): 315 | assert list(pluck(0, [[0, 1], [2, 3], [4, 5]])) == [0, 2, 4] 316 | assert list(pluck([0, 1], [[0, 1, 2], [3, 4, 5]])) == [(0, 1), (3, 4)] 317 | assert list(pluck(1, [[0], [0, 1]], None)) == [None, 1] 318 | 319 | data = [{'id': 1, 'name': 'cheese'}, {'id': 2, 'name': 'pies', 'price': 1}] 320 | assert list(pluck('id', data)) == [1, 2] 321 | assert list(pluck('price', data, None)) == [None, 1] 322 | assert list(pluck(['id', 'name'], data)) == [(1, 'cheese'), (2, 'pies')] 323 | assert list(pluck(['name'], data)) == [('cheese',), ('pies',)] 324 | assert list(pluck(['price', 'other'], data, None)) == [(None, None), 325 | (1, None)] 326 | 327 | assert raises(IndexError, lambda: list(pluck(1, [[0]]))) 328 | assert raises(KeyError, lambda: list(pluck('name', [{'id': 1}]))) 329 | 330 | 331 | def test_join(): 332 | names = [(1, 'one'), (2, 'two'), (3, 'three')] 333 | fruit = [('apple', 1), ('orange', 1), ('banana', 2), ('coconut', 2)] 334 | 335 | def addpair(pair): 336 | return pair[0] + pair[1] 337 | 338 | result = set(starmap(add, join(first, names, second, fruit))) 339 | 340 | expected = set([((1, 'one', 'apple', 1)), 341 | ((1, 'one', 'orange', 1)), 342 | ((2, 'two', 'banana', 2)), 343 | ((2, 'two', 'coconut', 2))]) 344 | 345 | assert result == expected 346 | 347 | 348 | def test_getter(): 349 | assert getter(0)('Alice') == 'A' 350 | assert getter([0])('Alice') == ('A',) 351 | assert getter([])('Alice') == () 352 | 353 | 354 | def test_key_as_getter(): 355 | squares = [(i, i**2) for i in range(5)] 356 | pows = [(i, i**2, i**3) for i in range(5)] 357 | 358 | assert set(join(0, squares, 0, pows)) == set(join(lambda x: x[0], squares, 359 | lambda x: x[0], pows)) 360 | 361 | get = lambda x: (x[0], x[1]) 362 | assert set(join([0, 1], squares, [0, 1], pows)) == set(join(get, squares, 363 | get, pows)) 364 | 365 | get = lambda x: (x[0],) 366 | assert set(join([0], 
squares, [0], pows)) == set(join(get, squares, 367 | get, pows)) 368 | 369 | 370 | def test_join_double_repeats(): 371 | names = [(1, 'one'), (2, 'two'), (3, 'three'), (1, 'uno'), (2, 'dos')] 372 | fruit = [('apple', 1), ('orange', 1), ('banana', 2), ('coconut', 2)] 373 | 374 | result = set(starmap(add, join(first, names, second, fruit))) 375 | 376 | expected = set([((1, 'one', 'apple', 1)), 377 | ((1, 'one', 'orange', 1)), 378 | ((2, 'two', 'banana', 2)), 379 | ((2, 'two', 'coconut', 2)), 380 | ((1, 'uno', 'apple', 1)), 381 | ((1, 'uno', 'orange', 1)), 382 | ((2, 'dos', 'banana', 2)), 383 | ((2, 'dos', 'coconut', 2))]) 384 | 385 | assert result == expected 386 | 387 | 388 | def test_join_missing_element(): 389 | names = [(1, 'one'), (2, 'two'), (3, 'three')] 390 | fruit = [('apple', 5), ('orange', 1)] 391 | 392 | result = set(starmap(add, join(first, names, second, fruit))) 393 | 394 | expected = set([((1, 'one', 'orange', 1))]) 395 | 396 | assert result == expected 397 | 398 | 399 | def test_left_outer_join(): 400 | result = set(join(identity, [1, 2], identity, [2, 3], left_default=None)) 401 | expected = set([(2, 2), (None, 3)]) 402 | 403 | assert result == expected 404 | 405 | 406 | def test_right_outer_join(): 407 | result = set(join(identity, [1, 2], identity, [2, 3], right_default=None)) 408 | expected = set([(2, 2), (1, None)]) 409 | 410 | assert result == expected 411 | 412 | 413 | def test_outer_join(): 414 | result = set(join(identity, [1, 2], identity, [2, 3], 415 | left_default=None, right_default=None)) 416 | expected = set([(2, 2), (1, None), (None, 3)]) 417 | 418 | assert result == expected 419 | 420 | 421 | def test_diff(): 422 | assert raises(TypeError, lambda: list(diff())) 423 | assert raises(TypeError, lambda: list(diff([1, 2]))) 424 | assert raises(TypeError, lambda: list(diff([1, 2], 3))) 425 | assert list(diff([1, 2], (1, 2), iter([1, 2]))) == [] 426 | assert list(diff([1, 2, 3], (1, 10, 3), iter([1, 2, 10]))) == [ 427 | (2, 10, 2), (3, 3, 
10)] 428 | assert list(diff([1, 2], [10])) == [(1, 10)] 429 | assert list(diff([1, 2], [10], default=None)) == [(1, 10), (2, None)] 430 | # non-variadic usage 431 | assert raises(TypeError, lambda: list(diff([]))) 432 | assert raises(TypeError, lambda: list(diff([[]]))) 433 | assert raises(TypeError, lambda: list(diff([[1, 2]]))) 434 | assert raises(TypeError, lambda: list(diff([[1, 2], 3]))) 435 | assert list(diff([(1, 2), (1, 3)])) == [(2, 3)] 436 | 437 | data1 = [{'cost': 1, 'currency': 'dollar'}, 438 | {'cost': 2, 'currency': 'dollar'}] 439 | 440 | data2 = [{'cost': 100, 'currency': 'yen'}, 441 | {'cost': 300, 'currency': 'yen'}] 442 | 443 | conversions = {'dollar': 1, 'yen': 0.01} 444 | 445 | def indollars(item): 446 | return conversions[item['currency']] * item['cost'] 447 | 448 | list(diff(data1, data2, key=indollars)) == [ 449 | ({'cost': 2, 'currency': 'dollar'}, {'cost': 300, 'currency': 'yen'})] 450 | 451 | 452 | def test_topk(): 453 | assert topk(2, [4, 1, 5, 2]) == (5, 4) 454 | assert topk(2, [4, 1, 5, 2], key=lambda x: -x) == (1, 2) 455 | assert topk(2, iter([5, 1, 4, 2]), key=lambda x: -x) == (1, 2) 456 | 457 | assert topk(2, [{'a': 1, 'b': 10}, {'a': 2, 'b': 9}, 458 | {'a': 10, 'b': 1}, {'a': 9, 'b': 2}], key='a') == \ 459 | ({'a': 10, 'b': 1}, {'a': 9, 'b': 2}) 460 | 461 | assert topk(2, [{'a': 1, 'b': 10}, {'a': 2, 'b': 9}, 462 | {'a': 10, 'b': 1}, {'a': 9, 'b': 2}], key='b') == \ 463 | ({'a': 1, 'b': 10}, {'a': 2, 'b': 9}) 464 | assert topk(2, [(0, 4), (1, 3), (2, 2), (3, 1), (4, 0)], 0) == \ 465 | ((4, 0), (3, 1)) 466 | 467 | 468 | def test_topk_is_stable(): 469 | assert topk(4, [5, 9, 2, 1, 5, 3], key=lambda x: 1) == (5, 9, 2, 1) 470 | 471 | 472 | def test_peek(): 473 | alist = ["Alice", "Bob", "Carol"] 474 | element, blist = peek(alist) 475 | element == alist[0] 476 | assert list(blist) == alist 477 | 478 | assert raises(StopIteration, lambda: peek([])) 479 | -------------------------------------------------------------------------------- 
# --------------------------------------------------------------------------
# toolz/tests/test_recipes.py
# --------------------------------------------------------------------------
from toolz import first, identity, countby, partitionby


def iseven(x):
    """Predicate: True when *x* is an even integer."""
    return x % 2 == 0


def test_countby():
    """countby tallies elements by key function (or positional getter)."""
    assert countby(iseven, [1, 2, 3]) == {False: 2, True: 1}
    assert countby(len, ['cat', 'dog', 'mouse']) == {3: 2, 5: 1}
    assert countby(0, ('ab', 'ac', 'bc')) == {'a': 2, 'b': 1}


def test_partitionby():
    """partitionby groups consecutive elements sharing a key value."""
    # Empty input yields no partitions at all.
    assert list(partitionby(identity, [])) == []

    is_vowel = "aeiou".__contains__
    parts = list(partitionby(is_vowel, "abcdefghi"))
    assert parts == [("a",), ("b", "c", "d"), ("e",), ("f", "g", "h"), ("i",)]

    heads = [first(group) for group in
             partitionby(identity, [1, 1, 1, 2, 3, 3, 2, 2, 3])]
    assert heads == [1, 2, 3, 2, 3]

    collapsed = ''.join(first(group) for group in
                        partitionby(identity, "Khhhaaaaannnnn!!!!"))
    assert collapsed == 'Khan!'


# --------------------------------------------------------------------------
# toolz/tests/test_serialization.py
# --------------------------------------------------------------------------
from toolz import *
import pickle


def _roundtrip(obj):
    """Serialize *obj* with pickle and deserialize it again."""
    return pickle.loads(pickle.dumps(obj))


def test_compose():
    f = compose(str, sum)
    g = _roundtrip(f)
    assert f((1, 2)) == g((1, 2))


def test_curry():
    f = curry(map)(str)
    g = _roundtrip(f)
    assert list(f((1, 2, 3))) == list(g((1, 2, 3)))


def test_juxt():
    f = juxt(str, int, bool)
    g = _roundtrip(f)
    assert f(1) == g(1)
    assert f.funcs == g.funcs


def test_complement():
    f = complement(bool)
    assert f(True) is False
    assert f(False) is True
    g = _roundtrip(f)
    assert g(True) == f(True)
    assert g(False) == f(False)


# --------------------------------------------------------------------------
# toolz/tests/test_utils.py
# --------------------------------------------------------------------------
from toolz.utils import raises


def test_raises():
    """raises() reports whether the thunk raised the expected exception."""
    assert raises(ZeroDivisionError, lambda: 1 / 0)
    assert not raises(ZeroDivisionError, lambda: 1)


# --------------------------------------------------------------------------
# toolz/utils.py
# --------------------------------------------------------------------------
def raises(err, lamda):
    """Return True if calling ``lamda`` (a zero-argument callable) raises
    ``err``, and False if it returns normally.

    Exceptions other than ``err`` propagate to the caller.  The parameter
    is spelled ``lamda`` (sic) because ``lambda`` is a reserved word; the
    name is kept as-is since it is part of the public signature.
    """
    try:
        lamda()
    except err:
        return True
    return False


# Sentinel string meaning "no default value was supplied".
no_default = '__no__default__'