├── .coveragerc ├── .github └── workflows │ ├── pre-commit_hooks.yaml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── .version ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── benchmark └── test_benchmark.py ├── docs ├── Makefile ├── api.rst ├── cli.rst ├── conf.py ├── index.rst ├── influances.rst ├── license.rst ├── versions.rst └── welcome.rst ├── mypy.ini ├── pyproject.toml ├── pytest.ini ├── setup.cfg └── src ├── flupy ├── __init__.py ├── cli │ ├── __init__.py │ ├── cli.py │ └── utils.py ├── fluent.py └── py.typed └── tests ├── test_cli.py ├── test_cli_utils.py ├── test_flu.py └── test_version.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | if TYPE_CHECKING: 5 | raise AssertionError 6 | raise NotImplementedError 7 | @overload 8 | pass 9 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit_hooks.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit hooks 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | 11 | - uses: actions/checkout@v1 12 | 13 | - name: python setup 3.9 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: '3.9' 17 | 18 | - name: Install Poetry 19 | uses: snok/install-poetry@v1 20 | with: 21 | version: 1.7.1 22 | virtualenvs-create: true 23 | virtualenvs-in-project: true 24 | 25 | - name: Install dependencies 26 | run: | 27 | poetry install --with dev 28 | 29 | - name: run tests 30 | run: | 31 | poetry run pre-commit run --all -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | strategy: 10 | matrix: 11 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 12 | 13 | steps: 14 | 15 | - uses: actions/checkout@v1 16 | 17 | - name: python setup ${{ matrix.python-version }} 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | 22 | - name: Install Poetry 23 | uses: snok/install-poetry@v1 24 | with: 25 | version: 1.7.1 26 | virtualenvs-create: true 27 | virtualenvs-in-project: true 28 | 29 | - name: Install dependencies 30 | run: | 31 | poetry install --with dev 32 | 33 | - name: run tests 34 | run: | 35 | poetry run pytest --cov=src/flupy src/tests --cov-report=xml 36 | 37 | - name: upload coverage to codecov 38 | uses: codecov/codecov-action@v1 39 | with: 40 | token: ${{ secrets.CODECOV_TOKEN }} 41 | file: ./coverage.xml 42 | flags: unittests 43 | name: codecov-umbrella 44 | fail_ci_if_error: true 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | docs/* 2 | # Temporary Python files 3 | *.pyc 4 | *.egg-info 5 | __pycache__ 6 | .ipynb_checkpoints 7 | 8 | # pyenv 9 | .python-version 10 | 11 | .benchmarks 12 | poetry.lock 13 | 14 | pip-wheel-metadata/ 15 | 16 | .vscode 17 | 18 | # Temporary OS files 19 | Icon* 20 | 21 | # Pytest cache 22 | .pytest_cache/* 23 | 24 | # Virtual environment 25 | venv/* 26 | 27 | # Temporary virtual environment files 28 | /.cache/ 29 | /.venv/ 30 | 31 | # Temporary server files 32 | .env 33 | *.pid 34 | *.swp 35 | 36 | # 
Generated documentation 37 | /docs/gen/ 38 | /docs/apidocs/ 39 | /docs/_build/ 40 | /site/ 41 | /*.html 42 | /*.rst 43 | /docs/*.png 44 | 45 | # Google Drive 46 | *.gdoc 47 | *.gsheet 48 | *.gslides 49 | *.gdraw 50 | 51 | # Testing and coverage results 52 | /.pytest/ 53 | /.coverage 54 | /.coverage.* 55 | /htmlcov/ 56 | /xmlreport/ 57 | /pyunit.xml 58 | /tmp/ 59 | *.tmp 60 | 61 | # Build and release directories 62 | /build/ 63 | /dist/ 64 | *.spec 65 | 66 | # Sublime Text 67 | *.sublime-workspace 68 | 69 | # Eclipse 70 | .settings 71 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/mirrors-isort 3 | rev: v5.10.1 4 | hooks: 5 | - id: isort 6 | args: ['--multi-line=3', '--trailing-comma', '--force-grid-wrap=0', '--use-parentheses', '--line-width=88'] 7 | 8 | 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v5.0.0 11 | hooks: 12 | - id: trailing-whitespace 13 | - id: check-added-large-files 14 | - id: check-yaml 15 | - id: mixed-line-ending 16 | args: ['--fix=lf'] 17 | 18 | - repo: https://github.com/humitos/mirrors-autoflake.git 19 | rev: v1.1 20 | hooks: 21 | - id: autoflake 22 | args: ['--in-place', '--remove-all-unused-imports'] 23 | 24 | - repo: https://github.com/psf/black 25 | rev: 25.1.0 26 | hooks: 27 | - id: black 28 | language_version: python3.9 29 | 30 | - repo: https://github.com/pre-commit/mirrors-mypy 31 | rev: v1.15.0 32 | hooks: 33 | - id: mypy 34 | files: flupy/ 35 | args: ["--config-file", "mypy.ini"] 36 | 37 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | build: 2 | image: latest 3 | python: 4 | version: 3.8 5 | setup_py_install: true 6 | -------------------------------------------------------------------------------- /.version: -------------------------------------------------------------------------------- 1 | 1.0.11 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # For Contributors 2 | 3 | ## Setup 4 | 5 | ### Requirements 6 | 7 | * Make: 8 | * Windows: http://mingw.org/download/installer 9 | * Mac: http://developer.apple.com/xcode 10 | * Linux: http://www.gnu.org/software/make 11 | * pipenv: http://docs.pipenv.org 12 | * Pandoc: http://johnmacfarlane.net/pandoc/installing.html 13 | * Graphviz: http://www.graphviz.org/Download.php 14 | 15 | To confirm these system dependencies are configured correctly: 16 | 17 | ```sh 18 | $ make doctor 19 | ``` 20 | 21 | ### Installation 22 | 23 | Install project dependencies into a virtual environment: 24 | 25 | ```sh 26 | $ make install 27 | ``` 28 | 29 | ## Development Tasks 30 | 31 | ### Testing 32 | 33 | Manually run the tests: 34 | 35 | ```sh 36 | $ make test 37 | ``` 38 | 39 | or keep them running on change: 40 | 41 | ```sh 42 | $ make watch 43 | ``` 44 | 45 | > In order to have OS X notifications, `brew install terminal-notifier`. 
46 | 47 | ### Documentation 48 | 49 | Build the documentation: 50 | 51 | ```sh 52 | $ make docs 53 | ``` 54 | 55 | ### Static Analysis 56 | 57 | Run linters and static analyzers: 58 | 59 | ```sh 60 | $ make pylint 61 | $ make pycodestyle 62 | $ make pydocstyle 63 | $ make check # includes all checks 64 | ``` 65 | 66 | ## Continuous Integration 67 | 68 | The CI server will report overall build status: 69 | 70 | ```sh 71 | $ make ci 72 | ``` 73 | 74 | ## Release Tasks 75 | 76 | Release to PyPI: 77 | 78 | ```sh 79 | $ make upload 80 | ``` 81 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | **The MIT License (MIT)** 4 | 5 | Copyright © 2017, Oliver Rice 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # flupy 2 | 3 |
4 | <!-- badge images (markup stripped): Tests, Codestyle Black, Python version, PyPI version, License, Download count -->
18 |
19 | ---
20 |
21 | **Documentation**: https://flupy.readthedocs.io/en/latest/
22 |
23 | **Source Code**: https://github.com/olirice/flupy
24 |
25 | ---
26 |
27 | ## Overview
28 | Flupy implements a [fluent interface](https://en.wikipedia.org/wiki/Fluent_interface) for operating on python iterables. All flupy methods return generators and are evaluated lazily. This allows expressions to transform data of arbitrary size in extremely limited memory.
29 |
30 | You can think of flupy as a lightweight, zero-dependency, pure python alternative to the excellent [Apache Spark](https://spark.apache.org/) project.
31 |
32 | ## Setup
33 |
34 | ### Requirements
35 |
36 | * Python 3.9+
37 |
38 | ### Installation
39 |
40 | Install flupy with pip:
41 | ```sh
42 | $ pip install flupy
43 | ```
44 |
45 | ### Library
46 | ```python
47 | from itertools import count
48 | from flupy import flu
49 |
50 | # Processing an infinite sequence in constant memory
51 | pipeline = (
52 |     flu(count())
53 |     .map(lambda x: x**2)
54 |     .filter(lambda x: x % 517 == 0)
55 |     .chunk(5)
56 |     .take(3)
57 | )
58 |
59 | for item in pipeline:
60 |     print(item)
61 |
62 | # Prints:
63 | # [0, 267289, 1069156, 2405601, 4276624]
64 | # [6682225, 9622404, 13097161, 17106496, 21650409]
65 | # [26728900, 32341969, 38489616, 45171841, 52388644]
66 | ```
67 |
68 | ### CLI
69 | The flupy command line interface brings the same syntax for lazy pipelines to your shell. Inputs to the `flu` command are auto-populated into a `Fluent` context named `_`.
70 | ````
71 | $ flu -h
72 | usage: flu [-h] [-f FILE] [-i [IMPORT [IMPORT ...]]] command
73 |
74 | flupy: a fluent interface for python
75 |
76 | positional arguments:
77 |   command               flupy command to execute on input
78 |
79 | optional arguments:
80 |   -h, --help            show this help message and exit
81 |   -f FILE, --file FILE  path to input file
82 |   -i [IMPORT [IMPORT ...]], --import [IMPORT [IMPORT ...]]
83 |                         modules to import
84 |                         Syntax: <module>:<object>:<alias>
85 |                         Examples:
86 |                             'import os' = '-i os'
87 |                             'import os as op_sys' = '-i os::op_sys'
88 |                             'from os import environ' = '-i os:environ'
89 |                             'from os import environ as env' = '-i os:environ:env'
90 | ````
--------------------------------------------------------------------------------
/benchmark/test_benchmark.py:
--------------------------------------------------------------------------------
1 | from itertools import cycle
2 |
3 | from flupy import flu
4 |
5 |
6 | def test_integration(benchmark):
7 |     @benchmark
8 |     def work():
9 |         (flu(range(100000)).chunk(100).chunk(2).map_item(0).count())
10 |
11 |
12 | def test_max(benchmark):
13 |     @benchmark
14 |     def work():
15 |         flu(range(300000)).max()
16 |
17 |
18 | def test_initialize(benchmark):
19 |     @benchmark
20 |     def work():
21 |         flu(range(10))
22 |
23 |
24 | def test_collect(benchmark):
25 |     @benchmark
26 |     def work():
27 |         flu(range(3)).collect()
28 |
29 |
30 | def test___getitem__(benchmark):
31 |     @benchmark
32 |     def work():
33 |         flu(range(350))[1:3].collect()
34 |
35 |
36 | def test_sum(benchmark):
37 |     @benchmark
38 |     def work():
39 |         gen = flu(range(1000)).sum()
40 |
41 |
42 | def test_reduce(benchmark):
43 |     @benchmark
44 |     def work():
45 |         flu(range(50)).reduce(lambda x, y: x + y)
46 |
47 |
48 | def test_fold_left(benchmark):
49 |     @benchmark
50 |     def work():
51 |         flu(range(5)).fold_left(lambda x, y: x + y, 0)
52 |
53 |
54 | def test_count(benchmark):
55 |     @benchmark
56 |     def work():
57 |         gen = flu(range(3000)).count()
58 |
59 |
60 | def test_min(benchmark):
61 |     @benchmark
62 |     def work():
63 |         
flu(range(3000)).min() 64 | 65 | 66 | def test_first(benchmark): 67 | @benchmark 68 | def work(): 69 | flu(range(3)).first() 70 | 71 | 72 | def test_last(benchmark): 73 | @benchmark 74 | def work(): 75 | flu(range(3000)).last() 76 | 77 | 78 | def test_head(benchmark): 79 | @benchmark 80 | def work(): 81 | flu(range(30000)).head(n=10) 82 | 83 | 84 | def test_tail(benchmark): 85 | @benchmark 86 | def work(): 87 | gen = flu(range(30000)).tail(n=10) 88 | 89 | 90 | def test_unique(benchmark): 91 | class NoHash: 92 | def __init__(self, letter, keyf): 93 | self.letter = letter 94 | self.keyf = keyf 95 | 96 | a = NoHash("a", 1) 97 | b = NoHash("b", 1) 98 | c = NoHash("c", 2) 99 | 100 | data = [x % 500 for x in range(10000)] 101 | 102 | @benchmark 103 | def work(): 104 | gen = flu(data).unique().collect() 105 | 106 | 107 | def test_sort(benchmark): 108 | @benchmark 109 | def work(): 110 | flu(range(3000, 0, -1)).sort().collect() 111 | 112 | 113 | def test_shuffle(benchmark): 114 | original_order = list(range(10000)) 115 | 116 | @benchmark 117 | def work(): 118 | flu(original_order).shuffle().collect() 119 | 120 | 121 | def test_map(benchmark): 122 | @benchmark 123 | def work(): 124 | flu(range(3)).map(lambda x: x + 2).collect() 125 | 126 | 127 | def test_rate_limit(benchmark): 128 | @benchmark 129 | def work(): 130 | flu(range(300)).rate_limit(50000000000000).collect() 131 | 132 | 133 | def test_map_item(benchmark): 134 | data = flu(range(300)).map(lambda x: {"a": x}) 135 | 136 | @benchmark 137 | def work(): 138 | gen = flu(data).map_item("a") 139 | 140 | 141 | def test_map_attr(benchmark): 142 | class Person: 143 | def __init__(self, age: int) -> None: 144 | self.age = age 145 | 146 | people = flu(range(200)).map(Person).collect() 147 | 148 | @benchmark 149 | def work(): 150 | flu(people).map_attr("age").collect() 151 | 152 | 153 | def test_filter(benchmark): 154 | @benchmark 155 | def work(): 156 | flu(range(3)).filter(lambda x: 0 < x < 2).collect() 157 | 158 | 159 | def test_take(benchmark): 160 | @benchmark 161 | def work(): 162 | flu(range(10)).take(5).collect() 163 | 164 | 165 | def test_take_while(benchmark): 166 | @benchmark 167 | def work(): 168 | flu(cycle(range(10))).take_while(lambda x: x < 4).collect() 169 | 170 | 171 | def test_drop_while(benchmark): 172 | @benchmark 173 | def work(): 174 | flu([1, 2, 3, 4, 3, 2, 1]).drop_while(lambda x: x < 4).collect() 175 | 176 | 177 | def test_group_by(benchmark): 178 | @benchmark 179 | def work(): 180 | flu([1, 1, 1, 2, 2, 2, 2, 3]).zip(range(100)).group_by(lambda x: x[0]).collect() 181 | 182 | 183 | def test_chunk(benchmark): 184 | @benchmark 185 | def work(): 186 | flu(range(500)).chunk(2).collect() 187 | 188 | 189 | def test_enumerate(benchmark): 190 | @benchmark 191 | def work(): 192 | flu(range(3)).enumerate(start=1).collect() 193 | 194 | 195 | def test_zip(benchmark): 196 | @benchmark 197 | def work(): 198 | flu(range(3)).zip(range(3)).collect() 199 | 200 | 201 | def test_zip_longest(benchmark): 202 | @benchmark 203 | def work(): 204 | flu(range(3)).zip_longest(range(5)).collect() 205 | 206 | 207 | def test_window(benchmark): 208 | @benchmark 209 | def work(): 210 | gen = flu(range(5)).window(n=3, step=3).collect 211 | 212 | 213 | def test_flatten(benchmark): 214 | nested = [1, [2, (3, [4])], ["rbsd", "abc"], (7,)] 215 | 216 | @benchmark 217 | def work(): 218 | gen = flu(nested).flatten(depth=2, base_type=tuple).collect() 219 | 220 | 221 | def test_tee(benchmark): 222 | @benchmark 223 | def work(): 224 | gen1, gen2, gen3 = 
flu(range(100)).tee(3) 225 | 226 | 227 | def test_join_left(benchmark): 228 | @benchmark 229 | def work(): 230 | flu(range(6)).join_left(range(0, 6, 2)).collect() 231 | 232 | 233 | def test_join_inner(benchmark): 234 | @benchmark 235 | def work(): 236 | flu(range(6)).join_inner(range(0, 6, 2)).collect() 237 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/flupy.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/more-itertools.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/more-itertools" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/more-itertools" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | API Reference 3 | ============= 4 | 5 | .. automodule:: flupy 6 | 7 | 8 | Container 9 | ========= 10 | 11 | .. autoclass:: flu 12 | 13 | ---- 14 | 15 | 16 | Grouping 17 | ======== 18 | 19 | .. 
automethod:: flu.chunk
20 | .. automethod:: flu.flatten
21 | .. automethod:: flu.denormalize
22 | .. automethod:: flu.group_by
23 | .. automethod:: flu.window
24 |
25 | ----
26 |
27 | Selecting
28 | =========
29 |
30 | .. automethod:: flu.filter
31 | .. automethod:: flu.take
32 | .. automethod:: flu.take_while
33 | .. automethod:: flu.drop_while
34 | .. automethod:: flu.unique
35 |
36 | ----
37 |
38 | Transforming
39 | ============
40 |
41 | .. automethod:: flu.enumerate
42 | .. automethod:: flu.join_left
43 | .. automethod:: flu.join_inner
44 | .. automethod:: flu.map
45 | .. automethod:: flu.map_attr
46 | .. automethod:: flu.map_item
47 | .. automethod:: flu.zip
48 | .. automethod:: flu.zip_longest
49 |
50 | ----
51 |
52 | Side Effects
53 | ============
54 |
55 | .. automethod:: flu.rate_limit
56 | .. automethod:: flu.side_effect
57 |
58 | ----
59 |
60 | Summarizing
61 | ===========
62 |
63 | .. automethod:: flu.count
64 | .. automethod:: flu.sum
65 | .. automethod:: flu.min
66 | .. automethod:: flu.max
67 | .. automethod:: flu.reduce
68 | .. automethod:: flu.fold_left
69 | .. automethod:: flu.first
70 | .. automethod:: flu.last
71 | .. automethod:: flu.head
72 | .. automethod:: flu.tail
73 | .. automethod:: flu.to_list
74 | .. automethod:: flu.collect
75 |
76 | ----
77 |
78 | Non-Constant Memory
79 | ===================
80 |
81 | .. automethod:: flu.group_by
82 | .. automethod:: flu.join_left
83 | .. automethod:: flu.join_inner
84 | .. automethod:: flu.shuffle
85 | .. automethod:: flu.sort
86 | .. automethod:: flu.tee
87 | .. automethod:: flu.unique
--------------------------------------------------------------------------------
/docs/cli.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Command Line
3 | ============
4 |
5 |
6 | The flupy CLI is a platform-agnostic application that gives full access to the flupy API and python from your shell.
7 |
8 | .. automodule:: flupy
9 |
10 | Usage
11 | =====
12 |
13 | ::
14 |
15 |     $ flu -h
16 |
17 |     usage: flu [-h] [-v] [-f FILE] [-i [IMPORT [IMPORT ...]]] command
18 |
19 |     flupy: a fluent interface for python
20 |
21 |     positional arguments:
22 |       command               command to execute against input
23 |
24 |     optional arguments:
25 |       -h, --help            show this help message and exit
26 |       -v, --version         show program's version number and exit
27 |       -f FILE, --file FILE  path to input file
28 |       -i [IMPORT [IMPORT ...]], --import [IMPORT [IMPORT ...]]
29 |                             modules to import
30 |                             Syntax: <module>:<object>:<alias>
31 |
32 |
33 | Basic Examples
34 | ==============
35 |
36 | When input data are provided to the `flu` command, an instance of the flu object is pre-populated with that input and stored in the variable `_`.
37 |
38 |
39 | .. note:: for more information on writing flupy commands, see the API Reference
40 |
41 | Piping from another command (stdin)
42 | -----------------------------------
43 | Example: Show lines of a log file that are errors::
44 |
45 |     $ cat logs.txt | flu '_.filter(lambda x: x.startswith("ERROR"))'
46 |
47 | Reading from a file
48 | -------------------
49 | Example: Show lines of a log file that are errors::
50 |
51 |     $ flu -f logs.txt '_.filter(lambda x: x.startswith("ERROR"))'
52 |
53 | No Input data
54 | -------------
55 | flupy does not require input data if they can be generated from within python, e.g. with `range(10)`. When no input data are provided, the iterable at the beginning of the flupy command must be wrapped in a flu instance.
56 | 57 | Example: Even integers less than 10:: 58 | 59 | $ flu 'flu(range(10)).filter(lambda x: x%2==0)' 60 | 61 | Import System 62 | ============= 63 | 64 | Passing `-i` or `--import` to the cli allows you to import standard and third party libraries installed in the same environment. 65 | 66 | Import syntax 67 | 68 | -i :: 69 | 70 | 71 | .. note:: for multiple imports pass `-i` multiple times 72 | 73 | Import Examples 74 | --------------- 75 | **import os**:: 76 | 77 | $ flu 'flu(os.environ)' -i os 78 | 79 | **from os import environ**:: 80 | 81 | $ flu 'flu(environ)' -i os:environ 82 | 83 | **from os import environ as env**:: 84 | 85 | $ flu 'flu(env)' -i os:environ:env 86 | 87 | **import os as opsys**:: 88 | 89 | $ flu 'flu(opsys.environ)' -i os::opsys 90 | 91 | 92 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is execfile()d with the current directory set to its containing dir. 4 | # 5 | # Note that not all possible configuration values are present in this 6 | # autogenerated file. 7 | # 8 | # All configuration values have a default; values that are commented out 9 | # serve to show the default. 10 | 11 | import os 12 | import sys 13 | from typing import Dict 14 | 15 | import sphinx_rtd_theme 16 | 17 | # If extensions (or modules to document with autodoc) are in another directory, 18 | # add these directories to sys.path here. If the directory is relative to the 19 | # documentation root, use os.path.abspath to make it absolute, like shown here. 20 | sys.path.insert(0, os.path.abspath("..")) 21 | sys.path.insert(0, os.path.abspath("../src")) 22 | 23 | # -- General configuration ----------------------------------------------------- 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | # needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be extensions 29 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 30 | extensions = ["sphinx.ext.autodoc", "sphinx_rtd_theme", "sphinx.ext.viewcode"] 31 | 32 | # Add any paths that contain templates here, relative to this directory. 33 | templates_path = ["_templates"] 34 | 35 | # The suffix of source filenames. 36 | source_suffix = ".rst" 37 | 38 | # The encoding of source files. 39 | # source_encoding = 'utf-8-sig' 40 | 41 | # The master toctree document. 42 | master_doc = "index" 43 | 44 | # General information about the project. 45 | project = "flupy" 46 | copyright = "2021, Oliver Rice" 47 | 48 | # The version info for the project you're documenting, acts as replacement for 49 | # |version| and |release|, also used in various other places throughout the 50 | # built documents. 51 | # 52 | # The short X.Y version. 53 | version = "1.1.0" 54 | # The full version, including alpha/beta/rc tags. 55 | release = version 56 | 57 | # The language for content autogenerated by Sphinx. Refer to documentation 58 | # for a list of supported languages. 59 | # language = None 60 | 61 | # There are two options for replacing |today|: either, you set today to some 62 | # non-false value, then it is used: 63 | # today = '' 64 | # Else, today_fmt is used as the format for a strftime call. 65 | # today_fmt = '%B %d, %Y' 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 
69 | exclude_patterns = ["_build"] 70 | 71 | # The reST default role (used for this markup: `text`) to use for all documents. 72 | # default_role = None 73 | 74 | # If true, '()' will be appended to :func: etc. cross-reference text. 75 | # add_function_parentheses = True 76 | 77 | # If true, the current module name will be prepended to all description 78 | # unit titles (such as .. function::). 79 | # add_module_names = True 80 | 81 | # If true, sectionauthor and moduleauthor directives will be shown in the 82 | # output. They are ignored by default. 83 | # show_authors = False 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | pygments_style = "sphinx" 87 | 88 | # A list of ignored prefixes for module index sorting. 89 | # modindex_common_prefix = [] 90 | 91 | 92 | # -- Options for HTML output --------------------------------------------------- 93 | 94 | # The theme to use for HTML and HTML Help pages. See the documentation for 95 | # a list of builtin themes. 96 | html_theme = "sphinx_rtd_theme" 97 | 98 | # Theme options are theme-specific and customize the look and feel of a theme 99 | # further. For a list of options available for each theme, see the 100 | # documentation. 101 | # html_theme_options = {} 102 | 103 | # Add any paths that contain custom themes here, relative to this directory. 104 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 105 | 106 | # The name for this set of Sphinx documents. If None, it defaults to 107 | # " v documentation". 108 | # html_title = None 109 | 110 | # A shorter title for the navigation bar. Default is the same as html_title. 111 | # html_short_title = None 112 | 113 | # The name of an image file (relative to this directory) to place at the top 114 | # of the sidebar. 115 | # html_logo = None 116 | 117 | # The name of an image file (within the static path) to use as favicon of the 118 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 119 | # pixels large. 120 | # html_favicon = None 121 | 122 | # Add any paths that contain custom static files (such as style sheets) here, 123 | # relative to this directory. They are copied after the builtin static files, 124 | # so a file named "default.css" will overwrite the builtin "default.css". 125 | # html_static_path = ["_static"] 126 | html_static_path = [] 127 | 128 | html_context = { 129 | # https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html 130 | # "css_files": ["_static/theme_overrides.css"] 131 | "css_files": [] 132 | } 133 | 134 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 135 | # using the given strftime format. 136 | # html_last_updated_fmt = '%b %d, %Y' 137 | 138 | # If true, SmartyPants will be used to convert quotes and dashes to 139 | # typographically correct entities. 140 | # html_use_smartypants = True 141 | 142 | # Custom sidebar templates, maps document names to template names. 143 | # html_sidebars = {} 144 | 145 | # Additional templates that should be rendered to pages, maps page names to 146 | # template names. 147 | # html_additional_pages = {} 148 | 149 | # If false, no module index is generated. 150 | # html_domain_indices = True 151 | 152 | # If false, no index is generated. 153 | # html_use_index = True 154 | 155 | # If true, the index is split into individual pages for each letter. 156 | # html_split_index = False 157 | 158 | # If true, links to the reST sources are added to the pages. 
159 | # html_show_sourcelink = True 160 | 161 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 162 | # html_show_sphinx = True 163 | 164 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 165 | # html_show_copyright = True 166 | 167 | # If true, an OpenSearch description file will be output, and all pages will 168 | # contain a tag referring to it. The value of this option must be the 169 | # base URL from which the finished HTML is served. 170 | # html_use_opensearch = '' 171 | 172 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 173 | # html_file_suffix = None 174 | 175 | # Output file base name for HTML help builder. 176 | htmlhelp_basename = "flupydoc" 177 | 178 | 179 | # -- Options for LaTeX output -------------------------------------------------- 180 | 181 | latex_elements: Dict[str, str] = { 182 | # The paper size ('letterpaper' or 'a4paper'). 183 | #'papersize': 'letterpaper', 184 | # The font size ('10pt', '11pt' or '12pt'). 185 | #'pointsize': '10pt', 186 | # Additional stuff for the LaTeX preamble. 187 | #'preamble': '', 188 | } 189 | 190 | # Grouping the document tree into LaTeX files. List of tuples 191 | # (source start file, target name, title, author, documentclass [howto/manual]). 192 | latex_documents = [("index", "flupy.tex", "flupy Documentation", "Oliver Rice", "manual")] 193 | 194 | # The name of an image file (relative to this directory) to place at the top of 195 | # the title page. 196 | # latex_logo = None 197 | 198 | # For "manual" documents, if this is true, then toplevel headings are parts, 199 | # not chapters. 200 | # latex_use_parts = False 201 | 202 | # If true, show page references after internal links. 203 | # latex_show_pagerefs = False 204 | 205 | # If true, show URL addresses after external links. 206 | # latex_show_urls = False 207 | 208 | # Documents to append as an appendix to all manuals. 209 | # latex_appendices = [] 210 | 211 | # If false, no module index is generated. 212 | # latex_domain_indices = True 213 | 214 | 215 | # -- Options for manual page output -------------------------------------------- 216 | 217 | # One entry per manual page. List of tuples 218 | # (source start file, name, description, authors, manual section). 219 | man_pages = [("index", "flupy", "flupy Documentation", ["Oliver Rice"], 1)] 220 | 221 | # If true, show URL addresses after external links. 222 | # man_show_urls = False 223 | 224 | 225 | # -- Options for Texinfo output ------------------------------------------------ 226 | 227 | # Grouping the document tree into Texinfo files. List of tuples 228 | # (source start file, target name, title, author, 229 | # dir menu entry, description, category) 230 | texinfo_documents = [ 231 | ( 232 | "index", 233 | "flupy", 234 | "flupy Documentation", 235 | "Oliver Rice", 236 | "flupy", 237 | "A fluent interface to python collections.", 238 | "Miscellaneous", 239 | ) 240 | ] 241 | 242 | # Documents to append as an appendix to all manuals. 243 | # texinfo_appendices = [] 244 | 245 | # If false, no module index is generated. 246 | # texinfo_domain_indices = True 247 | 248 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 249 | # texinfo_show_urls = 'footnote' 250 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ./welcome.rst 2 | 3 | Contents 4 | ======== 5 | 6 | .. 
toctree::
7 |    :maxdepth: 2
8 |
9 |    welcome
10 |    api
11 |    cli
12 |
13 |
14 | .. toctree::
15 |    :maxdepth: 1
16 |
17 |    license
18 |    influances
19 |    versions
--------------------------------------------------------------------------------
/docs/influances.rst:
--------------------------------------------------------------------------------
1 | =================
2 | Design Influences
3 | =================
4 |
5 | - more-itertools_
6 | - pyspark_
7 | - pydash_
8 | - sqlalchemy_
9 | - scala_
10 |
11 | .. _fluent: https://en.wikipedia.org/wiki/Fluent_interface
12 | .. _more-itertools: https://github.com/erikrose/more-itertools
13 | .. _pyspark: http://spark.apache.org/docs/2.2.0/api/python/pyspark.html
14 | .. _sqlalchemy: https://www.sqlalchemy.org/
15 | .. _pydash: https://pydash.readthedocs.io/en/latest/index.html
16 | .. _scala: https://www.scala-lang.org/
--------------------------------------------------------------------------------
/docs/license.rst:
--------------------------------------------------------------------------------
1 | =======
2 | License
3 | =======
4 |
5 | flupy is under the MIT License. See the LICENSE file.
6 |
7 | Conditions for Contributors
8 | ===========================
9 |
10 | By contributing to this software project, you are agreeing to the following
11 | terms and conditions for your contributions: First, you agree your
12 | contributions are submitted under the MIT license. Second, you represent you
13 | are authorized to make the contributions and grant the license. If your
14 | employer has rights to intellectual property that includes your contributions,
15 | you represent that you have received permission to make contributions and grant
16 | the required license on behalf of that employer.
--------------------------------------------------------------------------------
/docs/versions.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | Version History
3 | ===============
4 |
5 | .. automodule:: flupy
6 |
7 | 1.0.0
8 | -----
9 |
10 | * New Capabilities:
11 |     * Everything
12 |
13 |
14 | 1.1.0
15 | -----
16 |
17 | * Remove support for calling instance methods on an uninitialized flu class, passing an iterable as the *self* argument
18 | * Remove `flupy.Fluent` from the top level `flupy` public API
19 | * Remove `flupy.with_iter` from the API
20 |
21 |
22 | 1.1.2
23 | -----
24 |
25 | * Change the `Fluent` class name to `flu` and remove the class alias to improve docs readability
26 | * Add type hints for `flu.sum`
--------------------------------------------------------------------------------
/docs/welcome.rst:
--------------------------------------------------------------------------------
1 | ================
2 | Welcome to Flupy
3 | ================
4 |
5 | flupy is a lightweight library and CLI for implementing python data pipelines with a fluent interface.
6 |
7 |
8 | Under the hood, flupy is built on generators. That means its pipelines evaluate lazily and use a constant amount of memory no matter how much data are being processed. This allows flupy to tackle petabyte-scale data manipulation as easily as it operates on a small list.
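Because every method returns a lazy generator, building a pipeline does no work up front; elements are computed only when the pipeline is consumed. A minimal sketch of that behaviour, using only methods from the API reference (the snippet and its output are illustrative, not from the original docs)::

    from itertools import count
    from flupy import flu

    # Constructing the pipeline is instant, even over an infinite iterator
    pipeline = flu(count()).map(lambda x: x * 2)

    # Work happens only on consumption
    pipeline.take(3).to_list()  # [0, 2, 4]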
9 |
10 | API
11 | ===
12 | ::
13 |
14 |     import json
15 |     from flupy import flu
16 |
17 |     logs = open('logs.jl', 'r')
18 |
19 |     error_count = (
20 |         flu(logs)
21 |         .map(lambda x: json.loads(x))
22 |         .filter(lambda x: x['level'] == 'ERROR')
23 |         .count()
24 |     )
25 |
26 |     print(error_count)
27 |     # 14
28 |
29 |
30 | CLI
31 | ===
32 |
33 | The flupy library, and the python runtime, are also accessible from the `flu` command line utility::
34 |
35 |     $ cat logs.txt | flu "_.filter(lambda x: x.startswith('ERROR'))"
36 |
37 |
38 | For more information about the `flu` command see :doc:`command line <./cli>`.
39 |
40 |
41 | Getting Started
42 | ===============
43 |
44 | **Requirements**
45 |
46 | Python 3.9+
47 |
48 | **Installation**
49 | ::
50 |
51 |     $ pip install flupy
52 |
53 |
54 | Example
55 | =======
56 |
57 | Since 2008, what domains are our customers coming from?::
58 |
59 |
60 |     from flupy import flu
61 |
62 |     customers = [
63 |         {'name': 'Jane', 'signup_year': 2018, 'email': 'jane@ibm.com'},
64 |         {'name': 'Fred', 'signup_year': 2011, 'email': 'fred@google.com'},
65 |         {'name': 'Lisa', 'signup_year': 2014, 'email': 'jane@ibm.com'},
66 |         {'name': 'Jack', 'signup_year': 2007, 'email': 'jane@apple.com'},
67 |     ]
68 |
69 |     pipeline = (
70 |         flu(customers)
71 |         .filter(lambda x: x['signup_year'] > 2008)
72 |         .map_item('email')
73 |         .map(lambda x: x.partition('@')[2])
74 |         .group_by()  # defaults to identity
75 |         .map(lambda x: (x[0], x[1].count()))
76 |         .collect()
77 |     )
78 |
79 |     print(pipeline)
80 |     # [('google.com', 1), ('ibm.com', 2)]
81 |
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | ignore_missing_imports = True
3 | strict_optional = True
4 | follow_imports = skip
5 | warn_redundant_casts = True
6 | warn_unused_ignores = False
7 | check_untyped_defs = True
8 | no_implicit_reexport = True
9 |
10 | # Strict Mode:
11 | disallow_untyped_defs = True
12 | disallow_any_generics = True
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "flupy"
3 | version = "1.2.2"
4 | description = "Fluent data processing in Python - a chainable stream processing library for expressive data manipulation using method chaining"
5 | authors = ["Oliver Rice "]
6 | license = "MIT"
7 | readme = "README.md"
8 | repository = "https://github.com/olirice/flupy"
9 | packages = [{include = "flupy", from = "src"}]
10 | classifiers = [
11 |     "Development Status :: 4 - Beta",
12 |     "Natural Language :: English",
13 |     "Operating System :: OS Independent",
14 |     "Programming Language :: Python",
15 |     "Programming Language :: Python :: 3",
16 |     "Programming Language :: Python :: 3.9",
17 |     "Programming Language :: Python :: 3.10",
18 |     "Programming Language :: Python :: 3.11",
19 |     "Programming Language :: Python :: 3.12",
20 |     "Programming Language :: Python :: 3.13",
21 | ]
22 |
23 | [tool.poetry.dependencies]
24 | python = ">=3.9"
25 | typing_extensions = ">=4"
26 |
27 | [tool.poetry.group.dev.dependencies]
28 | pytest = "*"
29 | pytest-cov = "*"
30 | pytest-benchmark = "*"
31 | pre-commit = "*"
32 | pylint = "*"
33 | black = "*"
34 | mypy = "*"
35 |
36 | [tool.poetry.scripts]
37 | flu = "flupy.cli.cli:main"
38 | flu_precommit = "flupy.cli.cli:precommit"
39 |
40 | [build-system]
41 | requires = ["poetry-core>=2.0.0"]
42 | build-backend = 
"poetry.core.masonry.api" 43 | 44 | [tool.black] 45 | line-length = 120 46 | exclude = ''' 47 | /( 48 | \.git 49 | | \.hg 50 | | \.mypy_cache 51 | | \.tox 52 | | \.venv 53 | | _build 54 | | buck-out 55 | | build 56 | | dist 57 | )/ 58 | ''' 59 | 60 | [tool.mypy] 61 | python_version = "3.9" 62 | ignore_missing_imports = true 63 | strict_optional = true 64 | follow_imports = "skip" 65 | warn_redundant_casts = true 66 | warn_unused_ignores = false 67 | check_untyped_defs = true 68 | no_implicit_reexport = true 69 | disallow_untyped_defs = true 70 | disallow_any_generics = true 71 | 72 | [tool.pytest.ini_options] 73 | addopts = "--cov=src/flupy src/tests" 74 | 75 | [tool.coverage.report] 76 | exclude_lines = [ 77 | "pragma: no cover", 78 | "if TYPE_CHECKING:", 79 | "raise AssertionError", 80 | "raise NotImplementedError", 81 | "@overload", 82 | "pass", 83 | ] 84 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --cov=src/flupy src/tests 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description_file = README.md 3 | -------------------------------------------------------------------------------- /src/flupy/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import version 2 | 3 | from flupy.cli.utils import walk_dirs, walk_files 4 | from flupy.fluent import flu 5 | 6 | __project__ = "flupy" 7 | __version__ = version(__project__) 8 | 9 | __all__ = ["flu", "walk_files", "walk_dirs"] 10 | -------------------------------------------------------------------------------- /src/flupy/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olirice/flupy/0205b524c7a9da547bfe0922a02285eec0ca6925/src/flupy/cli/__init__.py -------------------------------------------------------------------------------- /src/flupy/cli/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import importlib 3 | import sys 4 | from signal import SIG_DFL, SIGPIPE, signal 5 | from typing import Any, Dict, Generator, List, Optional 6 | 7 | from flupy import __version__, flu, walk_dirs, walk_files 8 | 9 | 10 | def read_file(path: str) -> Generator[str, None, None]: 11 | """Yield lines from a file given its path""" 12 | with open(path, "r") as f: 13 | yield from f 14 | 15 | 16 | def parse_args(args: List[str]) -> argparse.Namespace: 17 | """Parse input arguments""" 18 | parser = argparse.ArgumentParser( 19 | description="flupy: a fluent interface for python collections", 20 | formatter_class=argparse.RawTextHelpFormatter, 21 | ) 22 | parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__) 23 | parser.add_argument("command", help="command to execute against input") 24 | parser.add_argument("-f", "--file", help="path to input file") 25 | parser.add_argument( 26 | "-i", 27 | "--import", 28 | nargs="*", 29 | default=[], 30 | help="modules to import\n" 31 | "Syntax: ::\n" 32 | "Examples:\n" 33 | "\t'import os' = '-i os'\n" 34 | "\t'import os as op_sys' = '-i os::op_sys'\n" 35 | "\t'from os import environ' = '-i os:environ'\n" 36 | "\t'from os import environ as env' = '-i os:environ:env'\n", 37 | ) 38 | 
return parser.parse_args(args) 39 | 40 | 41 | def build_import_dict(imps: List[str]) -> Dict[str, Any]: 42 | """Execute CLI scoped imports""" 43 | import_dict = {} 44 | for imp_stx in imps: 45 | module, _, obj_alias = imp_stx.partition(":") 46 | obj, _, alias = obj_alias.partition(":") 47 | 48 | if not obj: 49 | import_dict[alias or module] = importlib.import_module(module) 50 | else: 51 | _garb = importlib.import_module(module) 52 | import_dict[alias or obj] = getattr(_garb, obj) 53 | return import_dict 54 | 55 | 56 | def main(argv: Optional[List[str]] = None) -> None: 57 | """CLI Entrypoint""" 58 | args = parse_args(argv[1:] if argv is not None else sys.argv[1:]) 59 | 60 | _command = args.command 61 | _file = args.file 62 | _import = getattr(args, "import") 63 | 64 | import_dict = build_import_dict(_import) 65 | 66 | if _file: 67 | _ = flu(read_file(_file)).map(str.rstrip) 68 | else: 69 | # Do not raise exception for Broken Pipe 70 | signal(SIGPIPE, SIG_DFL) 71 | _ = flu(sys.stdin).map(str.rstrip) 72 | 73 | locals_dict = { 74 | "flu": flu, 75 | "_": _, 76 | "walk_files": walk_files, 77 | "walk_dirs": walk_dirs, 78 | } 79 | 80 | pipeline = eval(_command, import_dict, locals_dict) 81 | 82 | if hasattr(pipeline, "__iter__") and not isinstance(pipeline, (str, bytes)): 83 | for r in pipeline: 84 | sys.stdout.write(str(r) + "\n") 85 | 86 | elif pipeline is None: 87 | pass 88 | else: 89 | sys.stdout.write(str(pipeline) + "\n") 90 | -------------------------------------------------------------------------------- /src/flupy/cli/utils.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=invalid-name 2 | import os 3 | from typing import Generator 4 | 5 | from flupy.fluent import Fluent, flu 6 | 7 | 8 | def walk_files(*pathes: str, abspath: bool = True) -> "Fluent[str]": 9 | """Yield files recursively starting from each location in *pathes""" 10 | 11 | if pathes == (): 12 | pathes = (".",) 13 | 14 | def _impl() -> Generator[str, None, None]: 15 | for path in pathes: 16 | for d, _, files in os.walk(path): 17 | for x in files: 18 | rel_path = os.path.join(d, x) 19 | if abspath: 20 | yield os.path.abspath(rel_path) 21 | else: 22 | yield rel_path 23 | 24 | return flu(_impl()) 25 | 26 | 27 | def walk_dirs(path: str = ".") -> "Fluent[str]": 28 | """Yield files recursively starting from *path""" 29 | 30 | def _impl() -> Generator[str, None, None]: 31 | for d, _, _ in os.walk(path): 32 | yield d 33 | 34 | return flu(_impl()) 35 | -------------------------------------------------------------------------------- /src/flupy/fluent.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=invalid-name 2 | import time 3 | from collections import defaultdict, deque 4 | from collections.abc import Iterable as IterableType 5 | from functools import reduce 6 | from itertools import dropwhile, groupby, islice, product, takewhile, tee, zip_longest 7 | from random import sample 8 | from typing import ( 9 | Any, 10 | Callable, 11 | Collection, 12 | Deque, 13 | Generator, 14 | Generic, 15 | Hashable, 16 | Iterable, 17 | Iterator, 18 | List, 19 | Optional, 20 | Set, 21 | Tuple, 22 | Type, 23 | TypeVar, 24 | Union, 25 | overload, 26 | ) 27 | 28 | from typing_extensions import Concatenate, ParamSpec, Protocol 29 | 30 | __all__ = ["flu"] 31 | 32 | 33 | T = TypeVar("T") 34 | T_co = TypeVar("T_co", covariant=True) 35 | T_contra = TypeVar("T_contra", contravariant=True) 36 | _T1 = TypeVar("_T1") 37 | _T2 = TypeVar("_T2") 38 | 
_T3 = TypeVar("_T3") 39 | S = TypeVar("S") 40 | P = ParamSpec("P") 41 | 42 | CallableTakesIterable = Callable[[Iterable[T]], Collection[T]] 43 | 44 | 45 | class SupportsEquality(Protocol): 46 | def __eq__(self, __other: object) -> bool: 47 | pass 48 | 49 | 50 | class SupportsGetItem(Protocol[T_co]): 51 | def __getitem__(self, __k: Hashable) -> T_co: 52 | pass 53 | 54 | 55 | class SupportsIteration(Protocol[T_co]): 56 | def __iter__(self) -> Iterator[T]: 57 | pass 58 | 59 | 60 | class SupportsLessThan(Protocol): 61 | def __lt__(self, __other: Any) -> bool: 62 | pass 63 | 64 | 65 | SupportsLessThanT = TypeVar("SupportsLessThanT", bound="SupportsLessThan") 66 | 67 | 68 | class Empty: 69 | pass 70 | 71 | 72 | def identity(x: T) -> T: 73 | return x 74 | 75 | 76 | class Fluent(Generic[T]): 77 | """A fluent interface to lazy generator functions 78 | 79 | >>> from flupy import flu 80 | >>> ( 81 | flu(range(100)) 82 | .map(lambda x: x**2) 83 | .filter(lambda x: x % 3 == 0) 84 | .chunk(3) 85 | .take(2) 86 | .to_list() 87 | ) 88 | [[0, 9, 36], [81, 144, 225]] 89 | """ 90 | 91 | def __init__(self, iterable: Iterable[T]) -> None: 92 | iterator = iter(iterable) 93 | self._iterator: Iterator[T] = iterator 94 | 95 | @overload 96 | def __getitem__(self, index: int) -> T: 97 | pass 98 | 99 | @overload 100 | def __getitem__(self, index: slice) -> "Fluent[T]": 101 | pass 102 | 103 | def __getitem__(self, key: Union[int, slice]) -> Union[T, "Fluent[T]"]: 104 | if isinstance(key, int) and key >= 0: 105 | try: 106 | return next(islice(self._iterator, key, key + 1)) 107 | except StopIteration: 108 | raise IndexError("flu index out of range") 109 | elif isinstance(key, slice): 110 | return flu(islice(self._iterator, key.start, key.stop, key.step)) 111 | else: 112 | raise TypeError(f"Indices must be non-negative integers or slices, not {type(key).__name__}") 113 | 114 | ### Summary ### 115 | def collect(self, n: Optional[int] = None, container_type: CallableTakesIterable[T] = list) -> Collection[T]: 116 | """Collect items from iterable into a container 117 | 118 | >>> flu(range(4)).collect() 119 | [0, 1, 2, 3] 120 | 121 | >>> flu(range(4)).collect(container_type=set) 122 | {0, 1, 2, 3} 123 | 124 | >>> flu(range(4)).collect(n=2) 125 | [0, 1] 126 | """ 127 | return container_type(self.take(n)) 128 | 129 | def to_list(self) -> List[T]: 130 | """Collect items from iterable into a list 131 | 132 | >>> flu(range(4)).to_list() 133 | [0, 1, 2, 3] 134 | """ 135 | return list(self) 136 | 137 | def sum(self) -> Union[T, int]: 138 | """Sum of elements in the iterable 139 | 140 | >>> flu([1,2,3]).sum() 141 | 6 142 | 143 | """ 144 | return sum(self) # type: ignore 145 | 146 | def count(self) -> int: 147 | """Count of elements in the iterable 148 | 149 | >>> flu(['a','b','c']).count() 150 | 3 151 | """ 152 | return sum(1 for _ in self) 153 | 154 | def min(self: "Fluent[SupportsLessThanT]") -> SupportsLessThanT: 155 | """Smallest element in the interable 156 | 157 | >>> flu([1, 3, 0, 2]).min() 158 | 0 159 | """ 160 | return min(self) 161 | 162 | def max(self: "Fluent[SupportsLessThanT]") -> SupportsLessThanT: 163 | """Largest element in the interable 164 | 165 | >>> flu([0, 3, 2, 1]).max() 166 | 3 167 | """ 168 | return max(self) 169 | 170 | def first(self, default: Any = Empty()) -> T: 171 | """Return the first item of the iterable. Raise IndexError if empty, or return default if provided. 
172 | 173 | >>> flu([0, 1, 2, 3]).first() 174 | 0 175 | >>> flu([]).first(default="some_default") 176 | 'some_default' 177 | """ 178 | x: Union[Empty, T] = default 179 | for x in self: 180 | return x 181 | if isinstance(x, Empty): 182 | raise IndexError("Empty iterator") 183 | return x 184 | 185 | def last(self, default: Any = Empty()) -> T: 186 | """Return the last item of the iterble. Raise IndexError if empty or default if provided. 187 | 188 | >>> flu([0, 1, 2, 3]).last() 189 | 3 190 | >>> flu([]).last(default='some_default') 191 | 'some_default' 192 | """ 193 | x: Union[Empty, T] = default 194 | for x in self: 195 | pass 196 | if isinstance(x, Empty): 197 | raise IndexError("Empty iterator") 198 | return x 199 | 200 | def head(self, n: int = 10, container_type: CallableTakesIterable[T] = list) -> Collection[T]: 201 | """Returns up to the first *n* elements from the iterable. 202 | 203 | >>> flu(range(20)).head() 204 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 205 | 206 | >>> flu(range(15)).head(n=2) 207 | [0, 1] 208 | 209 | >>> flu([]).head() 210 | [] 211 | """ 212 | return self.take(n).collect(container_type=container_type) 213 | 214 | def tail(self, n: int = 10, container_type: CallableTakesIterable[T] = list) -> Collection[T]: 215 | """Return up to the last *n* elements from the iterable 216 | 217 | >>> flu(range(20)).tail() 218 | [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] 219 | 220 | >>> flu(range(15)).tail(n=2) 221 | [13, 14] 222 | """ 223 | val: Union[List[Empty], Tuple[Any, ...]] = [Empty()] 224 | for val in self.window(n, fill_value=Empty()): 225 | pass 226 | return container_type([x for x in val if not isinstance(x, Empty)]) 227 | 228 | ### End Summary ### 229 | 230 | ### Non-Constant Memory ### 231 | def sort( 232 | self: "Fluent[SupportsLessThanT]", 233 | key: Optional[Callable[[Any], Any]] = None, 234 | reverse: bool = False, 235 | ) -> "Fluent[SupportsLessThanT]": 236 | """Sort iterable by *key* function if provided or identity otherwise 237 | 238 | Note: sorting loads the entire iterable into memory 239 | 240 | >>> flu([3,6,1]).sort().to_list() 241 | [1, 3, 6] 242 | 243 | >>> flu([3,6,1]).sort(reverse=True).to_list() 244 | [6, 3, 1] 245 | 246 | >>> flu([3,-6,1]).sort(key=abs).to_list() 247 | [1, 3, -6] 248 | """ 249 | return Fluent(sorted(self, key=key, reverse=reverse)) 250 | 251 | def join_left( 252 | self, 253 | other: Iterable[_T1], 254 | key: Callable[[T], Hashable] = identity, 255 | other_key: Callable[[_T1], Hashable] = identity, 256 | ) -> "Fluent[Tuple[T, Union[_T1, None]]]": 257 | """Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries 258 | 259 | When no matching entry is found in *other*, entries in the iterable are paired with None 260 | 261 | Note: join_left loads *other* into memory 262 | 263 | >>> flu(range(6)).join_left(range(0, 6, 2)).to_list() 264 | [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)] 265 | """ 266 | 267 | def _impl() -> Generator[Tuple[T, Union[_T1, None]], None, None]: 268 | 269 | other_lookup = defaultdict(list) 270 | 271 | for entry_other in other: 272 | other_lookup[other_key(entry_other)].append(entry_other) 273 | 274 | for entry in self: 275 | matches: Optional[List[_T1]] = other_lookup.get(key(entry)) 276 | 277 | if matches: 278 | for match in matches: 279 | yield (entry, match) 280 | else: 281 | yield (entry, None) 282 | 283 | return Fluent(_impl()) 284 | 285 | def join_inner( 286 | self, 287 | other: Iterable[_T1], 288 | key: Callable[[T], 
Hashable] = identity, 289 | other_key: Callable[[_T1], Hashable] = identity, 290 | ) -> "Fluent[Tuple[T, _T1]]": 291 | """Join the iterable with another iterable, using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries. 292 | 293 | When no matching entry is found in *other*, entries in the iterable are filtered from the results. 294 | 295 | Note: join_inner loads *other* into memory 296 | 297 | >>> flu(range(6)).join_inner(range(0, 6, 2)).to_list() 298 | [(0, 0), (2, 2), (4, 4)] 299 | 300 | """ 301 | 302 | def _impl() -> Generator[Tuple[T, _T1], None, None]: 303 | 304 | other_lookup = defaultdict(list) 305 | 306 | for entry_other in other: 307 | other_lookup[other_key(entry_other)].append(entry_other) 308 | 309 | for entry in self: 310 | matches: List[_T1] = other_lookup[key(entry)] 311 | 312 | for match in matches: 313 | yield (entry, match) 314 | 315 | return Fluent(_impl()) 316 | 317 | def shuffle(self) -> "Fluent[T]": 318 | """Randomize the order of elements in the iterable. 319 | 320 | Note: shuffle loads the entire iterable into memory 321 | 322 | >>> flu([3,6,1]).shuffle().to_list() 323 | [6, 1, 3] 324 | """ 325 | dat: List[T] = self.to_list() 326 | return Fluent(sample(dat, len(dat))) 327 | 328 | def group_by( 329 | self, key: Callable[[T], Union[T, _T1]] = identity, sort: bool = True 330 | ) -> "Fluent[Tuple[Union[T,_T1], Fluent[T]]]": 331 | """Yield consecutive keys and groups from the iterable 332 | 333 | *key* is a function to compute a key value used in grouping and sorting for each element. *key* defaults to an identity function which returns the unchanged element 334 | 335 | When the iterable is pre-sorted according to *key*, setting *sort* to False will prevent loading the dataset into memory and improve performance 336 | 337 | >>> flu([2, 4, 2, 4]).group_by().to_list() 338 | [(2, <flu object>), (4, <flu object>)] 339 | 340 | Or, if the iterable is pre-sorted 341 | 342 | >>> flu([2, 2, 5, 5]).group_by(sort=False).to_list() 343 | [(2, <flu object>), (5, <flu object>)] 344 | 345 | Using a key function 346 | 347 | >>> points = [ 348 | {'x': 1, 'y': 0}, 349 | {'x': 4, 'y': 3}, 350 | {'x': 1, 'y': 5} 351 | ] 352 | >>> key_func = lambda u: u['x'] 353 | >>> flu(points).group_by(key=key_func, sort=True).to_list() 354 | [(1, <flu object>), (4, <flu object>)] 355 | """ 356 | 357 | gen = self.sort(key) if sort else self 358 | return Fluent(groupby(gen, key)).map(lambda x: (x[0], flu([y for y in x[1]]))) 359 | 360 | def unique(self, key: Callable[[T], Hashable] = identity) -> "Fluent[T]": 361 | """Yield elements that are unique by a *key*. 
362 | 363 | >>> flu([2, 3, 2, 3]).unique().to_list() 364 | [2, 3] 365 | 366 | >>> flu([2, -3, -2, 3]).unique(key=abs).to_list() 367 | [2, -3] 368 | """ 369 | 370 | def _impl() -> Generator[T, None, None]: 371 | seen: Set[Any] = set() 372 | for x in self: 373 | x_hash = key(x) 374 | if x_hash in seen: 375 | continue 376 | else: 377 | seen.add(x_hash) 378 | yield x 379 | 380 | return Fluent(_impl()) 381 | 382 | ### End Non-Constant Memory ### 383 | 384 | ### Side Effect ### 385 | def rate_limit(self, per_second: Union[int, float] = 100) -> "Fluent[T]": 386 | """Restrict consumption of the iterable to *per_second* items per second 387 | 388 | >>> import time 389 | >>> start_time = time.time() 390 | >>> _ = flu(range(3)).rate_limit(3).to_list() 391 | >>> print('Runtime', int(time.time() - start_time)) 392 | Runtime 1 # approximately 1 second for 3 items 393 | """ 394 | 395 | def _impl() -> Generator[T, None, None]: 396 | wait_time = 1.0 / per_second 397 | for val in self: 398 | start_time = time.time() 399 | yield val 400 | call_duration = time.time() - start_time 401 | time.sleep(max(wait_time - call_duration, 0.0)) 402 | 403 | return Fluent(_impl()) 404 | 405 | def side_effect( 406 | self, 407 | func: Callable[[T], Any], 408 | before: Optional[Callable[[], Any]] = None, 409 | after: Optional[Callable[[], Any]] = None, 410 | ) -> "Fluent[T]": 411 | """Invoke *func* for each item in the iterable before yielding the item. 412 | *func* takes a single argument and its output is discarded. 413 | *before* and *after* are optional functions that take no parameters; each is called exactly once, 414 | before iteration begins and after iteration ends, respectively. 415 | 416 | 417 | >>> flu(range(2)).side_effect(lambda x: print(f'Collected {x}')).to_list() 418 | Collected 0 419 | Collected 1 420 | [0, 1] 421 | """ 422 | 423 | def _impl() -> Generator[T, None, None]: 424 | try: 425 | if before is not None: 426 | before() 427 | 428 | for x in self: 429 | func(x) 430 | yield x 431 | 432 | finally: 433 | if after is not None: 434 | after() 435 | 436 | return Fluent(_impl()) 437 | 438 | ### End Side Effect ### 439 | 440 | def map(self, func: Callable[Concatenate[T, P], _T1], *args: Any, **kwargs: Any) -> "Fluent[_T1]": 441 | """Apply *func* to each element of the iterable 442 | 443 | >>> flu(range(5)).map(lambda x: x*x).to_list() 444 | [0, 1, 4, 9, 16] 445 | """ 446 | 447 | def _impl() -> Generator[_T1, None, None]: 448 | for val in self._iterator: 449 | yield func(val, *args, **kwargs) 450 | 451 | return Fluent(_impl()) 452 | 453 | def map_item(self: "Fluent[SupportsGetItem[T]]", item: Hashable) -> "Fluent[T]": 454 | """Extract *item* from every element of the iterable 455 | 456 | >>> flu([(2, 4), (2, 5)]).map_item(1).to_list() 457 | [4, 5] 458 | 459 | >>> flu([{'mykey': 8}, {'mykey': 5}]).map_item('mykey').to_list() 460 | [8, 5] 461 | """ 462 | 463 | def _impl() -> Generator[T, None, None]: 464 | for x in self: 465 | yield x[item] 466 | 467 | return Fluent(_impl()) 468 | 469 | def map_attr(self, attr: str) -> "Fluent[Any]": 470 | """Extract the attribute *attr* from each element of the iterable 471 | 472 | >>> from collections import namedtuple 473 | >>> MyTup = namedtuple('MyTup', ['value', 'backup_val']) 474 | >>> flu([MyTup(1, 5), MyTup(2, 4)]).map_attr('value').to_list() 475 | [1, 2] 476 | """ 477 | return self.map(lambda x: getattr(x, attr)) 478 | 479 | def filter(self, func: Callable[Concatenate[T, P], bool], *args: Any, **kwargs: Any) -> "Fluent[T]": 480 | """Yield elements of the iterable where 
*func* returns truthy. 481 | 482 | >>> flu(range(10)).filter(lambda x: x % 2 == 0).to_list() 483 | [0, 2, 4, 6, 8] 484 | """ 485 | 486 | def _impl() -> Generator[T, None, None]: 487 | for val in self._iterator: 488 | if func(val, *args, **kwargs): 489 | yield val 490 | 491 | return Fluent(_impl()) 492 | 493 | def reduce(self, func: Callable[[T, T], T]) -> T: 494 | """Apply a function of two arguments cumulatively to the items of the iterable, 495 | from left to right, so as to reduce the sequence to a single value 496 | 497 | >>> flu(range(5)).reduce(lambda x, y: x + y) 498 | 10 499 | """ 500 | return reduce(func, self) 501 | 502 | def fold_left(self, func: Callable[[S, T], S], initial: S) -> S: 503 | """Apply a function of two arguments cumulatively to the items of the iterable, 504 | from left to right, starting with *initial*, so as to fold the sequence to 505 | a single value 506 | 507 | >>> flu(range(5)).fold_left(lambda x, y: x + str(y), "") 508 | '01234' 509 | """ 510 | return reduce(func, self, initial) 511 | 512 | @overload 513 | def zip(self, __iter1: Iterable[_T1]) -> "Fluent[Tuple[T, _T1]]": ... 514 | 515 | @overload 516 | def zip(self, __iter1: Iterable[_T1], __iter2: Iterable[_T2]) -> "Fluent[Tuple[T, _T1, _T2]]": ... 517 | 518 | @overload 519 | def zip( 520 | self, __iter1: Iterable[_T1], __iter2: Iterable[_T2], __iter3: Iterable[_T3] 521 | ) -> "Fluent[Tuple[T, _T1, _T2, _T3]]": ... 522 | 523 | @overload 524 | def zip( 525 | self, 526 | __iter1: Iterable[Any], 527 | __iter2: Iterable[Any], 528 | __iter3: Iterable[Any], 529 | __iter4: Iterable[Any], 530 | *iterable: Iterable[Any], 531 | ) -> "Fluent[Tuple[T, ...]]": ... 532 | 533 | def zip(self, *iterable: Iterable[Any]) -> Union[ 534 | "Fluent[Tuple[T, ...]]", 535 | "Fluent[Tuple[T, _T1]]", 536 | "Fluent[Tuple[T, _T1, _T2]]", 537 | "Fluent[Tuple[T, _T1, _T2, _T3]]", 538 | ]: 539 | """Yields tuples pairing the i-th element of the instance 540 | with the i-th element of each argument in *iterable* 541 | 542 | >>> flu(range(5)).zip(range(3, 0, -1)).to_list() 543 | [(0, 3), (1, 2), (2, 1)] 544 | """ 545 | # @self_to_flu is not compatible with @overload 546 | # make sure any usage of self supports arbitrary iterables 547 | tup_iter = zip(iter(self), *iterable) 548 | return Fluent(tup_iter) 549 | 550 | def zip_longest(self, *iterable: Iterable[_T1], fill_value: Any = None) -> "Fluent[Tuple[T, ...]]": 551 | """Yields tuples pairing the i-th element of the instance 552 | with the i-th element of each argument in *iterable*. 553 | Iteration continues until the longest iterable is exhausted. 554 | If iterables are uneven in length, missing values are filled in with *fill_value* 555 | 556 | >>> flu(range(5)).zip_longest(range(3, 0, -1)).to_list() 557 | [(0, 3), (1, 2), (2, 1), (3, None), (4, None)] 558 | 559 | 560 | >>> flu(range(5)).zip_longest(range(3, 0, -1), fill_value='a').to_list() 561 | [(0, 3), (1, 2), (2, 1), (3, 'a'), (4, 'a')] 562 | """ 563 | return Fluent(zip_longest(self, *iterable, fillvalue=fill_value)) 564 | 565 | def enumerate(self, start: int = 0) -> "Fluent[Tuple[int, T]]": 566 | """Yields tuples from the instance where the first element 567 | is a count from initial value *start*. 
568 | 569 | >>> flu([3,4,5]).enumerate().to_list() 570 | [(0, 3), (1, 4), (2, 5)] 571 | """ 572 | return Fluent(enumerate(self, start=start)) 573 | 574 | def take(self, n: Optional[int] = None) -> "Fluent[T]": 575 | """Yield first *n* items of the iterable 576 | 577 | >>> flu(range(10)).take(2).to_list() 578 | [0, 1] 579 | """ 580 | return Fluent(islice(self._iterator, n)) 581 | 582 | def take_while(self, predicate: Callable[[T], bool]) -> "Fluent[T]": 583 | """Yield elements from the iterable so long as the predicate is true 584 | 585 | >>> flu(range(10)).take_while(lambda x: x < 3).to_list() 586 | [0, 1, 2] 587 | """ 588 | return Fluent(takewhile(predicate, self._iterator)) 589 | 590 | def drop_while(self, predicate: Callable[[T], bool]) -> "Fluent[T]": 591 | """Drop elements from the iterable as long as the predicate is true; 592 | afterwards, return every element 593 | 594 | >>> flu(range(10)).drop_while(lambda x: x < 3).to_list() 595 | [3, 4, 5, 6, 7, 8, 9] 596 | """ 597 | return Fluent(dropwhile(predicate, self._iterator)) 598 | 599 | def chunk(self, n: int) -> "Fluent[List[T]]": 600 | """Yield lists of elements from the iterable in groups of *n* 601 | 602 | If the iterable is not evenly divisible by *n*, the final list will be shorter 603 | 604 | >>> flu(range(10)).chunk(3).to_list() 605 | [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 606 | """ 607 | 608 | def _impl() -> Generator[List[T], None, None]: 609 | 610 | while True: 611 | vals: List[T] = list(self.take(n)) 612 | if vals: 613 | yield vals 614 | else: 615 | return 616 | 617 | return Fluent(_impl()) 618 | 619 | def flatten( 620 | self, 621 | depth: int = 1, 622 | base_type: Optional[Type[object]] = None, 623 | iterate_strings: bool = False, 624 | ) -> "Fluent[Any]": 625 | """Recursively flatten nested iterables (e.g., a list of lists of tuples) 626 | into non-iterable types, or down to an optional user-defined *base_type* 627 | 628 | Strings are treated as non-iterable for convenience. Set iterate_strings=True 629 | to change that behavior. 
630 | 631 | >>> flu([[0, 1, 2], [3, 4, 5]]).flatten().to_list() 632 | [0, 1, 2, 3, 4, 5] 633 | 634 | >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten().to_list() 635 | [0, [1, 2], [3, 4], 5] 636 | 637 | >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).to_list() 638 | [0, 1, 2, 3, 4, 5] 639 | 643 | >>> flu([1, (2, 2), 4, [5, (6, 6, 6)]]).flatten(base_type=tuple).to_list() 644 | [1, (2, 2), 4, 5, (6, 6, 6)] 645 | 646 | >>> flu([[2, 0], 'abc', 3, [4]]).flatten(iterate_strings=True).to_list() 647 | [2, 0, 'a', 'b', 'c', 3, 4] 648 | """ 649 | 650 | # TODO(OR): Reimplement with strong types 651 | def walk(node: Any, level: int) -> Generator[T, None, None]: 652 | if ( 653 | ((depth is not None) and (level > depth)) 654 | or (isinstance(node, str) and not iterate_strings) 655 | or ((base_type is not None) and isinstance(node, base_type)) 656 | ): 657 | yield node 658 | return 659 | try: 660 | tree = iter(node) 661 | except TypeError: 662 | yield node 663 | return 664 | else: 665 | for child in tree: 666 | for val in walk(child, level + 1): 667 | yield val 668 | 669 | return Fluent(walk(self, level=0)) 670 | 671 | def denormalize(self: "Fluent[SupportsIteration[Any]]", iterate_strings: bool = False) -> "Fluent[Tuple[Any, ...]]": 672 | """Denormalize iterable components of each record 673 | 674 | >>> flu([("abc", [1, 2, 3])]).denormalize().to_list() 675 | [('abc', 1), ('abc', 2), ('abc', 3)] 676 | 677 | >>> flu([("abc", [1, 2])]).denormalize(iterate_strings=True).to_list() 678 | [('a', 1), ('a', 2), ('b', 1), ('b', 2), ('c', 1), ('c', 2)] 679 | 680 | >>> flu([("abc", [])]).denormalize().to_list() 681 | [] 682 | """ 683 | 684 | def _impl() -> Generator[Tuple[Any, ...], None, None]: 685 | for record in self: 686 | iter_elements: List[Iterable[Any]] = [] 687 | element: Any 688 | for element in record: 689 | 690 | # String element, and string iteration is allowed 691 | if isinstance(element, str) and iterate_strings: 692 | iter_elements.append(element) 693 | 694 | # String element, and string iteration is not allowed 695 | elif isinstance(element, str): 696 | iter_elements.append([element]) 697 | 698 | # Iterable element 699 | elif isinstance(element, IterableType): 700 | iter_elements.append(element) 701 | 702 | # Non-iterable element 703 | else: 704 | iter_elements.append([element]) 705 | 706 | for row in product(*iter_elements): 707 | yield row 708 | 709 | return Fluent(_impl()) 710 | 711 | def window(self, n: int, step: int = 1, fill_value: Any = None) -> "Fluent[Tuple[Any, ...]]": 712 | """Yield a sliding window of width *n* over the given iterable. 
713 | 714 | Each window will advance in increments of *step*. 715 | 716 | If the length of the iterable does not evenly divide by *step*, 717 | the final output is padded with *fill_value* 718 | 719 | >>> flu(range(5)).window(3).to_list() 720 | [(0, 1, 2), (1, 2, 3), (2, 3, 4)] 721 | 722 | >>> flu(range(5)).window(n=3, step=2).to_list() 723 | [(0, 1, 2), (2, 3, 4)] 724 | 725 | >>> flu(range(9)).window(n=4, step=3).to_list() 726 | [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, None)] 727 | 728 | >>> flu(range(9)).window(n=4, step=3, fill_value=-1).to_list() 729 | [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, -1)] 730 | """ 731 | 732 | def _impl() -> Generator[Tuple[Any, ...], None, None]: 733 | if n < 0: 734 | raise ValueError("n must be >= 0") 735 | elif n == 0: 736 | yield tuple() 737 | return 738 | if step < 1: 739 | raise ValueError("step must be >= 1") 740 | 741 | window: Deque[Any] = deque([], n) 742 | append = window.append 743 | 744 | # Initial deque fill 745 | for _ in range(n): 746 | append(next(self, fill_value)) 747 | yield tuple(window) 748 | 749 | # Appending new items to the right causes old items to fall off the left 750 | i = 0 751 | for item in self: 752 | append(item) 753 | i = (i + 1) % step 754 | if i % step == 0: 755 | yield tuple(window) 756 | 757 | # If there are items from the iterable in the window, pad with the given 758 | # value and emit them. 759 | if (i % step) and (step - i < n): 760 | for _ in range(step - i): 761 | append(fill_value) 762 | yield tuple(window) 763 | 764 | return Fluent(_impl()) 765 | 766 | def __iter__(self) -> "Fluent[T]": 767 | return self 768 | 769 | def __next__(self) -> T: 770 | return next(self._iterator) 771 | 772 | def tee(self, n: int = 2) -> "Fluent[Fluent[T]]": 773 | """Return *n* independent iterators from a single iterable 774 | 775 | Once tee() has made a split, the original iterable should not be used 776 | anywhere else; otherwise, the iterable could get advanced without the 777 | tee objects being informed 778 | 779 | >>> copy1, copy2 = flu(range(5)).tee() 780 | >>> copy1.sum() 781 | 10 782 | >>> copy2.to_list() 783 | [0, 1, 2, 3, 4] 784 | """ 785 | return Fluent((Fluent(x) for x in tee(self, n))) 786 | 787 | 788 | class flu(Fluent[T]): 789 | """A fluent interface to lazy generator functions 790 | 791 | >>> from flupy import flu 792 | >>> ( 793 | flu(range(100)) 794 | .map(lambda x: x**2) 795 | .filter(lambda x: x % 3 == 0) 796 | .chunk(3) 797 | .take(2) 798 | .to_list() 799 | ) 800 | [[0, 9, 36], [81, 144, 225]] 801 | """ 802 | -------------------------------------------------------------------------------- /src/flupy/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olirice/flupy/0205b524c7a9da547bfe0922a02285eec0ca6925/src/flupy/py.typed -------------------------------------------------------------------------------- /src/tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from tempfile import NamedTemporaryFile 2 | 3 | import pytest 4 | 5 | from flupy.cli.cli import build_import_dict, main, parse_args 6 | 7 | 8 | def test_parse_args(): 9 | with pytest.raises(SystemExit) as cm: 10 | parse_args([]) 11 | assert cm.value.code == 2 12 | 13 | args = parse_args(["_"]) 14 | assert args.command == "_" 15 | 16 | args = parse_args(["_", "-i", "os:environ:env"]) 17 | assert "os:environ:env" in getattr(args, "import") 18 | assert args.command == "_" 19 | 20 | import_dict = build_import_dict(["json"]) 21 | assert "json" 
in import_dict 22 | 23 | 24 | def test_build_import_dict(): 25 | import json 26 | 27 | import_dict = build_import_dict(["json"]) 28 | assert "json" in import_dict 29 | assert import_dict["json"] == json 30 | 31 | import_dict = build_import_dict(["json:dumps"]) 32 | assert "dumps" in import_dict 33 | assert import_dict["dumps"] == json.dumps 34 | 35 | import_dict = build_import_dict(["json:dumps:ds"]) 36 | assert "ds" in import_dict 37 | assert import_dict["ds"] == json.dumps 38 | 39 | import_dict = build_import_dict(["json::j"]) 40 | assert "j" in import_dict 41 | assert import_dict["j"] == json 42 | 43 | 44 | def test_show_help(capsys): 45 | with pytest.raises(SystemExit): 46 | main(["flu", "-h"]) 47 | 48 | result = capsys.readouterr() 49 | stdout = result.out 50 | assert stdout.startswith("usage") 51 | 52 | 53 | def test_show_version(capsys): 54 | main(["flu", "flu(range(5)).collect()"]) 55 | 56 | result = capsys.readouterr() 57 | stdout = result.out.replace("\n", "") 58 | assert stdout.startswith("0") 59 | 60 | 61 | def test_basic_pipeline(capsys): 62 | main(["flu", "flu(range(5)).collect()"]) 63 | result = capsys.readouterr() 64 | stdout = result.out.replace("\n", "") 65 | assert stdout.startswith("0") 66 | 67 | 68 | def test_pass_on_none_pipeline(capsys): 69 | main(["flu", "None"]) 70 | result = capsys.readouterr() 71 | stdout = result.out 72 | assert stdout == "" 73 | 74 | 75 | def test_non_iterable_non_none_pipeline(capsys): 76 | main(["flu", '"hello_world"']) 77 | result = capsys.readouterr() 78 | stdout = result.out.strip("\n") 79 | assert stdout == "hello_world" 80 | 81 | 82 | def test_cli_walk_files(capsys): 83 | main(["flu", "walk_files().head(2)"]) 84 | result = capsys.readouterr() 85 | stdout = result.out.strip("\n").split("\n") 86 | assert len(stdout) == 2 87 | 88 | 89 | def test_cli_walk_dirs(capsys): 90 | main(["flu", "walk_dirs().head(2)"]) 91 | result = capsys.readouterr() 92 | stdout = result.out.strip("\n").split("\n") 93 | assert len(stdout) == 2 94 | 95 | 96 | def test_from_file(capsys): 97 | with NamedTemporaryFile("w+") as f: 98 | f.write("hello") 99 | f.read() 100 | f_name = f.name 101 | main(["flu", "-f", f_name, "_.map(str.upper)"]) 102 | result = capsys.readouterr() 103 | stdout = result.out.strip("\n") 104 | assert stdout == "HELLO" 105 | 106 | 107 | def test_glob_imports(capsys): 108 | main(["flu", "flu(env).count()", "-i", "os:environ:env"]) 109 | result = capsys.readouterr() 110 | stdout = result.out 111 | assert stdout 112 | -------------------------------------------------------------------------------- /src/tests/test_cli_utils.py: -------------------------------------------------------------------------------- 1 | from flupy.cli.utils import walk_dirs, walk_files 2 | 3 | 4 | def test_walk_files(): 5 | assert walk_files().head() 6 | assert walk_files(abspath=False).head() 7 | 8 | 9 | def test_walk_dirs(): 10 | assert walk_dirs().head() 11 | -------------------------------------------------------------------------------- /src/tests/test_flu.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from itertools import count, cycle 3 | 4 | import pytest 5 | 6 | from flupy import flu 7 | 8 | 9 | def test_collect(): 10 | assert flu(range(3)).collect() == [0, 1, 2] 11 | assert flu(range(3)).collect(container_type=tuple) == (0, 1, 2) 12 | assert flu(range(3)).collect(n=2) == [0, 1] 13 | 14 | 15 | def test_to_list(): 16 | assert flu(range(3)).to_list() == [0, 1, 2] 17 | 18 | 19 | def test___getitem__(): 20 | assert 
flu(range(3))[1] == 1 21 | assert flu(range(3))[1:].collect() == [1, 2] 22 | assert flu(range(35))[1:2].collect() == [1] 23 | assert flu(range(35))[1:3].collect() == [1, 2] 24 | with pytest.raises(IndexError): 25 | flu([1])[4] 26 | with pytest.raises((KeyError, TypeError)): 27 | flu([1])["not an index"] 28 | 29 | 30 | def test_sum(): 31 | gen = flu(range(3)) 32 | assert gen.sum() == 3 33 | 34 | 35 | def test_reduce(): 36 | gen = flu(range(5)) 37 | assert gen.reduce(lambda x, y: x + y) == 10 38 | 39 | 40 | def test_fold_left(): 41 | assert flu(range(5)).fold_left(lambda x, y: x + y, 0) == 10 42 | assert flu(range(5)).fold_left(lambda x, y: x + str(y), "") == "01234" 43 | 44 | 45 | def test_count(): 46 | gen = flu(range(3)) 47 | assert gen.count() == 3 48 | 49 | 50 | def test_min(): 51 | gen = flu(range(3)) 52 | assert gen.min() == 0 53 | 54 | 55 | def test_first(): 56 | gen = flu(range(3)) 57 | assert gen.first() == 0 58 | gen = flu([]) 59 | with pytest.raises(IndexError): 60 | gen.first() 61 | gen = flu([]) 62 | assert gen.first(default=1) == 1 63 | 64 | 65 | def test_last(): 66 | gen = flu(range(3)) 67 | assert gen.last() == 2 68 | gen = flu([]) 69 | with pytest.raises(IndexError): 70 | gen.last() 71 | gen = flu([]) 72 | assert gen.last(default=1) == 1 73 | 74 | 75 | def test_head(): 76 | gen = flu(range(30)) 77 | assert gen.head(n=2) == [0, 1] 78 | gen = flu(range(30)) 79 | assert gen.head(n=3, container_type=set) == set([0, 1, 2]) 80 | gen = flu(range(3)) 81 | assert gen.head(n=50) == [0, 1, 2] 82 | 83 | 84 | def test_tail(): 85 | gen = flu(range(30)) 86 | assert gen.tail(n=2) == [28, 29] 87 | gen = flu(range(30)) 88 | assert gen.tail(n=3, container_type=set) == set([27, 28, 29]) 89 | gen = flu(range(3)) 90 | assert gen.tail(n=50) == [0, 1, 2] 91 | 92 | 93 | def test_max(): 94 | gen = flu(range(3)) 95 | assert gen.max() == 2 96 | 97 | 98 | def test_unique(): 99 | class NoHash: 100 | def __init__(self, letter, keyf): 101 | self.letter = letter 102 | self.keyf = keyf 103 | 104 | a = NoHash("a", 1) 105 | b = NoHash("b", 1) 106 | c = NoHash("c", 2) 107 | 108 | gen = flu([a, b, c]).unique() 109 | assert gen.collect() == [a, b, c] 110 | gen = flu([a, b, c]).unique(lambda x: x.letter) 111 | assert gen.collect() == [a, b, c] 112 | gen = flu([a, b, c]).unique(lambda x: x.keyf) 113 | assert gen.collect() == [a, c] 114 | 115 | 116 | def test_side_effect(): 117 | class FakeFile: 118 | def __init__(self): 119 | self.is_open = False 120 | self.content = [] 121 | 122 | def write(self, text): 123 | if self.is_open: 124 | self.content.append(text) 125 | else: 126 | raise IOError("fake file is not open for writing") 127 | 128 | def open(self): 129 | self.is_open = True 130 | 131 | def close(self): 132 | self.is_open = False 133 | 134 | # Test the fake file 135 | ffile = FakeFile() 136 | ffile.open() 137 | ffile.write("should be there") 138 | ffile.close() 139 | assert ffile.content[0] == "should be there" 140 | with pytest.raises(IOError): 141 | ffile.write("should fail") 142 | 143 | # Reset fake file 144 | ffile = FakeFile() 145 | 146 | with pytest.raises(IOError): 147 | flu(range(5)).side_effect(ffile.write).collect() 148 | 149 | gen_result = flu(range(5)).side_effect(ffile.write, before=ffile.open, after=ffile.close).collect() 150 | assert ffile.is_open == False 151 | assert ffile.content == [0, 1, 2, 3, 4] 152 | assert gen_result == [0, 1, 2, 3, 4] 153 | 154 | 155 | def test_sort(): 156 | gen = flu(range(3, 0, -1)).sort() 157 | assert gen.collect() == [1, 2, 3] 158 | 159 | 160 | def test_shuffle(): 
161 | original_order = list(range(10000)) 162 | new_order = flu(original_order).shuffle().collect() 163 | assert new_order != original_order 164 | assert len(new_order) == len(original_order) 165 | assert sum(new_order) == sum(original_order) 166 | 167 | 168 | def test_map(): 169 | gen = flu(range(3)).map(lambda x: x + 2) 170 | assert gen.collect() == [2, 3, 4] 171 | 172 | 173 | def test_rate_limit(): 174 | resA = flu(range(3)).collect() 175 | resB = flu(range(3)).rate_limit(5000).collect() 176 | assert resA == resB 177 | 178 | 179 | def test_map_item(): 180 | gen = flu(range(3)).map(lambda x: {"a": x}).map_item("a") 181 | assert gen.collect() == [0, 1, 2] 182 | 183 | 184 | def test_map_attr(): 185 | class Person: 186 | def __init__(self, age: int) -> None: 187 | self.age = age 188 | 189 | gen = flu(range(3)).map(lambda x: Person(x)).map_attr("age") 190 | assert gen.collect() == [0, 1, 2] 191 | 192 | 193 | def test_filter(): 194 | gen = flu(range(3)).filter(lambda x: 0 < x < 2) 195 | assert gen.collect() == [1] 196 | 197 | 198 | def test_take(): 199 | gen = flu(range(10)).take(5) 200 | assert gen.collect() == [0, 1, 2, 3, 4] 201 | 202 | 203 | def test_take_while(): 204 | gen = flu(cycle(range(10))).take_while(lambda x: x < 4) 205 | assert gen.collect() == [0, 1, 2, 3] 206 | 207 | 208 | def test_drop_while(): 209 | gen = flu([1, 2, 3, 4, 3, 2, 1]).drop_while(lambda x: x < 4) 210 | assert gen.collect() == [4, 3, 2, 1] 211 | 212 | 213 | def test_group_by(): 214 | gen = flu([1, 1, 1, 2, 2, 2, 2, 3]).zip(range(100)).group_by(lambda x: x[0]) 215 | g1, g2, g3 = gen.map(lambda x: (x[0], x[1].collect())).collect() 216 | # Standard usage 217 | assert g1 == (1, [(1, 0), (1, 1), (1, 2)]) 218 | assert g2 == (2, [(2, 3), (2, 4), (2, 5), (2, 6)]) 219 | assert g3 == (3, [(3, 7)]) 220 | # No param usage 221 | v1 = flu(range(10)).group_by().map(lambda x: (x[0], list(x[1]))) 222 | v2 = flu(range(10)).map(lambda x: (x, [x])) 223 | assert v1.collect() == v2.collect() 224 | # Sort 225 | gen = flu([1, 2, 1, 2]).group_by(lambda x: x, sort=False) 226 | assert gen.count() == 4 227 | gen = flu([1, 2, 1, 2]).group_by(lambda x: x, sort=True) 228 | assert gen.count() == 2 229 | 230 | # Custom key function 231 | points = [{"x": 1, "y": 0}, {"x": 4, "y": 3}, {"x": 1, "y": 5}] 232 | key_func = lambda u: u["x"] 233 | gen = flu(points).group_by(key=key_func, sort=True).collect() 234 | assert len(gen) == 2 235 | assert gen[0][0] == 1 236 | assert gen[1][0] == 4 237 | assert len(gen[0][1].collect()) == 2 238 | assert len(gen[1][1].collect()) == 1 239 | 240 | 241 | def test_chunk(): 242 | gen = flu(range(5)).chunk(2) 243 | assert gen.collect() == [[0, 1], [2, 3], [4]] 244 | 245 | 246 | def test_next(): 247 | gen = flu(range(5)) 248 | assert next(gen) == 0 249 | 250 | 251 | def test_iter(): 252 | gen = flu(range(5)) 253 | assert next(iter(gen)) == 0 254 | 255 | 256 | def test_enumerate(): 257 | # Check default 258 | gen = flu(range(3)).enumerate() 259 | assert gen.collect() == [(0, 0), (1, 1), (2, 2)] 260 | 261 | # Check start param 262 | gen = flu(range(3)).enumerate(start=1) 263 | assert gen.collect() == [(1, 0), (2, 1), (3, 2)] 264 | 265 | 266 | def test_zip(): 267 | gen = flu(range(3)).zip(range(3)) 268 | assert gen.collect() == [(0, 0), (1, 1), (2, 2)] 269 | 270 | gen2 = flu(range(3)).zip(range(3), range(2)) 271 | assert gen2.collect() == [(0, 0, 0), (1, 1, 1)] 272 | 273 | 274 | def test_zip_longest(): 275 | gen = flu(range(3)).zip_longest(range(5)) 276 | assert gen.collect() == [(0, 0), (1, 1), (2, 2), (None, 3), (None, 4)] 
277 | gen = flu(range(3)).zip_longest(range(5), fill_value="a") 278 | assert gen.collect() == [(0, 0), (1, 1), (2, 2), ("a", 3), ("a", 4)] 279 | gen = flu(range(3)).zip_longest(range(5), range(4), fill_value="a") 280 | assert gen.collect() == [(0, 0, 0), (1, 1, 1), (2, 2, 2), ("a", 3, 3), ("a", 4, "a")] 281 | 282 | 283 | def test_window(): 284 | # Check default 285 | gen = flu(range(5)).window(n=3) 286 | assert gen.collect() == [(0, 1, 2), (1, 2, 3), (2, 3, 4)] 287 | 288 | # Check step param 289 | gen = flu(range(5)).window(n=3, step=3) 290 | assert gen.collect() == [(0, 1, 2), (3, 4, None)] 291 | 292 | # Check fill_value param 293 | gen = flu(range(5)).window(n=3, step=3, fill_value="i") 294 | assert gen.collect() == [(0, 1, 2), (3, 4, "i")] 295 | 296 | assert flu(range(4)).window(n=0).collect() == [tuple()] 297 | 298 | with pytest.raises(ValueError): 299 | flu(range(5)).window(n=-1).collect() 300 | 301 | with pytest.raises(ValueError): 302 | flu(range(5)).window(3, step=0).collect() 303 | 304 | 305 | def test_flu(): 306 | gen = flu(count()).map(lambda x: x**2).filter(lambda x: x % 517 == 0).chunk(5).take(3) 307 | assert next(gen) == [0, 267289, 1069156, 2405601, 4276624] 308 | 309 | 310 | def test_flatten(): 311 | nested = [1, [2, (3, [4])], ["rbsd", "abc"], (7,)] 312 | 313 | # Defaults with depth of 1 314 | gen = flu(nested).flatten() 315 | assert [x for x in gen] == [1, 2, (3, [4]), "rbsd", "abc", 7] 316 | 317 | # Depth 2 318 | gen = flu(nested).flatten(depth=2) 319 | assert [x for x in gen] == [1, 2, 3, [4], "rbsd", "abc", 7] 320 | 321 | # Depth 3 322 | gen = flu(nested).flatten(depth=3) 323 | assert [x for x in gen] == [1, 2, 3, 4, "rbsd", "abc", 7] 324 | 325 | # Depth infinite 326 | gen = flu(nested).flatten(depth=sys.maxsize) 327 | assert [x for x in gen] == [1, 2, 3, 4, "rbsd", "abc", 7] 328 | 329 | # Depth 2 with tuple base_type 330 | gen = flu(nested).flatten(depth=2, base_type=tuple) 331 | assert [x for x in gen] == [1, 2, (3, [4]), "rbsd", "abc", (7,)] 332 | 333 | # Depth 2 with iterate strings 334 | gen = flu(nested).flatten(depth=2, base_type=tuple, iterate_strings=True) 335 | assert [x for x in gen] == [1, 2, (3, [4]), "r", "b", "s", "d", "a", "b", "c", (7,)] 336 | 337 | 338 | def test_denormalize(): 339 | content = [ 340 | ["abc", [1, 2, 3]], 341 | ] 342 | assert flu(content).denormalize().collect() == [("abc", 1), ("abc", 2), ("abc", 3)] 343 | assert (flu(content).denormalize(iterate_strings=True).collect()) == [ 344 | ("a", 1), 345 | ("a", 2), 346 | ("a", 3), 347 | ("b", 1), 348 | ("b", 2), 349 | ("b", 3), 350 | ("c", 1), 351 | ("c", 2), 352 | ("c", 3), 353 | ] 354 | 355 | assert (flu([[[1], [1, 2], None]]).denormalize().collect()) == [ 356 | (1, 1, None), 357 | (1, 2, None), 358 | ] 359 | 360 | assert (flu([[[1], [1, 2], []]]).denormalize().collect()) == [] 361 | 362 | 363 | def test_tee(): 364 | # Default unpacking 365 | gen1, gen2 = flu(range(100)).tee() 366 | assert gen1.sum() == gen2.sum() 367 | 368 | # Adjusting the *n* parameter 369 | gen1, gen2, gen3 = flu(range(100)).tee(3) 370 | assert gen1.sum() == gen3.sum() 371 | 372 | # Copies do not share progress 373 | gen1, gen2 = flu(range(100)).tee() 374 | assert next(gen1) == next(gen2) 375 | 376 | # tee does not break chaining 377 | assert flu(range(5)).tee().map(sum).sum() == 20 378 | 379 | 380 | def test_join_left(): 381 | # Default key functions 382 | res = flu(range(6)).join_left(range(0, 6, 2)).collect() 383 | assert res == [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)] 384 | 385 | 386 | def test_join_inner(): 387 | # Default 
key functions 388 | res = flu(range(6)).join_inner(range(0, 6, 2)).collect() 389 | assert res == [(0, 0), (2, 2), (4, 4)] 390 | -------------------------------------------------------------------------------- /src/tests/test_version.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for version information. 3 | """ 4 | 5 | import re 6 | 7 | import flupy 8 | 9 | 10 | def test_version_format(): 11 | """Test that __version__ follows semantic versioning format (MAJOR.MINOR.PATCH).""" 12 | # Standard semver regex pattern 13 | semver_pattern = r"^(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" 14 | 15 | assert re.match( 16 | semver_pattern, flupy.__version__ 17 | ), f"Version '{flupy.__version__}' does not match semantic versioning format" 18 | 19 | # Ensure version parts can be parsed as integers 20 | major, minor, patch = flupy.__version__.split("-")[0].split("+")[0].split(".")[:3] 21 | assert major.isdigit(), f"Major version '{major}' is not a valid integer" 22 | assert minor.isdigit(), f"Minor version '{minor}' is not a valid integer" 23 | assert patch.isdigit(), f"Patch version '{patch}' is not a valid integer" 24 | --------------------------------------------------------------------------------
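
A quick usage sketch (illustrative only, not a file in this repository) showing how the fluent methods defined in src/flupy/fluent.py compose. It assumes only that flupy is installed; the users/orders records are made-up sample data:

from flupy import flu

# Pair each number with its double, keep pairs whose first element is even,
# then batch the survivors into lists of two.
pairs = (
    flu(range(10))
    .map(lambda x: (x, x * 2))
    .filter(lambda pair: pair[0] % 2 == 0)
    .chunk(2)
    .to_list()
)
assert pairs == [[(0, 0), (2, 4)], [(4, 8), (6, 12)], [(8, 16)]]

# Left-join two record sets on their first field, mirroring test_join_left above;
# left-hand records with no match are paired with None.
users = [(1, "ann"), (2, "bob"), (3, "cal")]
orders = [(1, "book"), (3, "pen")]
joined = (
    flu(users)
    .join_left(orders, key=lambda u: u[0], other_key=lambda o: o[0])
    .to_list()
)
assert joined == [((1, "ann"), (1, "book")), ((2, "bob"), None), ((3, "cal"), (3, "pen"))]

Because every step returns a new Fluent wrapper over a lazy generator, nothing is evaluated until a terminal call such as to_list(), collect(), or sum() consumes the pipeline; only join_left materializes its *other* argument, as its docstring notes.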