├── .coveragerc ├── .github └── workflows │ ├── pre-commit_hooks.yaml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yml ├── .version ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── benchmark └── test_benchmark.py ├── docs ├── Makefile ├── api.rst ├── cli.rst ├── conf.py ├── index.rst ├── influances.rst ├── license.rst ├── versions.rst └── welcome.rst ├── mypy.ini ├── pyproject.toml ├── pytest.ini ├── setup.cfg └── src ├── flupy ├── __init__.py ├── cli │ ├── __init__.py │ ├── cli.py │ └── utils.py ├── fluent.py └── py.typed └── tests ├── test_cli.py ├── test_cli_utils.py ├── test_flu.py └── test_version.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | if TYPE_CHECKING: 5 | raise AssertionError 6 | raise NotImplementedError 7 | @overload 8 | pass 9 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit_hooks.yaml: -------------------------------------------------------------------------------- 1 | name: pre-commit hooks 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | 11 | - uses: actions/checkout@v1 12 | 13 | - name: python setup 3.9 14 | uses: actions/setup-python@v1 15 | with: 16 | python-version: '3.9' 17 | 18 | - name: Install Poetry 19 | uses: snok/install-poetry@v1 20 | with: 21 | version: 1.7.1 22 | virtualenvs-create: true 23 | virtualenvs-in-project: true 24 | 25 | - name: Install dependencies 26 | run: | 27 | poetry install --with dev 28 | 29 | - name: run tests 30 | run: | 31 | poetry run pre-commit run --all -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | strategy: 10 | matrix: 11 | python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 12 | 13 | steps: 14 | 15 | - uses: actions/checkout@v1 16 | 17 | - name: python setup ${{ matrix.python-version }} 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | 22 | - name: Install Poetry 23 | uses: snok/install-poetry@v1 24 | with: 25 | version: 1.7.1 26 | virtualenvs-create: true 27 | virtualenvs-in-project: true 28 | 29 | - name: Install dependencies 30 | run: | 31 | poetry install --with dev 32 | 33 | - name: run tests 34 | run: | 35 | poetry run pytest --cov=src/flupy src/tests --cov-report=xml 36 | 37 | - name: upload coverage to codecov 38 | uses: codecov/codecov-action@v1 39 | with: 40 | token: ${{ secrets.CODECOV_TOKEN }} 41 | file: ./coverage.xml 42 | flags: unittests 43 | name: codecov-umbrella 44 | fail_ci_if_error: true 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | docs/* 2 | # Temporary Python files 3 | *.pyc 4 | *.egg-info 5 | __pycache__ 6 | .ipynb_checkpoints 7 | 8 | # pyenv 9 | .python-version 10 | 11 | .benchmarks 12 | poetry.lock 13 | 14 | pip-wheel-metadata/ 15 | 16 | .vscode 17 | 18 | # Temporary OS files 19 | Icon* 20 | 21 | # Pytest cache 22 | .pytest_cache/* 23 | 24 | # Virtual environment 25 | venv/* 26 | 27 | # Temporary virtual environment files 28 | /.cache/ 29 | /.venv/ 30 | 31 | # Temporary server files 32 | .env 33 | *.pid 34 | *.swp 35 | 36 | # 
Generated documentation 37 | /docs/gen/ 38 | /docs/apidocs/ 39 | /docs/_build/ 40 | /site/ 41 | /*.html 42 | /*.rst 43 | /docs/*.png 44 | 45 | # Google Drive 46 | *.gdoc 47 | *.gsheet 48 | *.gslides 49 | *.gdraw 50 | 51 | # Testing and coverage results 52 | /.pytest/ 53 | /.coverage 54 | /.coverage.* 55 | /htmlcov/ 56 | /xmlreport/ 57 | /pyunit.xml 58 | /tmp/ 59 | *.tmp 60 | 61 | # Build and release directories 62 | /build/ 63 | /dist/ 64 | *.spec 65 | 66 | # Sublime Text 67 | *.sublime-workspace 68 | 69 | # Eclipse 70 | .settings 71 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/mirrors-isort 3 | rev: v5.10.1 4 | hooks: 5 | - id: isort 6 | args: ['--multi-line=3', '--trailing-comma', '--force-grid-wrap=0', '--use-parentheses', '--line-width=88'] 7 | 8 | 9 | - repo: https://github.com/pre-commit/pre-commit-hooks 10 | rev: v5.0.0 11 | hooks: 12 | - id: trailing-whitespace 13 | - id: check-added-large-files 14 | - id: check-yaml 15 | - id: mixed-line-ending 16 | args: ['--fix=lf'] 17 | 18 | - repo: https://github.com/humitos/mirrors-autoflake.git 19 | rev: v1.1 20 | hooks: 21 | - id: autoflake 22 | args: ['--in-place', '--remove-all-unused-imports'] 23 | 24 | - repo: https://github.com/psf/black 25 | rev: 25.1.0 26 | hooks: 27 | - id: black 28 | language_version: python3.9 29 | 30 | - repo: https://github.com/pre-commit/mirrors-mypy 31 | rev: v1.15.0 32 | hooks: 33 | - id: mypy 34 | files: flupy/ 35 | args: ["--config-file", "mypy.ini"] 36 | 37 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | build: 2 | image: latest 3 | python: 4 | version: 3.8 5 | setup_py_install: true 6 | -------------------------------------------------------------------------------- /.version: -------------------------------------------------------------------------------- 1 | 1.0.11 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # For Contributors 2 | 3 | ## Setup 4 | 5 | ### Requirements 6 | 7 | * Make: 8 | * Windows: http://mingw.org/download/installer 9 | * Mac: http://developer.apple.com/xcode 10 | * Linux: http://www.gnu.org/software/make 11 | * pipenv: http://docs.pipenv.org 12 | * Pandoc: http://johnmacfarlane.net/pandoc/installing.html 13 | * Graphviz: http://www.graphviz.org/Download.php 14 | 15 | To confirm these system dependencies are configured correctly: 16 | 17 | ```sh 18 | $ make doctor 19 | ``` 20 | 21 | ### Installation 22 | 23 | Install project dependencies into a virtual environment: 24 | 25 | ```sh 26 | $ make install 27 | ``` 28 | 29 | ## Development Tasks 30 | 31 | ### Testing 32 | 33 | Manually run the tests: 34 | 35 | ```sh 36 | $ make test 37 | ``` 38 | 39 | or keep them running on change: 40 | 41 | ```sh 42 | $ make watch 43 | ``` 44 | 45 | > In order to have OS X notifications, `brew install terminal-notifier`. 
46 | 47 | ### Documentation 48 | 49 | Build the documentation: 50 | 51 | ```sh 52 | $ make docs 53 | ``` 54 | 55 | ### Static Analysis 56 | 57 | Run linters and static analyzers: 58 | 59 | ```sh 60 | $ make pylint 61 | $ make pycodestyle 62 | $ make pydocstyle 63 | $ make check # includes all checks 64 | ``` 65 | 66 | ## Continuous Integration 67 | 68 | The CI server will report overall build status: 69 | 70 | ```sh 71 | $ make ci 72 | ``` 73 | 74 | ## Release Tasks 75 | 76 | Release to PyPI: 77 | 78 | ```sh 79 | $ make upload 80 | ``` 81 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | **The MIT License (MIT)** 4 | 5 | Copyright © 2017, Oliver Rice 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in 15 | all copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 | THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # flupy 2 | 3 |
4 | <!-- badge images (markup stripped): Tests, Codestyle Black, Python version, PyPI version, License, Download count -->
18 |
19 | ---
20 |
21 | **Documentation**: https://flupy.readthedocs.io/en/latest/
22 |
23 | **Source Code**: https://github.com/olirice/flupy
24 |
25 | ---
26 |
27 | ## Overview
28 | Flupy implements a [fluent interface](https://en.wikipedia.org/wiki/Fluent_interface) for operating on python iterables. All flupy methods return generators and are evaluated lazily. This allows expressions to transform data of arbitrary size in extremely limited memory.
29 |
30 | You can think of flupy as a lightweight, zero-dependency, pure python alternative to the excellent [Apache Spark](https://spark.apache.org/) project.
31 |
32 | ## Setup
33 |
34 | ### Requirements
35 |
36 | * Python 3.9+
37 |
38 | ### Installation
39 |
40 | Install flupy with pip:
41 | ```sh
42 | $ pip install flupy
43 | ```
44 |
45 | ### Library
46 | ```python
47 | from itertools import count
48 | from flupy import flu
49 |
50 | # Processing an infinite sequence in constant memory
51 | pipeline = (
52 |     flu(count())
53 |     .map(lambda x: x**2)
54 |     .filter(lambda x: x % 517 == 0)
55 |     .chunk(5)
56 |     .take(3)
57 | )
58 |
59 | for item in pipeline:
60 |     print(item)
61 |
62 | # Prints:
63 | # [0, 267289, 1069156, 2405601, 4276624]
64 | # [6682225, 9622404, 13097161, 17106496, 21650409]
65 | # [26728900, 32341969, 38489616, 45171841, 52388644]
66 | ```
67 |
68 | ### CLI
69 | The flupy command line interface brings the same syntax for lazy pipelines to your shell. Inputs to the `flu` command are auto-populated into a `Fluent` context named `_`.
70 | ````
71 | $ flu -h
72 | usage: flu [-h] [-f FILE] [-i [IMPORT [IMPORT ...]]] command
73 |
74 | flupy: a fluent interface for python
75 |
76 | positional arguments:
77 |   command               flupy command to execute on input
78 |
79 | optional arguments:
80 |   -h, --help            show this help message and exit
81 |   -f FILE, --file FILE  path to input file
82 |   -i [IMPORT [IMPORT ...]], --import [IMPORT [IMPORT ...]]
83 |                         modules to import
84 |                         Syntax: <module>:<object>:<alias>
85 |                         Examples:
86 |                             'import os' = '-i os'
87 |                             'import os as op_sys' = '-i os::op_sys'
88 |                             'from os import environ' = '-i os:environ'
89 |                             'from os import environ as env' = '-i os:environ:env'
90 | ````
--------------------------------------------------------------------------------
/benchmark/test_benchmark.py:
--------------------------------------------------------------------------------
1 | from itertools import cycle
2 |
3 | from flupy import flu
4 |
5 |
6 | def test_integration(benchmark):
7 |     @benchmark
8 |     def work():
9 |         (flu(range(100000)).chunk(100).chunk(2).map_item(0).count())
10 |
11 |
12 | def test_max(benchmark):
13 |     @benchmark
14 |     def work():
15 |         flu(range(300000)).max()
16 |
17 |
18 | def test_initialize(benchmark):
19 |     @benchmark
20 |     def work():
21 |         flu(range(10))
22 |
23 |
24 | def test_collect(benchmark):
25 |     @benchmark
26 |     def work():
27 |         flu(range(3)).collect()
28 |
29 |
30 | def test___getitem__(benchmark):
31 |     @benchmark
32 |     def work():
33 |         flu(range(350))[1:3].collect()
34 |
35 |
36 | def test_sum(benchmark):
37 |     @benchmark
38 |     def work():
39 |         gen = flu(range(1000)).sum()
40 |
41 |
42 | def test_reduce(benchmark):
43 |     @benchmark
44 |     def work():
45 |         flu(range(50)).reduce(lambda x, y: x + y)
46 |
47 |
48 | def test_fold_left(benchmark):
49 |     @benchmark
50 |     def work():
51 |         flu(range(5)).fold_left(lambda x, y: x + y, 0)
52 |
53 |
54 | def test_count(benchmark):
55 |     @benchmark
56 |     def work():
57 |         gen = flu(range(3000)).count()
58 |
59 |
60 | def test_min(benchmark):
61 |     @benchmark
62 |     def work():
63 |         
flu(range(3000)).min() 64 | 65 | 66 | def test_first(benchmark): 67 | @benchmark 68 | def work(): 69 | flu(range(3)).first() 70 | 71 | 72 | def test_last(benchmark): 73 | @benchmark 74 | def work(): 75 | flu(range(3000)).last() 76 | 77 | 78 | def test_head(benchmark): 79 | @benchmark 80 | def work(): 81 | flu(range(30000)).head(n=10) 82 | 83 | 84 | def test_tail(benchmark): 85 | @benchmark 86 | def work(): 87 | gen = flu(range(30000)).tail(n=10) 88 | 89 | 90 | def test_unique(benchmark): 91 | class NoHash: 92 | def __init__(self, letter, keyf): 93 | self.letter = letter 94 | self.keyf = keyf 95 | 96 | a = NoHash("a", 1) 97 | b = NoHash("b", 1) 98 | c = NoHash("c", 2) 99 | 100 | data = [x % 500 for x in range(10000)] 101 | 102 | @benchmark 103 | def work(): 104 | gen = flu(data).unique().collect() 105 | 106 | 107 | def test_sort(benchmark): 108 | @benchmark 109 | def work(): 110 | flu(range(3000, 0, -1)).sort().collect() 111 | 112 | 113 | def test_shuffle(benchmark): 114 | original_order = list(range(10000)) 115 | 116 | @benchmark 117 | def work(): 118 | flu(original_order).shuffle().collect() 119 | 120 | 121 | def test_map(benchmark): 122 | @benchmark 123 | def work(): 124 | flu(range(3)).map(lambda x: x + 2).collect() 125 | 126 | 127 | def test_rate_limit(benchmark): 128 | @benchmark 129 | def work(): 130 | flu(range(300)).rate_limit(50000000000000).collect() 131 | 132 | 133 | def test_map_item(benchmark): 134 | data = flu(range(300)).map(lambda x: {"a": x}) 135 | 136 | @benchmark 137 | def work(): 138 | gen = flu(data).map_item("a") 139 | 140 | 141 | def test_map_attr(benchmark): 142 | class Person: 143 | def __init__(self, age: int) -> None: 144 | self.age = age 145 | 146 | people = flu(range(200)).map(Person).collect() 147 | 148 | @benchmark 149 | def work(): 150 | flu(people).map_attr("age").collect() 151 | 152 | 153 | def test_filter(benchmark): 154 | @benchmark 155 | def work(): 156 | flu(range(3)).filter(lambda x: 0 < x < 2).collect() 157 | 158 | 159 | def test_take(benchmark): 160 | @benchmark 161 | def work(): 162 | flu(range(10)).take(5).collect() 163 | 164 | 165 | def test_take_while(benchmark): 166 | @benchmark 167 | def work(): 168 | flu(cycle(range(10))).take_while(lambda x: x < 4).collect() 169 | 170 | 171 | def test_drop_while(benchmark): 172 | @benchmark 173 | def work(): 174 | flu([1, 2, 3, 4, 3, 2, 1]).drop_while(lambda x: x < 4).collect() 175 | 176 | 177 | def test_group_by(benchmark): 178 | @benchmark 179 | def work(): 180 | flu([1, 1, 1, 2, 2, 2, 2, 3]).zip(range(100)).group_by(lambda x: x[0]).collect() 181 | 182 | 183 | def test_chunk(benchmark): 184 | @benchmark 185 | def work(): 186 | flu(range(500)).chunk(2).collect() 187 | 188 | 189 | def test_enumerate(benchmark): 190 | @benchmark 191 | def work(): 192 | flu(range(3)).enumerate(start=1).collect() 193 | 194 | 195 | def test_zip(benchmark): 196 | @benchmark 197 | def work(): 198 | flu(range(3)).zip(range(3)).collect() 199 | 200 | 201 | def test_zip_longest(benchmark): 202 | @benchmark 203 | def work(): 204 | flu(range(3)).zip_longest(range(5)).collect() 205 | 206 | 207 | def test_window(benchmark): 208 | @benchmark 209 | def work(): 210 | gen = flu(range(5)).window(n=3, step=3).collect 211 | 212 | 213 | def test_flatten(benchmark): 214 | nested = [1, [2, (3, [4])], ["rbsd", "abc"], (7,)] 215 | 216 | @benchmark 217 | def work(): 218 | gen = flu(nested).flatten(depth=2, base_type=tuple).collect() 219 | 220 | 221 | def test_tee(benchmark): 222 | @benchmark 223 | def work(): 224 | gen1, gen2, gen3 = 
flu(range(100)).tee(3) 225 | 226 | 227 | def test_join_left(benchmark): 228 | @benchmark 229 | def work(): 230 | flu(range(6)).join_left(range(0, 6, 2)).collect() 231 | 232 | 233 | def test_join_inner(benchmark): 234 | @benchmark 235 | def work(): 236 | flu(range(6)).join_inner(range(0, 6, 2)).collect() 237 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/flupy.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/more-itertools.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/more-itertools" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/more-itertools" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | API Reference 3 | ============= 4 | 5 | .. automodule:: flupy 6 | 7 | 8 | Container 9 | ========= 10 | 11 | .. autoclass:: flu 12 | 13 | ---- 14 | 15 | 16 | Grouping 17 | ======== 18 | 19 | .. 
automethod:: flu.chunk
20 | .. automethod:: flu.flatten
21 | .. automethod:: flu.denormalize
22 | .. automethod:: flu.group_by
23 | .. automethod:: flu.window
24 |
25 | ----
26 |
27 | Selecting
28 | =========
29 |
30 | .. automethod:: flu.filter
31 | .. automethod:: flu.take
32 | .. automethod:: flu.take_while
33 | .. automethod:: flu.drop_while
34 | .. automethod:: flu.unique
35 |
36 | ----
37 |
38 | Transforming
39 | ============
40 |
41 | .. automethod:: flu.enumerate
42 | .. automethod:: flu.join_left
43 | .. automethod:: flu.join_inner
44 | .. automethod:: flu.map
45 | .. automethod:: flu.map_attr
46 | .. automethod:: flu.map_item
47 | .. automethod:: flu.zip
48 | .. automethod:: flu.zip_longest
49 |
50 | ----
51 |
52 | Side Effects
53 | ============
54 |
55 | .. automethod:: flu.rate_limit
56 | .. automethod:: flu.side_effect
57 |
58 | ----
59 |
60 | Summarizing
61 | ===========
62 |
63 | .. automethod:: flu.count
64 | .. automethod:: flu.sum
65 | .. automethod:: flu.min
66 | .. automethod:: flu.max
67 | .. automethod:: flu.reduce
68 | .. automethod:: flu.fold_left
69 | .. automethod:: flu.first
70 | .. automethod:: flu.last
71 | .. automethod:: flu.head
72 | .. automethod:: flu.tail
73 | .. automethod:: flu.to_list
74 | .. automethod:: flu.collect
75 |
76 | ----
77 |
78 | Non-Constant Memory
79 | ===================
80 |
81 | .. automethod:: flu.group_by
82 | .. automethod:: flu.join_left
83 | .. automethod:: flu.join_inner
84 | .. automethod:: flu.shuffle
85 | .. automethod:: flu.sort
86 | .. automethod:: flu.tee
87 | .. automethod:: flu.unique
--------------------------------------------------------------------------------
/docs/cli.rst:
--------------------------------------------------------------------------------
1 | ============
2 | Command Line
3 | ============
4 |
5 |
6 | The flupy CLI is a platform-agnostic application that gives full access to the flupy API and python from your shell.
7 |
8 | .. automodule:: flupy
9 |
10 | Usage
11 | =====
12 |
13 | ::
14 |
15 |     $ flu -h
16 |
17 |     usage: flu [-h] [-v] [-f FILE] [-i [IMPORT [IMPORT ...]]] command
18 |
19 |     flupy: a fluent interface for python
20 |
21 |     positional arguments:
22 |       command               command to execute against input
23 |
24 |     optional arguments:
25 |       -h, --help            show this help message and exit
26 |       -v, --version         show program's version number and exit
27 |       -f FILE, --file FILE  path to input file
28 |       -i [IMPORT [IMPORT ...]], --import [IMPORT [IMPORT ...]]
29 |                             modules to import
30 |                             Syntax: <module>:<object>:<alias>
31 |
32 |
33 | Basic Examples
34 | ==============
35 |
36 | When input data are provided to the `flu` command, an instance of the flu object is pre-populated with that input and stored in the variable `_`.
37 |
38 |
39 | .. note:: for more information on writing flupy commands, see the API Reference
40 |
41 | Piping from another command (stdin)
42 | -----------------------------------
43 | Example: Show lines of a log file that are errors::
44 |
45 |     $ cat logs.txt | flu '_.filter(lambda x: x.startswith("ERROR"))'
46 |
47 | Reading from a file
48 | -------------------
49 | Example: Show lines of a log file that are errors::
50 |
51 |     $ flu -f logs.txt '_.filter(lambda x: x.startswith("ERROR"))'
52 |
53 | No Input data
54 | -------------
55 | flupy does not require input data if they can be generated from within python, e.g. with `range(10)`. When no input data are provided, the iterable at the beginning of the flupy command must be wrapped in a flu instance.
56 | 57 | Example: Even integers less than 10:: 58 | 59 | $ flu 'flu(range(10)).filter(lambda x: x%2==0)' 60 | 61 | Import System 62 | ============= 63 | 64 | Passing `-i` or `--import` to the cli allows you to import standard and third party libraries installed in the same environment. 65 | 66 | Import syntax 67 | 68 | -i :: 69 | 70 | 71 | .. note:: for multiple imports pass `-i` multiple times 72 | 73 | Import Examples 74 | --------------- 75 | **import os**:: 76 | 77 | $ flu 'flu(os.environ)' -i os 78 | 79 | **from os import environ**:: 80 | 81 | $ flu 'flu(environ)' -i os:environ 82 | 83 | **from os import environ as env**:: 84 | 85 | $ flu 'flu(env)' -i os:environ:env 86 | 87 | **import os as opsys**:: 88 | 89 | $ flu 'flu(opsys.environ)' -i os::opsys 90 | 91 | 92 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # This file is execfile()d with the current directory set to its containing dir. 4 | # 5 | # Note that not all possible configuration values are present in this 6 | # autogenerated file. 7 | # 8 | # All configuration values have a default; values that are commented out 9 | # serve to show the default. 10 | 11 | import os 12 | import sys 13 | from typing import Dict 14 | 15 | import sphinx_rtd_theme 16 | 17 | # If extensions (or modules to document with autodoc) are in another directory, 18 | # add these directories to sys.path here. If the directory is relative to the 19 | # documentation root, use os.path.abspath to make it absolute, like shown here. 20 | sys.path.insert(0, os.path.abspath("..")) 21 | sys.path.insert(0, os.path.abspath("../src")) 22 | 23 | # -- General configuration ----------------------------------------------------- 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | # needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be extensions 29 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 30 | extensions = ["sphinx.ext.autodoc", "sphinx_rtd_theme", "sphinx.ext.viewcode"] 31 | 32 | # Add any paths that contain templates here, relative to this directory. 33 | templates_path = ["_templates"] 34 | 35 | # The suffix of source filenames. 36 | source_suffix = ".rst" 37 | 38 | # The encoding of source files. 39 | # source_encoding = 'utf-8-sig' 40 | 41 | # The master toctree document. 42 | master_doc = "index" 43 | 44 | # General information about the project. 45 | project = "flupy" 46 | copyright = "2021, Oliver Rice" 47 | 48 | # The version info for the project you're documenting, acts as replacement for 49 | # |version| and |release|, also used in various other places throughout the 50 | # built documents. 51 | # 52 | # The short X.Y version. 53 | version = "1.1.0" 54 | # The full version, including alpha/beta/rc tags. 55 | release = version 56 | 57 | # The language for content autogenerated by Sphinx. Refer to documentation 58 | # for a list of supported languages. 59 | # language = None 60 | 61 | # There are two options for replacing |today|: either, you set today to some 62 | # non-false value, then it is used: 63 | # today = '' 64 | # Else, today_fmt is used as the format for a strftime call. 65 | # today_fmt = '%B %d, %Y' 66 | 67 | # List of patterns, relative to source directory, that match files and 68 | # directories to ignore when looking for source files. 
69 | exclude_patterns = ["_build"] 70 | 71 | # The reST default role (used for this markup: `text`) to use for all documents. 72 | # default_role = None 73 | 74 | # If true, '()' will be appended to :func: etc. cross-reference text. 75 | # add_function_parentheses = True 76 | 77 | # If true, the current module name will be prepended to all description 78 | # unit titles (such as .. function::). 79 | # add_module_names = True 80 | 81 | # If true, sectionauthor and moduleauthor directives will be shown in the 82 | # output. They are ignored by default. 83 | # show_authors = False 84 | 85 | # The name of the Pygments (syntax highlighting) style to use. 86 | pygments_style = "sphinx" 87 | 88 | # A list of ignored prefixes for module index sorting. 89 | # modindex_common_prefix = [] 90 | 91 | 92 | # -- Options for HTML output --------------------------------------------------- 93 | 94 | # The theme to use for HTML and HTML Help pages. See the documentation for 95 | # a list of builtin themes. 96 | html_theme = "sphinx_rtd_theme" 97 | 98 | # Theme options are theme-specific and customize the look and feel of a theme 99 | # further. For a list of options available for each theme, see the 100 | # documentation. 101 | # html_theme_options = {} 102 | 103 | # Add any paths that contain custom themes here, relative to this directory. 104 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 105 | 106 | # The name for this set of Sphinx documents. If None, it defaults to 107 | # " v documentation". 108 | # html_title = None 109 | 110 | # A shorter title for the navigation bar. Default is the same as html_title. 111 | # html_short_title = None 112 | 113 | # The name of an image file (relative to this directory) to place at the top 114 | # of the sidebar. 115 | # html_logo = None 116 | 117 | # The name of an image file (within the static path) to use as favicon of the 118 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 119 | # pixels large. 120 | # html_favicon = None 121 | 122 | # Add any paths that contain custom static files (such as style sheets) here, 123 | # relative to this directory. They are copied after the builtin static files, 124 | # so a file named "default.css" will overwrite the builtin "default.css". 125 | # html_static_path = ["_static"] 126 | html_static_path = [] 127 | 128 | html_context = { 129 | # https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html 130 | # "css_files": ["_static/theme_overrides.css"] 131 | "css_files": [] 132 | } 133 | 134 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 135 | # using the given strftime format. 136 | # html_last_updated_fmt = '%b %d, %Y' 137 | 138 | # If true, SmartyPants will be used to convert quotes and dashes to 139 | # typographically correct entities. 140 | # html_use_smartypants = True 141 | 142 | # Custom sidebar templates, maps document names to template names. 143 | # html_sidebars = {} 144 | 145 | # Additional templates that should be rendered to pages, maps page names to 146 | # template names. 147 | # html_additional_pages = {} 148 | 149 | # If false, no module index is generated. 150 | # html_domain_indices = True 151 | 152 | # If false, no index is generated. 153 | # html_use_index = True 154 | 155 | # If true, the index is split into individual pages for each letter. 156 | # html_split_index = False 157 | 158 | # If true, links to the reST sources are added to the pages. 
159 | # html_show_sourcelink = True 160 | 161 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 162 | # html_show_sphinx = True 163 | 164 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 165 | # html_show_copyright = True 166 | 167 | # If true, an OpenSearch description file will be output, and all pages will 168 | # contain a tag referring to it. The value of this option must be the 169 | # base URL from which the finished HTML is served. 170 | # html_use_opensearch = '' 171 | 172 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 173 | # html_file_suffix = None 174 | 175 | # Output file base name for HTML help builder. 176 | htmlhelp_basename = "flupydoc" 177 | 178 | 179 | # -- Options for LaTeX output -------------------------------------------------- 180 | 181 | latex_elements: Dict[str, str] = { 182 | # The paper size ('letterpaper' or 'a4paper'). 183 | #'papersize': 'letterpaper', 184 | # The font size ('10pt', '11pt' or '12pt'). 185 | #'pointsize': '10pt', 186 | # Additional stuff for the LaTeX preamble. 187 | #'preamble': '', 188 | } 189 | 190 | # Grouping the document tree into LaTeX files. List of tuples 191 | # (source start file, target name, title, author, documentclass [howto/manual]). 192 | latex_documents = [("index", "flupy.tex", "flupy Documentation", "Oliver Rice", "manual")] 193 | 194 | # The name of an image file (relative to this directory) to place at the top of 195 | # the title page. 196 | # latex_logo = None 197 | 198 | # For "manual" documents, if this is true, then toplevel headings are parts, 199 | # not chapters. 200 | # latex_use_parts = False 201 | 202 | # If true, show page references after internal links. 203 | # latex_show_pagerefs = False 204 | 205 | # If true, show URL addresses after external links. 206 | # latex_show_urls = False 207 | 208 | # Documents to append as an appendix to all manuals. 209 | # latex_appendices = [] 210 | 211 | # If false, no module index is generated. 212 | # latex_domain_indices = True 213 | 214 | 215 | # -- Options for manual page output -------------------------------------------- 216 | 217 | # One entry per manual page. List of tuples 218 | # (source start file, name, description, authors, manual section). 219 | man_pages = [("index", "flupy", "flupy Documentation", ["Oliver Rice"], 1)] 220 | 221 | # If true, show URL addresses after external links. 222 | # man_show_urls = False 223 | 224 | 225 | # -- Options for Texinfo output ------------------------------------------------ 226 | 227 | # Grouping the document tree into Texinfo files. List of tuples 228 | # (source start file, target name, title, author, 229 | # dir menu entry, description, category) 230 | texinfo_documents = [ 231 | ( 232 | "index", 233 | "flupy", 234 | "flupy Documentation", 235 | "Oliver Rice", 236 | "flupy", 237 | "A fluent interface to python collections.", 238 | "Miscellaneous", 239 | ) 240 | ] 241 | 242 | # Documents to append as an appendix to all manuals. 243 | # texinfo_appendices = [] 244 | 245 | # If false, no module index is generated. 246 | # texinfo_domain_indices = True 247 | 248 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 249 | # texinfo_show_urls = 'footnote' 250 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: ./welcome.rst 2 | 3 | Contents 4 | ======== 5 | 6 | .. 
toctree::
7 |    :maxdepth: 2
8 |
9 |    welcome
10 |    api
11 |    cli
12 |
13 |
14 | .. toctree::
15 |    :maxdepth: 1
16 |
17 |    license
18 |    influances
19 |    versions
--------------------------------------------------------------------------------
/docs/influances.rst:
--------------------------------------------------------------------------------
1 | =================
2 | Design Influences
3 | =================
4 |
5 | - more-itertools_
6 | - pyspark_
7 | - pydash_
8 | - sqlalchemy_
9 | - scala_
10 |
11 | .. _fluent: https://en.wikipedia.org/wiki/Fluent_interface
12 | .. _more-itertools: https://github.com/erikrose/more-itertools
13 | .. _pyspark: http://spark.apache.org/docs/2.2.0/api/python/pyspark.html
14 | .. _sqlalchemy: https://www.sqlalchemy.org/
15 | .. _pydash: https://pydash.readthedocs.io/en/latest/index.html
16 | .. _scala: https://www.scala-lang.org/
--------------------------------------------------------------------------------
/docs/license.rst:
--------------------------------------------------------------------------------
1 | =======
2 | License
3 | =======
4 |
5 | flupy is under the MIT License. See the LICENSE file.
6 |
7 | Conditions for Contributors
8 | ===========================
9 |
10 | By contributing to this software project, you are agreeing to the following
11 | terms and conditions for your contributions: First, you agree your
12 | contributions are submitted under the MIT license. Second, you represent you
13 | are authorized to make the contributions and grant the license. If your
14 | employer has rights to intellectual property that includes your contributions,
15 | you represent that you have received permission to make contributions and grant
16 | the required license on behalf of that employer.
--------------------------------------------------------------------------------
/docs/versions.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | Version History
3 | ===============
4 |
5 | .. automodule:: flupy
6 |
7 | 1.0.0
8 | -----
9 |
10 | * New Capabilities:
11 |     * Everything
12 |
13 |
14 | 1.1.0
15 | -----
16 |
17 | * Remove support for calling instance methods on an uninitialized flu class, passing an iterable as the *self* argument
18 | * Remove `flupy.Fluent` from the top level `flupy` public API
19 | * Remove `flupy.with_iter` from the API
20 |
21 |
22 | 1.1.2
23 | -----
24 |
25 | * Change the `Fluent` class name to `flu` and remove the class alias to improve docs readability
26 | * Add type hints for `flu.sum`
--------------------------------------------------------------------------------
/docs/welcome.rst:
--------------------------------------------------------------------------------
1 | ================
2 | Welcome to Flupy
3 | ================
4 |
5 | flupy is a lightweight library and CLI for implementing python data pipelines with a fluent interface.
6 |
7 |
8 | Under the hood, flupy is built on generators. That means its pipelines evaluate lazily and use a constant amount of memory no matter how much data are being processed. This allows flupy to tackle petabyte-scale data manipulation as easily as it operates on a small list.
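Because every method returns a lazy generator, building a pipeline does no work up front; elements are computed only when the pipeline is consumed. A minimal sketch of that behaviour, using only methods from the API reference (the snippet and its output are illustrative, not from the original docs)::

    from itertools import count
    from flupy import flu

    # Constructing the pipeline is instant, even over an infinite iterator
    pipeline = flu(count()).map(lambda x: x * 2)

    # Work happens only on consumption
    pipeline.take(3).to_list()  # [0, 2, 4]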
9 |
10 | API
11 | ===
12 | ::
13 |
14 |     import json
15 |     from flupy import flu
16 |
17 |     logs = open('logs.jl', 'r')
18 |
19 |     error_count = (
20 |         flu(logs)
21 |         .map(lambda x: json.loads(x))
22 |         .filter(lambda x: x['level'] == 'ERROR')
23 |         .count()
24 |     )
25 |
26 |     print(error_count)
27 |     # 14
28 |
29 |
30 | CLI
31 | ===
32 |
33 | The flupy library, and the python runtime, are also accessible from the `flu` command line utility::
34 |
35 |     $ cat logs.txt | flu "_.filter(lambda x: x.startswith('ERROR'))"
36 |
37 |
38 | For more information about the `flu` command see :doc:`command line <./cli>`.
39 |
40 |
41 | Getting Started
42 | ===============
43 |
44 | **Requirements**
45 |
46 | Python 3.9+
47 |
48 | **Installation**
49 | ::
50 |
51 |     $ pip install flupy
52 |
53 |
54 | Example
55 | =======
56 |
57 | Since 2008, what domains are our customers coming from?::
58 |
59 |
60 |     from flupy import flu
61 |
62 |     customers = [
63 |         {'name': 'Jane', 'signup_year': 2018, 'email': 'jane@ibm.com'},
64 |         {'name': 'Fred', 'signup_year': 2011, 'email': 'fred@google.com'},
65 |         {'name': 'Lisa', 'signup_year': 2014, 'email': 'jane@ibm.com'},
66 |         {'name': 'Jack', 'signup_year': 2007, 'email': 'jane@apple.com'},
67 |     ]
68 |
69 |     pipeline = (
70 |         flu(customers)
71 |         .filter(lambda x: x['signup_year'] > 2008)
72 |         .map_item('email')
73 |         .map(lambda x: x.partition('@')[2])
74 |         .group_by()  # defaults to identity
75 |         .map(lambda x: (x[0], x[1].count()))
76 |         .collect()
77 |     )
78 |
79 |     print(pipeline)
80 |     # [('google.com', 1), ('ibm.com', 2)]
81 |
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 | ignore_missing_imports = True
3 | strict_optional = True
4 | follow_imports = skip
5 | warn_redundant_casts = True
6 | warn_unused_ignores = False
7 | check_untyped_defs = True
8 | no_implicit_reexport = True
9 |
10 | # Strict Mode:
11 | disallow_untyped_defs = True
12 | disallow_any_generics = True
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "flupy"
3 | version = "1.2.2"
4 | description = "Fluent data processing in Python - a chainable stream processing library for expressive data manipulation using method chaining"
5 | authors = ["Oliver Rice "]
6 | license = "MIT"
7 | readme = "README.md"
8 | repository = "https://github.com/olirice/flupy"
9 | packages = [{include = "flupy", from = "src"}]
10 | classifiers = [
11 |     "Development Status :: 4 - Beta",
12 |     "Natural Language :: English",
13 |     "Operating System :: OS Independent",
14 |     "Programming Language :: Python",
15 |     "Programming Language :: Python :: 3",
16 |     "Programming Language :: Python :: 3.9",
17 |     "Programming Language :: Python :: 3.10",
18 |     "Programming Language :: Python :: 3.11",
19 |     "Programming Language :: Python :: 3.12",
20 |     "Programming Language :: Python :: 3.13",
21 | ]
22 |
23 | [tool.poetry.dependencies]
24 | python = ">=3.9"
25 | typing_extensions = ">=4"
26 |
27 | [tool.poetry.group.dev.dependencies]
28 | pytest = "*"
29 | pytest-cov = "*"
30 | pytest-benchmark = "*"
31 | pre-commit = "*"
32 | pylint = "*"
33 | black = "*"
34 | mypy = "*"
35 |
36 | [tool.poetry.scripts]
37 | flu = "flupy.cli.cli:main"
38 | flu_precommit = "flupy.cli.cli:precommit"
39 |
40 | [build-system]
41 | requires = ["poetry-core>=2.0.0"]
42 | build-backend = 
"poetry.core.masonry.api" 43 | 44 | [tool.black] 45 | line-length = 120 46 | exclude = ''' 47 | /( 48 | \.git 49 | | \.hg 50 | | \.mypy_cache 51 | | \.tox 52 | | \.venv 53 | | _build 54 | | buck-out 55 | | build 56 | | dist 57 | )/ 58 | ''' 59 | 60 | [tool.mypy] 61 | python_version = "3.9" 62 | ignore_missing_imports = true 63 | strict_optional = true 64 | follow_imports = "skip" 65 | warn_redundant_casts = true 66 | warn_unused_ignores = false 67 | check_untyped_defs = true 68 | no_implicit_reexport = true 69 | disallow_untyped_defs = true 70 | disallow_any_generics = true 71 | 72 | [tool.pytest.ini_options] 73 | addopts = "--cov=src/flupy src/tests" 74 | 75 | [tool.coverage.report] 76 | exclude_lines = [ 77 | "pragma: no cover", 78 | "if TYPE_CHECKING:", 79 | "raise AssertionError", 80 | "raise NotImplementedError", 81 | "@overload", 82 | "pass", 83 | ] 84 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --cov=src/flupy src/tests 3 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description_file = README.md 3 | -------------------------------------------------------------------------------- /src/flupy/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib.metadata import version 2 | 3 | from flupy.cli.utils import walk_dirs, walk_files 4 | from flupy.fluent import flu 5 | 6 | __project__ = "flupy" 7 | __version__ = version(__project__) 8 | 9 | __all__ = ["flu", "walk_files", "walk_dirs"] 10 | -------------------------------------------------------------------------------- /src/flupy/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olirice/flupy/0205b524c7a9da547bfe0922a02285eec0ca6925/src/flupy/cli/__init__.py -------------------------------------------------------------------------------- /src/flupy/cli/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import importlib 3 | import sys 4 | from signal import SIG_DFL, SIGPIPE, signal 5 | from typing import Any, Dict, Generator, List, Optional 6 | 7 | from flupy import __version__, flu, walk_dirs, walk_files 8 | 9 | 10 | def read_file(path: str) -> Generator[str, None, None]: 11 | """Yield lines from a file given its path""" 12 | with open(path, "r") as f: 13 | yield from f 14 | 15 | 16 | def parse_args(args: List[str]) -> argparse.Namespace: 17 | """Parse input arguments""" 18 | parser = argparse.ArgumentParser( 19 | description="flupy: a fluent interface for python collections", 20 | formatter_class=argparse.RawTextHelpFormatter, 21 | ) 22 | parser.add_argument("-v", "--version", action="version", version="%(prog)s " + __version__) 23 | parser.add_argument("command", help="command to execute against input") 24 | parser.add_argument("-f", "--file", help="path to input file") 25 | parser.add_argument( 26 | "-i", 27 | "--import", 28 | nargs="*", 29 | default=[], 30 | help="modules to import\n" 31 | "Syntax: ::\n" 32 | "Examples:\n" 33 | "\t'import os' = '-i os'\n" 34 | "\t'import os as op_sys' = '-i os::op_sys'\n" 35 | "\t'from os import environ' = '-i os:environ'\n" 36 | "\t'from os import environ as env' = '-i os:environ:env'\n", 37 | ) 38 | 
return parser.parse_args(args) 39 | 40 | 41 | def build_import_dict(imps: List[str]) -> Dict[str, Any]: 42 | """Execute CLI scoped imports""" 43 | import_dict = {} 44 | for imp_stx in imps: 45 | module, _, obj_alias = imp_stx.partition(":") 46 | obj, _, alias = obj_alias.partition(":") 47 | 48 | if not obj: 49 | import_dict[alias or module] = importlib.import_module(module) 50 | else: 51 | _garb = importlib.import_module(module) 52 | import_dict[alias or obj] = getattr(_garb, obj) 53 | return import_dict 54 | 55 | 56 | def main(argv: Optional[List[str]] = None) -> None: 57 | """CLI Entrypoint""" 58 | args = parse_args(argv[1:] if argv is not None else sys.argv[1:]) 59 | 60 | _command = args.command 61 | _file = args.file 62 | _import = getattr(args, "import") 63 | 64 | import_dict = build_import_dict(_import) 65 | 66 | if _file: 67 | _ = flu(read_file(_file)).map(str.rstrip) 68 | else: 69 | # Do not raise exception for Broken Pipe 70 | signal(SIGPIPE, SIG_DFL) 71 | _ = flu(sys.stdin).map(str.rstrip) 72 | 73 | locals_dict = { 74 | "flu": flu, 75 | "_": _, 76 | "walk_files": walk_files, 77 | "walk_dirs": walk_dirs, 78 | } 79 | 80 | pipeline = eval(_command, import_dict, locals_dict) 81 | 82 | if hasattr(pipeline, "__iter__") and not isinstance(pipeline, (str, bytes)): 83 | for r in pipeline: 84 | sys.stdout.write(str(r) + "\n") 85 | 86 | elif pipeline is None: 87 | pass 88 | else: 89 | sys.stdout.write(str(pipeline) + "\n") 90 | -------------------------------------------------------------------------------- /src/flupy/cli/utils.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=invalid-name 2 | import os 3 | from typing import Generator 4 | 5 | from flupy.fluent import Fluent, flu 6 | 7 | 8 | def walk_files(*pathes: str, abspath: bool = True) -> "Fluent[str]": 9 | """Yield files recursively starting from each location in *pathes""" 10 | 11 | if pathes == (): 12 | pathes = (".",) 13 | 14 | def _impl() -> Generator[str, None, None]: 15 | for path in pathes: 16 | for d, _, files in os.walk(path): 17 | for x in files: 18 | rel_path = os.path.join(d, x) 19 | if abspath: 20 | yield os.path.abspath(rel_path) 21 | else: 22 | yield rel_path 23 | 24 | return flu(_impl()) 25 | 26 | 27 | def walk_dirs(path: str = ".") -> "Fluent[str]": 28 | """Yield files recursively starting from *path""" 29 | 30 | def _impl() -> Generator[str, None, None]: 31 | for d, _, _ in os.walk(path): 32 | yield d 33 | 34 | return flu(_impl()) 35 | -------------------------------------------------------------------------------- /src/flupy/fluent.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=invalid-name 2 | import time 3 | from collections import defaultdict, deque 4 | from collections.abc import Iterable as IterableType 5 | from functools import reduce 6 | from itertools import dropwhile, groupby, islice, product, takewhile, tee, zip_longest 7 | from random import sample 8 | from typing import ( 9 | Any, 10 | Callable, 11 | Collection, 12 | Deque, 13 | Generator, 14 | Generic, 15 | Hashable, 16 | Iterable, 17 | Iterator, 18 | List, 19 | Optional, 20 | Set, 21 | Tuple, 22 | Type, 23 | TypeVar, 24 | Union, 25 | overload, 26 | ) 27 | 28 | from typing_extensions import Concatenate, ParamSpec, Protocol 29 | 30 | __all__ = ["flu"] 31 | 32 | 33 | T = TypeVar("T") 34 | T_co = TypeVar("T_co", covariant=True) 35 | T_contra = TypeVar("T_contra", contravariant=True) 36 | _T1 = TypeVar("_T1") 37 | _T2 = TypeVar("_T2") 38 | 
_T3 = TypeVar("_T3") 39 | S = TypeVar("S") 40 | P = ParamSpec("P") 41 | 42 | CallableTakesIterable = Callable[[Iterable[T]], Collection[T]] 43 | 44 | 45 | class SupportsEquality(Protocol): 46 | def __eq__(self, __other: object) -> bool: 47 | pass 48 | 49 | 50 | class SupportsGetItem(Protocol[T_co]): 51 | def __getitem__(self, __k: Hashable) -> T_co: 52 | pass 53 | 54 | 55 | class SupportsIteration(Protocol[T_co]): 56 | def __iter__(self) -> Iterator[T]: 57 | pass 58 | 59 | 60 | class SupportsLessThan(Protocol): 61 | def __lt__(self, __other: Any) -> bool: 62 | pass 63 | 64 | 65 | SupportsLessThanT = TypeVar("SupportsLessThanT", bound="SupportsLessThan") 66 | 67 | 68 | class Empty: 69 | pass 70 | 71 | 72 | def identity(x: T) -> T: 73 | return x 74 | 75 | 76 | class Fluent(Generic[T]): 77 | """A fluent interface to lazy generator functions 78 | 79 | >>> from flupy import flu 80 | >>> ( 81 | flu(range(100)) 82 | .map(lambda x: x**2) 83 | .filter(lambda x: x % 3 == 0) 84 | .chunk(3) 85 | .take(2) 86 | .to_list() 87 | ) 88 | [[0, 9, 36], [81, 144, 225]] 89 | """ 90 | 91 | def __init__(self, iterable: Iterable[T]) -> None: 92 | iterator = iter(iterable) 93 | self._iterator: Iterator[T] = iterator 94 | 95 | @overload 96 | def __getitem__(self, index: int) -> T: 97 | pass 98 | 99 | @overload 100 | def __getitem__(self, index: slice) -> "Fluent[T]": 101 | pass 102 | 103 | def __getitem__(self, key: Union[int, slice]) -> Union[T, "Fluent[T]"]: 104 | if isinstance(key, int) and key >= 0: 105 | try: 106 | return next(islice(self._iterator, key, key + 1)) 107 | except StopIteration: 108 | raise IndexError("flu index out of range") 109 | elif isinstance(key, slice): 110 | return flu(islice(self._iterator, key.start, key.stop, key.step)) 111 | else: 112 | raise TypeError(f"Indices must be non-negative integers or slices, not {type(key).__name__}") 113 | 114 | ### Summary ### 115 | def collect(self, n: Optional[int] = None, container_type: CallableTakesIterable[T] = list) -> Collection[T]: 116 | """Collect items from iterable into a container 117 | 118 | >>> flu(range(4)).collect() 119 | [0, 1, 2, 3] 120 | 121 | >>> flu(range(4)).collect(container_type=set) 122 | {0, 1, 2, 3} 123 | 124 | >>> flu(range(4)).collect(n=2) 125 | [0, 1] 126 | """ 127 | return container_type(self.take(n)) 128 | 129 | def to_list(self) -> List[T]: 130 | """Collect items from iterable into a list 131 | 132 | >>> flu(range(4)).to_list() 133 | [0, 1, 2, 3] 134 | """ 135 | return list(self) 136 | 137 | def sum(self) -> Union[T, int]: 138 | """Sum of elements in the iterable 139 | 140 | >>> flu([1,2,3]).sum() 141 | 6 142 | 143 | """ 144 | return sum(self) # type: ignore 145 | 146 | def count(self) -> int: 147 | """Count of elements in the iterable 148 | 149 | >>> flu(['a','b','c']).count() 150 | 3 151 | """ 152 | return sum(1 for _ in self) 153 | 154 | def min(self: "Fluent[SupportsLessThanT]") -> SupportsLessThanT: 155 | """Smallest element in the interable 156 | 157 | >>> flu([1, 3, 0, 2]).min() 158 | 0 159 | """ 160 | return min(self) 161 | 162 | def max(self: "Fluent[SupportsLessThanT]") -> SupportsLessThanT: 163 | """Largest element in the interable 164 | 165 | >>> flu([0, 3, 2, 1]).max() 166 | 3 167 | """ 168 | return max(self) 169 | 170 | def first(self, default: Any = Empty()) -> T: 171 | """Return the first item of the iterable. Raise IndexError if empty, or return default if provided. 
172 | 173 | >>> flu([0, 1, 2, 3]).first() 174 | 0 175 | >>> flu([]).first(default="some_default") 176 | 'some_default' 177 | """ 178 | x: Union[Empty, T] = default 179 | for x in self: 180 | return x 181 | if isinstance(x, Empty): 182 | raise IndexError("Empty iterator") 183 | return x 184 | 185 | def last(self, default: Any = Empty()) -> T: 186 | """Return the last item of the iterble. Raise IndexError if empty or default if provided. 187 | 188 | >>> flu([0, 1, 2, 3]).last() 189 | 3 190 | >>> flu([]).last(default='some_default') 191 | 'some_default' 192 | """ 193 | x: Union[Empty, T] = default 194 | for x in self: 195 | pass 196 | if isinstance(x, Empty): 197 | raise IndexError("Empty iterator") 198 | return x 199 | 200 | def head(self, n: int = 10, container_type: CallableTakesIterable[T] = list) -> Collection[T]: 201 | """Returns up to the first *n* elements from the iterable. 202 | 203 | >>> flu(range(20)).head() 204 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 205 | 206 | >>> flu(range(15)).head(n=2) 207 | [0, 1] 208 | 209 | >>> flu([]).head() 210 | [] 211 | """ 212 | return self.take(n).collect(container_type=container_type) 213 | 214 | def tail(self, n: int = 10, container_type: CallableTakesIterable[T] = list) -> Collection[T]: 215 | """Return up to the last *n* elements from the iterable 216 | 217 | >>> flu(range(20)).tail() 218 | [10, 11, 12, 13, 14, 15, 16, 17, 18, 19] 219 | 220 | >>> flu(range(15)).tail(n=2) 221 | [13, 14] 222 | """ 223 | val: Union[List[Empty], Tuple[Any, ...]] = [Empty()] 224 | for val in self.window(n, fill_value=Empty()): 225 | pass 226 | return container_type([x for x in val if not isinstance(x, Empty)]) 227 | 228 | ### End Summary ### 229 | 230 | ### Non-Constant Memory ### 231 | def sort( 232 | self: "Fluent[SupportsLessThanT]", 233 | key: Optional[Callable[[Any], Any]] = None, 234 | reverse: bool = False, 235 | ) -> "Fluent[SupportsLessThanT]": 236 | """Sort iterable by *key* function if provided or identity otherwise 237 | 238 | Note: sorting loads the entire iterable into memory 239 | 240 | >>> flu([3,6,1]).sort().to_list() 241 | [1, 3, 6] 242 | 243 | >>> flu([3,6,1]).sort(reverse=True).to_list() 244 | [6, 3, 1] 245 | 246 | >>> flu([3,-6,1]).sort(key=abs).to_list() 247 | [1, 3, -6] 248 | """ 249 | return Fluent(sorted(self, key=key, reverse=reverse)) 250 | 251 | def join_left( 252 | self, 253 | other: Iterable[_T1], 254 | key: Callable[[T], Hashable] = identity, 255 | other_key: Callable[[_T1], Hashable] = identity, 256 | ) -> "Fluent[Tuple[T, Union[_T1, None]]]": 257 | """Join the iterable with another iterable using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries 258 | 259 | When no matching entry is found in *other*, entries in the iterable are paired with None 260 | 261 | Note: join_left loads *other* into memory 262 | 263 | >>> flu(range(6)).join_left(range(0, 6, 2)).to_list() 264 | [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)] 265 | """ 266 | 267 | def _impl() -> Generator[Tuple[T, Union[_T1, None]], None, None]: 268 | 269 | other_lookup = defaultdict(list) 270 | 271 | for entry_other in other: 272 | other_lookup[other_key(entry_other)].append(entry_other) 273 | 274 | for entry in self: 275 | matches: Optional[List[_T1]] = other_lookup.get(key(entry)) 276 | 277 | if matches: 278 | for match in matches: 279 | yield (entry, match) 280 | else: 281 | yield (entry, None) 282 | 283 | return Fluent(_impl()) 284 | 285 | def join_inner( 286 | self, 287 | other: Iterable[_T1], 288 | key: Callable[[T], 
Hashable] = identity, 289 | other_key: Callable[[_T1], Hashable] = identity, 290 | ) -> "Fluent[Tuple[T, _T1]]": 291 | """Join the iterable with another iterable, using equality between *key* applied to self and *other_key* applied to *other* to identify matching entries. 292 | 293 | When no matching entry is found in *other*, entries in the iterable are filtered from the results. 294 | 295 | Note: join_inner loads *other* into memory 296 | 297 | >>> flu(range(6)).join_inner(range(0, 6, 2)).to_list() 298 | [(0, 0), (2, 2), (4, 4)] 299 | 300 | """ 301 | 302 | def _impl() -> Generator[Tuple[T, _T1], None, None]: 303 | 304 | other_lookup = defaultdict(list) 305 | 306 | for entry_other in other: 307 | other_lookup[other_key(entry_other)].append(entry_other) 308 | 309 | for entry in self: 310 | matches: List[_T1] = other_lookup[key(entry)] 311 | 312 | for match in matches: 313 | yield (entry, match) 314 | 315 | return Fluent(_impl()) 316 | 317 | def shuffle(self) -> "Fluent[T]": 318 | """Randomize the order of elements in the iterable. 319 | 320 | Note: shuffle loads the entire iterable into memory 321 | 322 | >>> flu([3,6,1]).shuffle().to_list() 323 | [6, 1, 3] 324 | """ 325 | dat: List[T] = self.to_list() 326 | return Fluent(sample(dat, len(dat))) 327 | 328 | def group_by( 329 | self, key: Callable[[T], Union[T, _T1]] = identity, sort: bool = True 330 | ) -> "Fluent[Tuple[Union[T,_T1], Fluent[T]]]": 331 | """Yield consecutive keys and groups from the iterable 332 | 333 | *key* is a function to compute a key value used in grouping and sorting for each element. *key* defaults to an identity function which returns the unchanged element 334 | 335 | When the iterable is pre-sorted according to *key*, setting *sort* to False will prevent loading the dataset into memory and improve performance 336 | 337 | >>> flu([2, 4, 2, 4]).group_by().to_list() 338 | [(2, <flu object>), (4, <flu object>)] 339 | 340 | Or, if the iterable is pre-sorted 341 | 342 | >>> flu([2, 2, 5, 5]).group_by(sort=False).to_list() 343 | [(2, <flu object>), (5, <flu object>)] 344 | 345 | Using a key function 346 | 347 | >>> points = [ 348 | {'x': 1, 'y': 0}, 349 | {'x': 4, 'y': 3}, 350 | {'x': 1, 'y': 5} 351 | ] 352 | >>> key_func = lambda u: u['x'] 353 | >>> flu(points).group_by(key=key_func, sort=True).to_list() 354 | [(1, <flu object>), (4, <flu object>)] 355 | """ 356 | 357 | gen = self.sort(key) if sort else self 358 | return Fluent(groupby(gen, key)).map(lambda x: (x[0], flu([y for y in x[1]]))) 359 | 360 | def unique(self, key: Callable[[T], Hashable] = identity) -> "Fluent[T]": 361 | """Yield elements that are unique by a *key*. 
362 | 363 | >>> flu([2, 3, 2, 3]).unique().to_list() 364 | [2, 3] 365 | 366 | >>> flu([2, -3, -2, 3]).unique(key=abs).to_list() 367 | [2, -3] 368 | """ 369 | 370 | def _impl() -> Generator[T, None, None]: 371 | seen: Set[Any] = set() 372 | for x in self: 373 | x_hash = key(x) 374 | if x_hash in seen: 375 | continue 376 | else: 377 | seen.add(x_hash) 378 | yield x 379 | 380 | return Fluent(_impl()) 381 | 382 | ### End Non-Constant Memory ### 383 | 384 | ### Side Effect ### 385 | def rate_limit(self, per_second: Union[int, float] = 100) -> "Fluent[T]": 386 | """Restrict consumption of the iterable to *per_second* items per second 387 | 388 | >>> import time 389 | >>> start_time = time.time() 390 | >>> _ = flu(range(3)).rate_limit(3).to_list() 391 | >>> print('Runtime', int(time.time() - start_time)) 392 | Runtime 1 # approximately 1 second for 3 items 393 | """ 394 | 395 | def _impl() -> Generator[T, None, None]: 396 | wait_time = 1.0 / per_second 397 | for val in self: 398 | start_time = time.time() 399 | yield val 400 | call_duration = time.time() - start_time 401 | time.sleep(max(wait_time - call_duration, 0.0)) 402 | 403 | return Fluent(_impl()) 404 | 405 | def side_effect( 406 | self, 407 | func: Callable[[T], Any], 408 | before: Optional[Callable[[], Any]] = None, 409 | after: Optional[Callable[[], Any]] = None, 410 | ) -> "Fluent[T]": 411 | """Invoke *func* for each item in the iterable before yielding the item. 412 | *func* takes a single argument and its output is discarded. 413 | *before* and *after* are optional functions that take no parameters; each is called exactly once, 414 | before iteration begins and after iteration ends, respectively. 415 | 416 | 417 | >>> flu(range(2)).side_effect(lambda x: print(f'Collected {x}')).to_list() 418 | Collected 0 419 | Collected 1 420 | [0, 1] 421 | """ 422 | 423 | def _impl() -> Generator[T, None, None]: 424 | try: 425 | if before is not None: 426 | before() 427 | 428 | for x in self: 429 | func(x) 430 | yield x 431 | 432 | finally: 433 | if after is not None: 434 | after() 435 | 436 | return Fluent(_impl()) 437 | 438 | ### End Side Effect ### 439 | 440 | def map(self, func: Callable[Concatenate[T, P], _T1], *args: Any, **kwargs: Any) -> "Fluent[_T1]": 441 | """Apply *func* to each element of the iterable 442 | 443 | >>> flu(range(5)).map(lambda x: x*x).to_list() 444 | [0, 1, 4, 9, 16] 445 | """ 446 | 447 | def _impl() -> Generator[_T1, None, None]: 448 | for val in self._iterator: 449 | yield func(val, *args, **kwargs) 450 | 451 | return Fluent(_impl()) 452 | 453 | def map_item(self: "Fluent[SupportsGetItem[T]]", item: Hashable) -> "Fluent[T]": 454 | """Extract *item* from every element of the iterable 455 | 456 | >>> flu([(2, 4), (2, 5)]).map_item(1).to_list() 457 | [4, 5] 458 | 459 | >>> flu([{'mykey': 8}, {'mykey': 5}]).map_item('mykey').to_list() 460 | [8, 5] 461 | """ 462 | 463 | def _impl() -> Generator[T, None, None]: 464 | for x in self: 465 | yield x[item] 466 | 467 | return Fluent(_impl()) 468 | 469 | def map_attr(self, attr: str) -> "Fluent[Any]": 470 | """Extract the attribute *attr* from each element of the iterable 471 | 472 | >>> from collections import namedtuple 473 | >>> MyTup = namedtuple('MyTup', ['value', 'backup_val']) 474 | >>> flu([MyTup(1, 5), MyTup(2, 4)]).map_attr('value').to_list() 475 | [1, 2] 476 | """ 477 | return self.map(lambda x: getattr(x, attr)) 478 | 479 | def filter(self, func: Callable[Concatenate[T, P], bool], *args: Any, **kwargs: Any) -> "Fluent[T]": 480 | """Yield elements of the iterable where 
*func* returns truthy. 481 | 482 | >>> flu(range(10)).filter(lambda x: x % 2 == 0).to_list() 483 | [0, 2, 4, 6, 8] 484 | """ 485 | 486 | def _impl() -> Generator[T, None, None]: 487 | for val in self._iterator: 488 | if func(val, *args, **kwargs): 489 | yield val 490 | 491 | return Fluent(_impl()) 492 | 493 | def reduce(self, func: Callable[[T, T], T]) -> T: 494 | """Apply a function of two arguments cumulatively to the items of the iterable, 495 | from left to right, so as to reduce the sequence to a single value 496 | 497 | >>> flu(range(5)).reduce(lambda x, y: x + y) 498 | 10 499 | """ 500 | return reduce(func, self) 501 | 502 | def fold_left(self, func: Callable[[S, T], S], initial: S) -> S: 503 | """Apply a function of two arguments cumulatively to the items of the iterable, 504 | from left to right, starting with *initial*, so as to fold the sequence to 505 | a single value 506 | 507 | >>> flu(range(5)).fold_left(lambda x, y: x + str(y), "") 508 | '01234' 509 | """ 510 | return reduce(func, self, initial) 511 | 512 | @overload 513 | def zip(self, __iter1: Iterable[_T1]) -> "Fluent[Tuple[T, _T1]]": ... 514 | 515 | @overload 516 | def zip(self, __iter1: Iterable[_T1], __iter2: Iterable[_T2]) -> "Fluent[Tuple[T, _T1, _T2]]": ... 517 | 518 | @overload 519 | def zip( 520 | self, __iter1: Iterable[_T1], __iter2: Iterable[_T2], __iter3: Iterable[_T3] 521 | ) -> "Fluent[Tuple[T, _T1, _T2, _T3]]": ... 522 | 523 | @overload 524 | def zip( 525 | self, 526 | __iter1: Iterable[Any], 527 | __iter2: Iterable[Any], 528 | __iter3: Iterable[Any], 529 | __iter4: Iterable[Any], 530 | *iterable: Iterable[Any], 531 | ) -> "Fluent[Tuple[T, ...]]": ... 532 | 533 | def zip(self, *iterable: Iterable[Any]) -> Union[ 534 | "Fluent[Tuple[T, ...]]", 535 | "Fluent[Tuple[T, _T1]]", 536 | "Fluent[Tuple[T, _T1, _T2]]", 537 | "Fluent[Tuple[T, _T1, _T2, _T3]]", 538 | ]: 539 | """Yields tuples pairing the i-th element of the instance 540 | with the i-th element of each argument in *iterable* 541 | 542 | >>> flu(range(5)).zip(range(3, 0, -1)).to_list() 543 | [(0, 3), (1, 2), (2, 1)] 544 | """ 545 | # @self_to_flu is not compatible with @overload 546 | # make sure any usage of self supports arbitrary iterables 547 | tup_iter = zip(iter(self), *iterable) 548 | return Fluent(tup_iter) 549 | 550 | def zip_longest(self, *iterable: Iterable[_T1], fill_value: Any = None) -> "Fluent[Tuple[T, ...]]": 551 | """Yields tuples pairing the i-th element of the instance 552 | with the i-th element of each argument in *iterable*. 553 | Iteration continues until the longest iterable is exhausted. 554 | If iterables are uneven in length, missing values are filled in with *fill_value* 555 | 556 | >>> flu(range(5)).zip_longest(range(3, 0, -1)).to_list() 557 | [(0, 3), (1, 2), (2, 1), (3, None), (4, None)] 558 | 559 | 560 | >>> flu(range(5)).zip_longest(range(3, 0, -1), fill_value='a').to_list() 561 | [(0, 3), (1, 2), (2, 1), (3, 'a'), (4, 'a')] 562 | """ 563 | return Fluent(zip_longest(self, *iterable, fillvalue=fill_value)) 564 | 565 | def enumerate(self, start: int = 0) -> "Fluent[Tuple[int, T]]": 566 | """Yields tuples from the instance where the first element 567 | is a count from initial value *start*. 
568 | 569 | >>> flu([3,4,5]).enumerate().to_list() 570 | [(0, 3), (1, 4), (2, 5)] 571 | """ 572 | return Fluent(enumerate(self, start=start)) 573 | 574 | def take(self, n: Optional[int] = None) -> "Fluent[T]": 575 | """Yield first *n* items of the iterable 576 | 577 | >>> flu(range(10)).take(2).to_list() 578 | [0, 1] 579 | """ 580 | return Fluent(islice(self._iterator, n)) 581 | 582 | def take_while(self, predicate: Callable[[T], bool]) -> "Fluent[T]": 583 | """Yield elements from the iterable so long as the predicate is true 584 | 585 | >>> flu(range(10)).take_while(lambda x: x < 3).to_list() 586 | [0, 1, 2] 587 | """ 588 | return Fluent(takewhile(predicate, self._iterator)) 589 | 590 | def drop_while(self, predicate: Callable[[T], bool]) -> "Fluent[T]": 591 | """Drop elements from the iterable as long as the predicate is true; 592 | afterwards, return every element 593 | 594 | >>> flu(range(10)).drop_while(lambda x: x < 3).to_list() 595 | [3, 4, 5, 6, 7, 8, 9] 596 | """ 597 | return Fluent(dropwhile(predicate, self._iterator)) 598 | 599 | def chunk(self, n: int) -> "Fluent[List[T]]": 600 | """Yield lists of elements from the iterable in groups of *n* 601 | 602 | If the iterable is not evenly divisible by *n*, the final list will be shorter 603 | 604 | >>> flu(range(10)).chunk(3).to_list() 605 | [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 606 | """ 607 | 608 | def _impl() -> Generator[List[T], None, None]: 609 | 610 | while True: 611 | vals: List[T] = list(self.take(n)) 612 | if vals: 613 | yield vals 614 | else: 615 | return 616 | 617 | return Fluent(_impl()) 618 | 619 | def flatten( 620 | self, 621 | depth: int = 1, 622 | base_type: Optional[Type[object]] = None, 623 | iterate_strings: bool = False, 624 | ) -> "Fluent[Any]": 625 | """Recursively flatten nested iterables (e.g., a list of lists of tuples) 626 | into non-iterable types, or down to an optional user-defined *base_type* 627 | 628 | Strings are treated as non-iterable for convenience. Set iterate_strings=True 629 | to change that behavior. 
630 | 631 | >>> flu([[0, 1, 2], [3, 4, 5]]).flatten().to_list() 632 | [0, 1, 2, 3, 4, 5] 633 | 634 | >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten().to_list() 635 | [0, [1, 2], [3, 4], 5] 636 | 637 | >>> flu([[0, [1, 2]], [[3, 4], 5]]).flatten(depth=2).to_list() 638 | [0, 1, 2, 3, 4, 5] 639 | 643 | >>> flu([1, (2, 2), 4, [5, (6, 6, 6)]]).flatten(base_type=tuple).to_list() 644 | [1, (2, 2), 4, 5, (6, 6, 6)] 645 | 646 | >>> flu([[2, 0], 'abc', 3, [4]]).flatten(iterate_strings=True).to_list() 647 | [2, 0, 'a', 'b', 'c', 3, 4] 648 | """ 649 | 650 | # TODO(OR): Reimplement with strong types 651 | def walk(node: Any, level: int) -> Generator[T, None, None]: 652 | if ( 653 | ((depth is not None) and (level > depth)) 654 | or (isinstance(node, str) and not iterate_strings) 655 | or ((base_type is not None) and isinstance(node, base_type)) 656 | ): 657 | yield node 658 | return 659 | try: 660 | tree = iter(node) 661 | except TypeError: 662 | yield node 663 | return 664 | else: 665 | for child in tree: 666 | for val in walk(child, level + 1): 667 | yield val 668 | 669 | return Fluent(walk(self, level=0)) 670 | 671 | def denormalize(self: "Fluent[SupportsIteration[Any]]", iterate_strings: bool = False) -> "Fluent[Tuple[Any, ...]]": 672 | """Denormalize iterable components of each record 673 | 674 | >>> flu([("abc", [1, 2, 3])]).denormalize().to_list() 675 | [('abc', 1), ('abc', 2), ('abc', 3)] 676 | 677 | >>> flu([("abc", [1, 2])]).denormalize(iterate_strings=True).to_list() 678 | [('a', 1), ('a', 2), ('b', 1), ('b', 2), ('c', 1), ('c', 2)] 679 | 680 | >>> flu([("abc", [])]).denormalize().to_list() 681 | [] 682 | """ 683 | 684 | def _impl() -> Generator[Tuple[Any, ...], None, None]: 685 | for record in self: 686 | iter_elements: List[Iterable[Any]] = [] 687 | element: Any 688 | for element in record: 689 | 690 | # String element, and string iteration is allowed 691 | if isinstance(element, str) and iterate_strings: 692 | iter_elements.append(element) 693 | 694 | # String element, and string iteration is not allowed 695 | elif isinstance(element, str): 696 | iter_elements.append([element]) 697 | 698 | # Iterable element 699 | elif isinstance(element, IterableType): 700 | iter_elements.append(element) 701 | 702 | # Non-iterable element 703 | else: 704 | iter_elements.append([element]) 705 | 706 | for row in product(*iter_elements): 707 | yield row 708 | 709 | return Fluent(_impl()) 710 | 711 | def window(self, n: int, step: int = 1, fill_value: Any = None) -> "Fluent[Tuple[Any, ...]]": 712 | """Yield a sliding window of width *n* over the given iterable. 
713 | 714 | Each window will advance in increments of *step*. 715 | 716 | If the length of the iterable does not evenly divide by *step*, 717 | the final output is padded with *fill_value* 718 | 719 | >>> flu(range(5)).window(3).to_list() 720 | [(0, 1, 2), (1, 2, 3), (2, 3, 4)] 721 | 722 | >>> flu(range(5)).window(n=3, step=2).to_list() 723 | [(0, 1, 2), (2, 3, 4)] 724 | 725 | >>> flu(range(9)).window(n=4, step=3).to_list() 726 | [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, None)] 727 | 728 | >>> flu(range(9)).window(n=4, step=3, fill_value=-1).to_list() 729 | [(0, 1, 2, 3), (3, 4, 5, 6), (6, 7, 8, -1)] 730 | """ 731 | 732 | def _impl() -> Generator[Tuple[Any, ...], None, None]: 733 | if n < 0: 734 | raise ValueError("n must be >= 0") 735 | elif n == 0: 736 | yield tuple() 737 | return 738 | if step < 1: 739 | raise ValueError("step must be >= 1") 740 | 741 | window: Deque[Any] = deque([], n) 742 | append = window.append 743 | 744 | # Initial deque fill 745 | for _ in range(n): 746 | append(next(self, fill_value)) 747 | yield tuple(window) 748 | 749 | # Appending new items to the right causes old items to fall off the left 750 | i = 0 751 | for item in self: 752 | append(item) 753 | i = (i + 1) % step 754 | if i % step == 0: 755 | yield tuple(window) 756 | 757 | # If there are items from the iterable in the window, pad with the given 758 | # value and emit them. 759 | if (i % step) and (step - i < n): 760 | for _ in range(step - i): 761 | append(fill_value) 762 | yield tuple(window) 763 | 764 | return Fluent(_impl()) 765 | 766 | def __iter__(self) -> "Fluent[T]": 767 | return self 768 | 769 | def __next__(self) -> T: 770 | return next(self._iterator) 771 | 772 | def tee(self, n: int = 2) -> "Fluent[Fluent[T]]": 773 | """Return *n* independent iterators from a single iterable 774 | 775 | Once tee() has made a split, the original iterable should not be used 776 | anywhere else; otherwise, the iterable could get advanced without the 777 | tee objects being informed 778 | 779 | >>> copy1, copy2 = flu(range(5)).tee() 780 | >>> copy1.sum() 781 | 10 782 | >>> copy2.to_list() 783 | [0, 1, 2, 3, 4] 784 | """ 785 | return Fluent((Fluent(x) for x in tee(self, n))) 786 | 787 | 788 | class flu(Fluent[T]): 789 | """A fluent interface to lazy generator functions 790 | 791 | >>> from flupy import flu 792 | >>> ( 793 | flu(range(100)) 794 | .map(lambda x: x**2) 795 | .filter(lambda x: x % 3 == 0) 796 | .chunk(3) 797 | .take(2) 798 | .to_list() 799 | ) 800 | [[0, 9, 36], [81, 144, 225]] 801 | """ 802 | -------------------------------------------------------------------------------- /src/flupy/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/olirice/flupy/0205b524c7a9da547bfe0922a02285eec0ca6925/src/flupy/py.typed -------------------------------------------------------------------------------- /src/tests/test_cli.py: -------------------------------------------------------------------------------- 1 | from tempfile import NamedTemporaryFile 2 | 3 | import pytest 4 | 5 | from flupy.cli.cli import build_import_dict, main, parse_args 6 | 7 | 8 | def test_parse_args(): 9 | with pytest.raises(SystemExit) as cm: 10 | parse_args([]) 11 | assert cm.value.code == 2 12 | 13 | args = parse_args(["_"]) 14 | assert args.command == "_" 15 | 16 | args = parse_args(["_", "-i", "os:environ:env"]) 17 | assert "os:environ:env" in getattr(args, "import") 18 | assert args.command == "_" 19 | 20 | import_dict = build_import_dict(["json"]) 21 | assert "json" 
in import_dict 22 | 23 | 24 | def test_build_import_dict(): 25 | import json 26 | 27 | import_dict = build_import_dict(["json"]) 28 | assert "json" in import_dict 29 | assert import_dict["json"] == json 30 | 31 | import_dict = build_import_dict(["json:dumps"]) 32 | assert "dumps" in import_dict 33 | assert import_dict["dumps"] == json.dumps 34 | 35 | import_dict = build_import_dict(["json:dumps:ds"]) 36 | assert "ds" in import_dict 37 | assert import_dict["ds"] == json.dumps 38 | 39 | import_dict = build_import_dict(["json::j"]) 40 | assert "j" in import_dict 41 | assert import_dict["j"] == json 42 | 43 | 44 | def test_show_help(capsys): 45 | with pytest.raises(SystemExit): 46 | main(["flu", "-h"]) 47 | 48 | result = capsys.readouterr() 49 | stdout = result.out 50 | assert stdout.startswith("usage") 51 | 52 | 53 | def test_show_version(capsys): 54 | main(["flu", "flu(range(5)).collect()"]) 55 | 56 | result = capsys.readouterr() 57 | stdout = result.out.replace("\n", "") 58 | assert stdout.startswith("0") 59 | 60 | 61 | def test_basic_pipeline(capsys): 62 | main(["flu", "flu(range(5)).collect()"]) 63 | result = capsys.readouterr() 64 | stdout = result.out.replace("\n", "") 65 | assert stdout.startswith("0") 66 | 67 | 68 | def test_pass_on_none_pipeline(capsys): 69 | main(["flu", "None"]) 70 | result = capsys.readouterr() 71 | stdout = result.out 72 | assert stdout == "" 73 | 74 | 75 | def test_non_iterable_non_none_pipeline(capsys): 76 | main(["flu", '"hello_world"']) 77 | result = capsys.readouterr() 78 | stdout = result.out.strip("\n") 79 | assert stdout == "hello_world" 80 | 81 | 82 | def test_cli_walk_files(capsys): 83 | main(["flu", "walk_files().head(2)"]) 84 | result = capsys.readouterr() 85 | stdout = result.out.strip("\n").split("\n") 86 | assert len(stdout) == 2 87 | 88 | 89 | def test_cli_walk_dirs(capsys): 90 | main(["flu", "walk_dirs().head(2)"]) 91 | result = capsys.readouterr() 92 | stdout = result.out.strip("\n").split("\n") 93 | assert len(stdout) == 2 94 | 95 | 96 | def test_from_file(capsys): 97 | with NamedTemporaryFile("w+") as f: 98 | f.write("hello") 99 | f.read() 100 | f_name = f.name 101 | main(["flu", "-f", f_name, "_.map(str.upper)"]) 102 | result = capsys.readouterr() 103 | stdout = result.out.strip("\n") 104 | assert stdout == "HELLO" 105 | 106 | 107 | def test_glob_imports(capsys): 108 | main(["flu", "flu(env).count()", "-i", "os:environ:env"]) 109 | result = capsys.readouterr() 110 | stdout = result.out 111 | assert stdout 112 | -------------------------------------------------------------------------------- /src/tests/test_cli_utils.py: -------------------------------------------------------------------------------- 1 | from flupy.cli.utils import walk_dirs, walk_files 2 | 3 | 4 | def test_walk_files(): 5 | assert walk_files().head() 6 | assert walk_files(abspath=False).head() 7 | 8 | 9 | def test_walk_dirs(): 10 | assert walk_dirs().head() 11 | -------------------------------------------------------------------------------- /src/tests/test_flu.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from itertools import count, cycle 3 | 4 | import pytest 5 | 6 | from flupy import flu 7 | 8 | 9 | def test_collect(): 10 | assert flu(range(3)).collect() == [0, 1, 2] 11 | assert flu(range(3)).collect(container_type=tuple) == (0, 1, 2) 12 | assert flu(range(3)).collect(n=2) == [0, 1] 13 | 14 | 15 | def test_to_list(): 16 | assert flu(range(3)).to_list() == [0, 1, 2] 17 | 18 | 19 | def test___getitem__(): 20 | assert 
flu(range(3))[1] == 1 21 | assert flu(range(3))[1:].collect() == [1, 2] 22 | assert flu(range(35))[1:2].collect() == [1] 23 | assert flu(range(35))[1:3].collect() == [1, 2] 24 | with pytest.raises(IndexError): 25 | flu([1])[4] 26 | with pytest.raises((KeyError, TypeError)): 27 | flu([1])["not an index"] 28 | 29 | 30 | def test_sum(): 31 | gen = flu(range(3)) 32 | assert gen.sum() == 3 33 | 34 | 35 | def test_reduce(): 36 | gen = flu(range(5)) 37 | assert gen.reduce(lambda x, y: x + y) == 10 38 | 39 | 40 | def test_fold_left(): 41 | assert flu(range(5)).fold_left(lambda x, y: x + y, 0) == 10 42 | assert flu(range(5)).fold_left(lambda x, y: x + str(y), "") == "01234" 43 | 44 | 45 | def test_count(): 46 | gen = flu(range(3)) 47 | assert gen.count() == 3 48 | 49 | 50 | def test_min(): 51 | gen = flu(range(3)) 52 | assert gen.min() == 0 53 | 54 | 55 | def test_first(): 56 | gen = flu(range(3)) 57 | assert gen.first() == 0 58 | gen = flu([]) 59 | with pytest.raises(IndexError): 60 | gen.first() 61 | gen = flu([]) 62 | assert gen.first(default=1) == 1 63 | 64 | 65 | def test_last(): 66 | gen = flu(range(3)) 67 | assert gen.last() == 2 68 | gen = flu([]) 69 | with pytest.raises(IndexError): 70 | gen.last() 71 | gen = flu([]) 72 | assert gen.last(default=1) == 1 73 | 74 | 75 | def test_head(): 76 | gen = flu(range(30)) 77 | assert gen.head(n=2) == [0, 1] 78 | gen = flu(range(30)) 79 | assert gen.head(n=3, container_type=set) == set([0, 1, 2]) 80 | gen = flu(range(3)) 81 | assert gen.head(n=50) == [0, 1, 2] 82 | 83 | 84 | def test_tail(): 85 | gen = flu(range(30)) 86 | assert gen.tail(n=2) == [28, 29] 87 | gen = flu(range(30)) 88 | assert gen.tail(n=3, container_type=set) == set([27, 28, 29]) 89 | gen = flu(range(3)) 90 | assert gen.tail(n=50) == [0, 1, 2] 91 | 92 | 93 | def test_max(): 94 | gen = flu(range(3)) 95 | assert gen.max() == 2 96 | 97 | 98 | def test_unique(): 99 | class NoHash: 100 | def __init__(self, letter, keyf): 101 | self.letter = letter 102 | self.keyf = keyf 103 | 104 | a = NoHash("a", 1) 105 | b = NoHash("b", 1) 106 | c = NoHash("c", 2) 107 | 108 | gen = flu([a, b, c]).unique() 109 | assert gen.collect() == [a, b, c] 110 | gen = flu([a, b, c]).unique(lambda x: x.letter) 111 | assert gen.collect() == [a, b, c] 112 | gen = flu([a, b, c]).unique(lambda x: x.keyf) 113 | assert gen.collect() == [a, c] 114 | 115 | 116 | def test_side_effect(): 117 | class FakeFile: 118 | def __init__(self): 119 | self.is_open = False 120 | self.content = [] 121 | 122 | def write(self, text): 123 | if self.is_open: 124 | self.content.append(text) 125 | else: 126 | raise IOError("fake file is not open for writing") 127 | 128 | def open(self): 129 | self.is_open = True 130 | 131 | def close(self): 132 | self.is_open = False 133 | 134 | # Test the fake file 135 | ffile = FakeFile() 136 | ffile.open() 137 | ffile.write("should be there") 138 | ffile.close() 139 | assert ffile.content[0] == "should be there" 140 | with pytest.raises(IOError): 141 | ffile.write("should fail") 142 | 143 | # Reset fake file 144 | ffile = FakeFile() 145 | 146 | with pytest.raises(IOError): 147 | flu(range(5)).side_effect(ffile.write).collect() 148 | 149 | gen_result = flu(range(5)).side_effect(ffile.write, before=ffile.open, after=ffile.close).collect() 150 | assert ffile.is_open == False 151 | assert ffile.content == [0, 1, 2, 3, 4] 152 | assert gen_result == [0, 1, 2, 3, 4] 153 | 154 | 155 | def test_sort(): 156 | gen = flu(range(3, 0, -1)).sort() 157 | assert gen.collect() == [1, 2, 3] 158 | 159 | 160 | def test_shuffle(): 
161 | original_order = list(range(10000)) 162 | new_order = flu(original_order).shuffle().collect() 163 | assert new_order != original_order 164 | assert len(new_order) == len(original_order) 165 | assert sum(new_order) == sum(original_order) 166 | 167 | 168 | def test_map(): 169 | gen = flu(range(3)).map(lambda x: x + 2) 170 | assert gen.collect() == [2, 3, 4] 171 | 172 | 173 | def test_rate_limit(): 174 | resA = flu(range(3)).collect() 175 | resB = flu(range(3)).rate_limit(5000).collect() 176 | assert resA == resB 177 | 178 | 179 | def test_map_item(): 180 | gen = flu(range(3)).map(lambda x: {"a": x}).map_item("a") 181 | assert gen.collect() == [0, 1, 2] 182 | 183 | 184 | def test_map_attr(): 185 | class Person: 186 | def __init__(self, age: int) -> None: 187 | self.age = age 188 | 189 | gen = flu(range(3)).map(lambda x: Person(x)).map_attr("age") 190 | assert gen.collect() == [0, 1, 2] 191 | 192 | 193 | def test_filter(): 194 | gen = flu(range(3)).filter(lambda x: 0 < x < 2) 195 | assert gen.collect() == [1] 196 | 197 | 198 | def test_take(): 199 | gen = flu(range(10)).take(5) 200 | assert gen.collect() == [0, 1, 2, 3, 4] 201 | 202 | 203 | def test_take_while(): 204 | gen = flu(cycle(range(10))).take_while(lambda x: x < 4) 205 | assert gen.collect() == [0, 1, 2, 3] 206 | 207 | 208 | def test_drop_while(): 209 | gen = flu([1, 2, 3, 4, 3, 2, 1]).drop_while(lambda x: x < 4) 210 | assert gen.collect() == [4, 3, 2, 1] 211 | 212 | 213 | def test_group_by(): 214 | gen = flu([1, 1, 1, 2, 2, 2, 2, 3]).zip(range(100)).group_by(lambda x: x[0]) 215 | g1, g2, g3 = gen.map(lambda x: (x[0], x[1].collect())).collect() 216 | # Standard usage 217 | assert g1 == (1, [(1, 0), (1, 1), (1, 2)]) 218 | assert g2 == (2, [(2, 3), (2, 4), (2, 5), (2, 6)]) 219 | assert g3 == (3, [(3, 7)]) 220 | # No param usage 221 | v1 = flu(range(10)).group_by().map(lambda x: (x[0], list(x[1]))) 222 | v2 = flu(range(10)).map(lambda x: (x, [x])) 223 | assert v1.collect() == v2.collect() 224 | # Sort 225 | gen = flu([1, 2, 1, 2]).group_by(lambda x: x, sort=False) 226 | assert gen.count() == 4 227 | gen = flu([1, 2, 1, 2]).group_by(lambda x: x, sort=True) 228 | assert gen.count() == 2 229 | 230 | # Custom key function 231 | points = [{"x": 1, "y": 0}, {"x": 4, "y": 3}, {"x": 1, "y": 5}] 232 | key_func = lambda u: u["x"] 233 | gen = flu(points).group_by(key=key_func, sort=True).collect() 234 | assert len(gen) == 2 235 | assert gen[0][0] == 1 236 | assert gen[1][0] == 4 237 | assert len(gen[0][1].collect()) == 2 238 | assert len(gen[1][1].collect()) == 1 239 | 240 | 241 | def test_chunk(): 242 | gen = flu(range(5)).chunk(2) 243 | assert gen.collect() == [[0, 1], [2, 3], [4]] 244 | 245 | 246 | def test_next(): 247 | gen = flu(range(5)) 248 | assert next(gen) == 0 249 | 250 | 251 | def test_iter(): 252 | gen = flu(range(5)) 253 | assert next(iter(gen)) == 0 254 | 255 | 256 | def test_enumerate(): 257 | # Check default 258 | gen = flu(range(3)).enumerate() 259 | assert gen.collect() == [(0, 0), (1, 1), (2, 2)] 260 | 261 | # Check start param 262 | gen = flu(range(3)).enumerate(start=1) 263 | assert gen.collect() == [(1, 0), (2, 1), (3, 2)] 264 | 265 | 266 | def test_zip(): 267 | gen = flu(range(3)).zip(range(3)) 268 | assert gen.collect() == [(0, 0), (1, 1), (2, 2)] 269 | 270 | gen2 = flu(range(3)).zip(range(3), range(2)) 271 | assert gen2.collect() == [(0, 0, 0), (1, 1, 1)] 272 | 273 | 274 | def test_zip_longest(): 275 | gen = flu(range(3)).zip_longest(range(5)) 276 | assert gen.collect() == [(0, 0), (1, 1), (2, 2), (None, 3), (None, 4)] 
277 | gen = flu(range(3)).zip_longest(range(5), fill_value="a") 278 | assert gen.collect() == [(0, 0), (1, 1), (2, 2), ("a", 3), ("a", 4)] 279 | gen = flu(range(3)).zip_longest(range(5), range(4), fill_value="a") 280 | assert gen.collect() == [(0, 0, 0), (1, 1, 1), (2, 2, 2), ("a", 3, 3), ("a", 4, "a")] 281 | 282 | 283 | def test_window(): 284 | # Check default 285 | gen = flu(range(5)).window(n=3) 286 | assert gen.collect() == [(0, 1, 2), (1, 2, 3), (2, 3, 4)] 287 | 288 | # Check step param 289 | gen = flu(range(5)).window(n=3, step=3) 290 | assert gen.collect() == [(0, 1, 2), (3, 4, None)] 291 | 292 | # Check fill_value param 293 | gen = flu(range(5)).window(n=3, step=3, fill_value="i") 294 | assert gen.collect() == [(0, 1, 2), (3, 4, "i")] 295 | 296 | assert flu(range(4)).window(n=0).collect() == [tuple()] 297 | 298 | with pytest.raises(ValueError): 299 | flu(range(5)).window(n=-1).collect() 300 | 301 | with pytest.raises(ValueError): 302 | flu(range(5)).window(3, step=0).collect() 303 | 304 | 305 | def test_flu(): 306 | gen = flu(count()).map(lambda x: x**2).filter(lambda x: x % 517 == 0).chunk(5).take(3) 307 | assert next(gen) == [0, 267289, 1069156, 2405601, 4276624] 308 | 309 | 310 | def test_flatten(): 311 | nested = [1, [2, (3, [4])], ["rbsd", "abc"], (7,)] 312 | 313 | # Defaults with depth of 1 314 | gen = flu(nested).flatten() 315 | assert [x for x in gen] == [1, 2, (3, [4]), "rbsd", "abc", 7] 316 | 317 | # Depth 2 318 | gen = flu(nested).flatten(depth=2) 319 | assert [x for x in gen] == [1, 2, 3, [4], "rbsd", "abc", 7] 320 | 321 | # Depth 3 322 | gen = flu(nested).flatten(depth=3) 323 | assert [x for x in gen] == [1, 2, 3, 4, "rbsd", "abc", 7] 324 | 325 | # Depth infinite 326 | gen = flu(nested).flatten(depth=sys.maxsize) 327 | assert [x for x in gen] == [1, 2, 3, 4, "rbsd", "abc", 7] 328 | 329 | # Depth 2 with tuple base_type 330 | gen = flu(nested).flatten(depth=2, base_type=tuple) 331 | assert [x for x in gen] == [1, 2, (3, [4]), "rbsd", "abc", (7,)] 332 | 333 | # Depth 2 with iterate strings 334 | gen = flu(nested).flatten(depth=2, base_type=tuple, iterate_strings=True) 335 | assert [x for x in gen] == [1, 2, (3, [4]), "r", "b", "s", "d", "a", "b", "c", (7,)] 336 | 337 | 338 | def test_denormalize(): 339 | content = [ 340 | ["abc", [1, 2, 3]], 341 | ] 342 | assert flu(content).denormalize().collect() == [("abc", 1), ("abc", 2), ("abc", 3)] 343 | assert (flu(content).denormalize(iterate_strings=True).collect()) == [ 344 | ("a", 1), 345 | ("a", 2), 346 | ("a", 3), 347 | ("b", 1), 348 | ("b", 2), 349 | ("b", 3), 350 | ("c", 1), 351 | ("c", 2), 352 | ("c", 3), 353 | ] 354 | 355 | assert (flu([[[1], [1, 2], None]]).denormalize().collect()) == [ 356 | (1, 1, None), 357 | (1, 2, None), 358 | ] 359 | 360 | assert (flu([[[1], [1, 2], []]]).denormalize().collect()) == [] 361 | 362 | 363 | def test_tee(): 364 | # Default unpacking 365 | gen1, gen2 = flu(range(100)).tee() 366 | assert gen1.sum() == gen2.sum() 367 | 368 | # Adjusting the *n* parameter 369 | gen1, gen2, gen3 = flu(range(100)).tee(3) 370 | assert gen1.sum() == gen3.sum() 371 | 372 | # Copies do not share progress 373 | gen1, gen2 = flu(range(100)).tee() 374 | assert next(gen1) == next(gen2) 375 | 376 | # tee does not break chaining 377 | assert flu(range(5)).tee().map(sum).sum() == 20 378 | 379 | 380 | def test_join_left(): 381 | # Default key functions 382 | res = flu(range(6)).join_left(range(0, 6, 2)).collect() 383 | assert res == [(0, 0), (1, None), (2, 2), (3, None), (4, 4), (5, None)] 384 | 385 | 386 | def test_join_inner(): 387 | # Default 
key functions 388 | res = flu(range(6)).join_inner(range(0, 6, 2)).collect() 389 | assert res == [(0, 0), (2, 2), (4, 4)] 390 | -------------------------------------------------------------------------------- /src/tests/test_version.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for version information. 3 | """ 4 | 5 | import re 6 | 7 | import flupy 8 | 9 | 10 | def test_version_format(): 11 | """Test that __version__ follows semantic versioning format (MAJOR.MINOR.PATCH).""" 12 | # Standard semver regex pattern 13 | semver_pattern = r"^(?P<major>0|[1-9]\d*)\.(?P<minor>0|[1-9]\d*)\.(?P<patch>0|[1-9]\d*)(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" 14 | 15 | assert re.match( 16 | semver_pattern, flupy.__version__ 17 | ), f"Version '{flupy.__version__}' does not match semantic versioning format" 18 | 19 | # Ensure version parts can be parsed as integers 20 | major, minor, patch = flupy.__version__.split("-")[0].split("+")[0].split(".")[:3] 21 | assert major.isdigit(), f"Major version '{major}' is not a valid integer" 22 | assert minor.isdigit(), f"Minor version '{minor}' is not a valid integer" 23 | assert patch.isdigit(), f"Patch version '{patch}' is not a valid integer" 24 | --------------------------------------------------------------------------------
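
A quick usage sketch (illustrative only, not a file in this repository) showing how the fluent methods defined in src/flupy/fluent.py compose. It assumes only that flupy is installed; the users/orders records are made-up sample data:

from flupy import flu

# Pair each number with its double, keep pairs whose first element is even,
# then batch the survivors into lists of two.
pairs = (
    flu(range(10))
    .map(lambda x: (x, x * 2))
    .filter(lambda pair: pair[0] % 2 == 0)
    .chunk(2)
    .to_list()
)
assert pairs == [[(0, 0), (2, 4)], [(4, 8), (6, 12)], [(8, 16)]]

# Left-join two record sets on their first field, mirroring test_join_left above;
# left-hand records with no match are paired with None.
users = [(1, "ann"), (2, "bob"), (3, "cal")]
orders = [(1, "book"), (3, "pen")]
joined = (
    flu(users)
    .join_left(orders, key=lambda u: u[0], other_key=lambda o: o[0])
    .to_list()
)
assert joined == [((1, "ann"), (1, "book")), ((2, "bob"), None), ((3, "cal"), (3, "pen"))]

Because every step returns a new Fluent wrapper over a lazy generator, nothing is evaluated until a terminal call such as to_list(), collect(), or sum() consumes the pipeline; only join_left materializes its *other* argument, as its docstring notes.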