├── pytest.ini ├── tests ├── .gitkeep ├── util │ ├── requireable │ │ └── dummy.py │ ├── test_statistics.py │ ├── test_compat.py │ ├── test_resolvers.py │ └── test_collections.py ├── structs │ ├── test_tokens.py │ └── test_inputs.py ├── nodes │ ├── io │ │ ├── test_io_base.py │ │ ├── test_pickle.py │ │ └── test_file.py │ └── test_casts.py ├── test_publicapi.py ├── test_basicusage.py ├── commands │ ├── test_convert.py │ ├── test_version.py │ ├── test_clibasics.py │ ├── test_init.py │ ├── test_download.py │ └── test_run.py ├── features │ ├── test_not_modified.py │ └── test_inherit.py ├── test_registry.py ├── execution │ ├── test_events.py │ └── contexts │ │ └── test_execution_contexts_graph.py ├── ext │ └── test_ods.py ├── plugins │ └── test_console.py ├── config │ ├── test_processors.py │ └── test_methods_partial.py ├── test_execution.py ├── test_settings.py └── examples │ └── test_example_change_some_fields.py ├── bonobo ├── contrib │ ├── __init__.py │ ├── jupyter │ │ ├── js │ │ │ ├── .gitignore │ │ │ ├── README.rst │ │ │ ├── src │ │ │ │ ├── embed.js │ │ │ │ ├── index.js │ │ │ │ ├── extension.js │ │ │ │ └── bonobo.js │ │ │ ├── package.json │ │ │ └── webpack.config.js │ │ ├── __init__.py │ │ ├── widget.py │ │ └── static │ │ │ └── extension.js │ ├── django │ │ ├── __init__.py │ │ ├── utils.py │ │ └── commands.py │ ├── opendatasoft │ │ └── __init__.py │ └── google │ │ └── __init__.py ├── structs │ ├── __init__.py │ └── tokens.py ├── examples │ ├── files │ │ ├── __init__.py │ │ ├── services.py │ │ ├── csv_handlers.py │ │ ├── text_handlers.py │ │ ├── json_handlers.py │ │ └── pickle_handlers.py │ ├── types │ │ ├── __init__.py │ │ ├── __main__.py │ │ └── strings.py │ ├── datasets │ │ ├── __init__.py │ │ ├── static │ │ │ ├── spam.tgz │ │ │ ├── Makefile │ │ │ └── passwd.txt │ │ ├── coffeeshops.py │ │ ├── fablabs.py │ │ └── __main__.py │ ├── .style.yapf │ ├── __main__.py │ ├── empty.py │ ├── clock.py │ ├── environ.py │ └── __init__.py ├── _version.py ├── util │ ├── term.py │ 
├── pkgs.py │ ├── envelopes.py │ ├── __init__.py │ ├── compat.py │ ├── statistics.py │ ├── api.py │ ├── resolvers.py │ └── errors.py ├── __main__.py ├── plugins │ ├── sentry.py │ ├── __init__.py │ └── jupyter.py ├── execution │ ├── __init__.py │ ├── strategies │ │ ├── naive.py │ │ ├── base.py │ │ └── __init__.py │ ├── contexts │ │ ├── plugin.py │ │ └── __init__.py │ └── events.py ├── commands │ ├── templates │ │ ├── bare.py-tpl │ │ └── default.py-tpl │ ├── inspect.py │ ├── examples.py │ ├── download.py │ ├── version.py │ ├── __init__.py │ ├── run.py │ └── init.py ├── nodes │ ├── io │ │ ├── __init__.py │ │ ├── base.py │ │ ├── pickle.py │ │ ├── json.py │ │ └── file.py │ ├── aggregation.py │ ├── __init__.py │ ├── filter.py │ └── throttle.py ├── config │ ├── functools.py │ └── __init__.py ├── constants.py ├── __init__.py └── errors.py ├── docs ├── guide │ ├── packaging.rst │ ├── plugins.rst │ ├── index.rst │ ├── _next.rst │ ├── debugging.rst │ └── future │ │ ├── services.rst │ │ └── transformations.rst ├── _static │ ├── graphs.css │ ├── bonobo.png │ └── custom.css ├── genindex.rst ├── _templates │ ├── alabaster │ │ ├── static │ │ │ └── custom.css │ │ ├── _version.py │ │ ├── donate.html │ │ ├── navigation.html │ │ ├── relations.html │ │ ├── __init__.py │ │ ├── about.html │ │ └── theme.conf │ ├── layout.html │ ├── sidebarlogo.html │ ├── sidebarinfos.html │ ├── sidebarintro.html │ └── base.html ├── tutorial │ ├── _todo.rst │ ├── _wip_note.rst │ └── index.rst ├── extension │ ├── _beta.rst │ ├── docker.rst │ ├── _alpha.rst │ ├── index.rst │ ├── selenium.rst │ ├── jupyter.rst │ ├── django.rst │ └── sqlalchemy.rst ├── reference │ ├── api │ │ ├── bonobo │ │ │ ├── util.rst │ │ │ ├── nodes.rst │ │ │ ├── config.rst │ │ │ ├── execution.rst │ │ │ ├── constants.rst │ │ │ ├── structs │ │ │ │ └── graphs.rst │ │ │ └── execution │ │ │ │ ├── events.rst │ │ │ │ ├── contexts.rst │ │ │ │ └── strategies.rst │ │ └── bonobo.rst │ ├── index.rst │ ├── commands.rst │ ├── examples.rst │ └── 
settings.rst ├── index.rst ├── Makefile ├── make.bat ├── contribute │ └── release.rst └── history.rst ├── CONTRIBUTING.md ├── MANIFEST.in ├── setup.cfg ├── readthedocs.yml ├── bin ├── test_graph ├── run_all_examples.sh └── imgcat ├── .isort.cfg ├── .codacy.yml ├── CREDITS.rst ├── .landscape.yml ├── .editorconfig ├── .travis.yml ├── .style.yapf ├── classifiers.txt ├── requirements.txt ├── requirements-sqlalchemy.txt ├── requirements-docker.txt ├── .gitignore ├── wercker.yml ├── .coveragerc ├── requirements-dev.txt ├── readthedocs-conda.yml ├── requirements-jupyter.txt ├── .github └── ISSUE_TEMPLATE.md ├── benchmarks ├── parameters.py └── person.json ├── RELEASE-0.6.rst ├── Projectfile ├── README.rst └── CODE_OF_CONDUCT.md /pytest.ini: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bonobo/contrib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bonobo/structs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/guide/packaging.rst: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bonobo/examples/files/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bonobo/examples/types/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /bonobo/examples/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/_static/graphs.css: -------------------------------------------------------------------------------- 1 | .node { 2 | } 3 | -------------------------------------------------------------------------------- /bonobo/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.7.0rc2' 2 | -------------------------------------------------------------------------------- /tests/util/requireable/dummy.py: -------------------------------------------------------------------------------- 1 | foo = "bar" 2 | -------------------------------------------------------------------------------- /docs/genindex.rst: -------------------------------------------------------------------------------- 1 | Full Index 2 | ========== 3 | 4 | -------------------------------------------------------------------------------- /bonobo/contrib/jupyter/js/.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | See http://docs.bonobo-project.org/en/latest/contribute/index.html 2 | -------------------------------------------------------------------------------- /bonobo/util/term.py: -------------------------------------------------------------------------------- 1 | CLEAR_EOL = "\033[0K" 2 | MOVE_CURSOR_UP = "\033[{}A".format 3 | -------------------------------------------------------------------------------- /docs/_templates/alabaster/static/custom.css: 
class SentryPlugin(Plugin):
    """Empty ``Plugin`` subclass; it defines no attributes or behaviour of its own."""
-------------------------------------------------------------------------------- /bonobo/examples/.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = pep8 3 | column_limit = 74 4 | dedent_closing_brackets = true 5 | -------------------------------------------------------------------------------- /docs/_templates/alabaster/_version.py: -------------------------------------------------------------------------------- 1 | __version_info__ = (0, 7, 10) 2 | __version__ = ".".join(map(str, __version_info__)) 3 | -------------------------------------------------------------------------------- /readthedocs.yml: -------------------------------------------------------------------------------- 1 | conda: 2 | file: readthedocs-conda.yml 3 | python: 4 | extra_requirements: [ dev, docker, sqlalchemy ] 5 | 6 | -------------------------------------------------------------------------------- /bonobo/examples/datasets/static/spam.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/variable/bonobo/develop/bonobo/examples/datasets/static/spam.tgz -------------------------------------------------------------------------------- /bin/test_graph: -------------------------------------------------------------------------------- 1 | bonobo inspect --graph bonobo/examples/tutorials/tut02e03_writeasmap.py | dot -o test_output.png -T png && bin/imgcat test_output.png 2 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {%- extends "base.html" %} 2 | 3 | {%- block content %} 4 | {{ relbar() }} 5 | {{ super() }} 6 | {%- endblock %} 7 | 8 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 
def test_token_repr():
    """Check that a token's repr is its name wrapped in angle brackets."""
    t = Token("Acme")
    # Token.__repr__ formats the name with "<{}>", so an empty-string
    # expectation can never match; the bracketed name is the real contract.
    assert repr(t) == "<Acme>"
6 | -------------------------------------------------------------------------------- /bonobo/examples/datasets/static/Makefile: -------------------------------------------------------------------------------- 1 | 2 | theaters.json: 3 | curl 'https://data.toulouse-metropole.fr/explore/dataset/theatres-et-salles-de-spectacles/download?format=json&timezone=Europe/Berlin&use_labels_for_header=true' > $@ 4 | 5 | -------------------------------------------------------------------------------- /docs/reference/api/bonobo/util.rst: -------------------------------------------------------------------------------- 1 | :mod:`Util <bonobo.util>` 2 | ========================= 3 | 4 | .. currentmodule:: bonobo.util 5 | 6 | :Module: :mod:`bonobo.util` 7 | 8 | 9 | .. automodule:: bonobo.util 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/reference/api/bonobo/nodes.rst: -------------------------------------------------------------------------------- 1 | :mod:`Nodes <bonobo.nodes>` 2 | =========================== 3 | 4 | .. currentmodule:: bonobo.nodes 5 | 6 | :Module: :mod:`bonobo.nodes` 7 | 8 | 9 | .. automodule:: bonobo.nodes 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.codacy.yml: -------------------------------------------------------------------------------- 1 | --- 2 | exclude_paths: 3 | - benchmarks/** 4 | - bin/** 5 | - bonobo/contrib/jupyter/**.js 6 | - bonobo/examples/** 7 | - bonobo/ext/** 8 | - bonobo/util/testing.py 9 | - docs/** 10 | - setup.py 11 | - tests/** 12 | -------------------------------------------------------------------------------- /docs/reference/api/bonobo/config.rst: -------------------------------------------------------------------------------- 1 | :mod:`Config <bonobo.config>` 2 | ============================= 3 | 4 | .. currentmodule:: bonobo.config 5 | 6 | :Module: :mod:`bonobo.config` 7 | 8 | 9 | .. 
automodule:: bonobo.config 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /bonobo/examples/types/__main__.py: -------------------------------------------------------------------------------- 1 | import bonobo 2 | from bonobo.examples.types.strings import get_graph 3 | 4 | if __name__ == "__main__": 5 | parser = bonobo.get_argument_parser() 6 | with bonobo.parse_args(parser): 7 | bonobo.run(get_graph()) 8 | -------------------------------------------------------------------------------- /docs/guide/plugins.rst: -------------------------------------------------------------------------------- 1 | Plugins 2 | ======= 3 | 4 | 5 | Graph level plugins 6 | ::::::::::::::::::: 7 | 8 | 9 | Node level plugins 10 | :::::::::::::::::: 11 | 12 | enhancers 13 | 14 | 15 | node 16 | - 17 | 18 | 19 | .. include:: _next.rst 20 | -------------------------------------------------------------------------------- /bonobo/execution/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Execution logic, surrounding contexts for nodes and graphs and events. 3 | 4 | This module is considered **internal**. 5 | 6 | """ 7 | 8 | import logging 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | __all__ = [] 13 | -------------------------------------------------------------------------------- /CREDITS.rst: -------------------------------------------------------------------------------- 1 | Credits 2 | ======= 3 | 4 | Logo 5 | :::: 6 | 7 | Created by Sarah GHIGLIANO and available on The Noun Project. 
# Maps the canonical name of every distribution in the working set whose
# canonical name starts with "bonobo" to its distribution object.
bonobo_packages = {}
for p in pkg_resources.working_set:
    name = canonicalize_name(p.project_name)
    if not name.startswith("bonobo"):
        # Not one of ours: leave it out of the mapping.
        continue
    bonobo_packages[name] = p
8 | ) 9 | 10 | 11 | if __name__ == '__main__': 12 | with bonobo.parse_args() as options: 13 | bonobo.run(create_graph()) 14 | -------------------------------------------------------------------------------- /docs/extension/docker.rst: -------------------------------------------------------------------------------- 1 | Working with Docker 2 | =================== 3 | 4 | .. include:: _beta.rst 5 | 6 | Read the introduction: https://www.bonobo-project.org/with/docker 7 | 8 | Source code 9 | ::::::::::: 10 | 11 | https://github.com/python-bonobo/bonobo-docker 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /docs/reference/api/bonobo/constants.rst: -------------------------------------------------------------------------------- 1 | :mod:`Constants <bonobo.constants>` 2 | =================================== 3 | 4 | .. currentmodule:: bonobo.constants 5 | 6 | :Module: :mod:`bonobo.constants` 7 | 8 | 9 | .. automodule:: bonobo.constants 10 | :no-members: 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/reference/api/bonobo/structs/graphs.rst: -------------------------------------------------------------------------------- 1 | :mod:`Graphs <bonobo.structs.graphs>` 2 | ===================================== 3 | 4 | .. currentmodule:: bonobo.structs.graphs 5 | 6 | :Module: :mod:`bonobo.structs.graphs` 7 | 8 | 9 | .. 
automodule:: bonobo.structs.graphs 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | insert_final_newline = true 6 | charset = utf-8 7 | 8 | [*.py] 9 | indent = ' ' 10 | indent_size = 4 11 | indent_style = space 12 | line_length = 120 13 | multi_line_output = 5 14 | 15 | [Makefile] 16 | indent_style = tab 17 | 18 | -------------------------------------------------------------------------------- /bonobo/contrib/jupyter/__init__.py: -------------------------------------------------------------------------------- 1 | from bonobo.plugins.jupyter import JupyterOutputPlugin 2 | 3 | 4 | def _jupyter_nbextension_paths(): 5 | return [{"section": "notebook", "src": "static", "dest": "bonobo-jupyter", "require": "bonobo-jupyter/extension"}] 6 | 7 | 8 | __all__ = ["JupyterOutputPlugin"] 9 | -------------------------------------------------------------------------------- /bin/run_all_examples.sh: -------------------------------------------------------------------------------- 1 | #! 
class Token:
    """Named marker object whose repr is its name wrapped in angle brackets."""

    def __init__(self, name):
        # The name is kept on the ``__name__`` attribute, which ``__repr__`` reads back.
        self.__name__ = name

    def __repr__(self):
        label = "{}".format(self.__name__)
        return "".join(("<", label, ">"))


class Flag(Token):
    """Token subclass that carries three boolean class-level attributes."""

    # Class-level defaults; how they are interpreted is decided by code outside this block.
    must_be_first, must_be_last = False, False
    allows_data = True
toctree:: 7 | :maxdepth: 2 8 | 9 | introduction 10 | transformations 11 | graphs 12 | services 13 | environment 14 | purity 15 | debugging 16 | plugins 17 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | python: 4 | - 3.5 5 | - 3.5-dev 6 | - 3.6 7 | - 3.6-dev 8 | - 3.7 9 | - 3.7-dev 10 | # - 3.8-dev 11 | # - nightly 12 | install: 13 | - make install-dev 14 | - pip install coveralls 15 | script: 16 | - make clean test 17 | after_success: 18 | - coveralls 19 | -------------------------------------------------------------------------------- /bonobo/contrib/django/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains all tools for Bonobo and Django to interact nicely. 3 | 4 | * :class:`ETLCommand` 5 | * :func:`create_or_update` 6 | 7 | """ 8 | 9 | from .commands import ETLCommand 10 | from .utils import create_or_update 11 | 12 | __all__ = ["ETLCommand", "create_or_update"] 13 | -------------------------------------------------------------------------------- /docs/reference/api/bonobo/execution/events.rst: -------------------------------------------------------------------------------- 1 | :mod:`Execution Events <bonobo.execution.events>` 2 | ================================================= 3 | 4 | .. currentmodule:: bonobo.execution.events 5 | 6 | :Module: :mod:`bonobo.execution.events` 7 | 8 | 9 | .. automodule:: bonobo.execution.events 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/extension/_alpha.rst: -------------------------------------------------------------------------------- 1 | .. warning:: 2 | 3 | This extension is currently **ALPHA**. 4 | 5 | Things will change, break, not work as expected, and the documentation is lacking some serious work. 
6 | 7 | This section is here to give a brief overview but is neither complete nor definitive. 8 | 9 | You've been warned. 10 | -------------------------------------------------------------------------------- /docs/reference/api/bonobo/execution/contexts.rst: -------------------------------------------------------------------------------- 1 | :mod:`Execution Contexts <bonobo.execution.contexts>` 2 | ===================================================== 3 | 4 | .. currentmodule:: bonobo.execution.contexts 5 | 6 | :Module: :mod:`bonobo.execution.contexts` 7 | 8 | 9 | .. automodule:: bonobo.execution.contexts 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | based_on_style = pep8 3 | column_limit = 120 4 | allow_multiline_lambdas = false 5 | allow_multiline_dictionary_keys = false 6 | coalesce_brackets = true 7 | dedent_closing_brackets = true 8 | join_multiple_lines = true 9 | spaces_before_comment = 2 10 | split_before_first_argument = true 11 | split_complex_comprehension = true 12 | -------------------------------------------------------------------------------- /bonobo/contrib/jupyter/js/README.rst: -------------------------------------------------------------------------------- 1 | Bonobo within Jupyter 2 | ===================== 3 | 4 | Install 5 | ------- 6 | 7 | .. code-block:: shell-session 8 | 9 | yarn install 10 | 11 | 12 | Watch mode (for development) 13 | ---------------------------- 14 | 15 | .. code-block:: shell-session 16 | 17 | yarn run webpack --watch 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/reference/api/bonobo/execution/strategies.rst: -------------------------------------------------------------------------------- 1 | :mod:`Execution Strategies <bonobo.execution.strategies>` 2 | ========================================================= 3 | 4 | .. 
class MyThingWithStats(WithStatistics):
    """Test fixture that reports two fixed ``(name, value)`` statistics."""

    def get_statistics(self, *args, **kwargs):
        """Return the fixed statistics pairs; all arguments are ignored."""
        foo_stat = ("foo", 42)
        bar_stat = ("bar", 69)
        return (foo_stat, bar_stat)


def test_with_statistics():
    """The string form is expected to be the ``name=value`` pairs separated by one space."""
    thing = MyThingWithStats()
    expected = "foo=42 bar=69"
    assert thing.get_statistics_as_string() == expected
class NaiveStrategy(Strategy):
    """Strategy whose ``execute`` runs a graph execution context until it completes."""

    # TODO: how to run plugins in "naive" mode ?

    def execute(self, graph, **kwargs):
        """Run *graph* and return its execution context.

        The context is obtained from ``create_graph_execution_context(graph, **kwargs)``,
        entered as a context manager, and ``run_until_complete()`` is called on it.
        """
        with self.create_graph_execution_context(graph, **kwargs) as context:
            context.run_until_complete()
        # NOTE(review): the flattened source does not show whether this return sits inside
        # or outside the ``with`` block — confirm against the real file.
        return context

Donate

3 |

4 | Consider supporting the authors on Gratipay: 5 | 8 |

9 | {% endif %} 10 | -------------------------------------------------------------------------------- /docs/_templates/alabaster/navigation.html: -------------------------------------------------------------------------------- 1 |

{{ _('Navigation') }}

2 | {{ toctree(includehidden=theme_sidebar_includehidden, collapse=theme_sidebar_collapse) }} 3 | {% if theme_extra_nav_links %} 4 |
5 |
    6 | {% for text, uri in theme_extra_nav_links.items() %} 7 |
  • {{ text }}
  • 8 | {% endfor %} 9 |
class PluginExecutionContext(BaseContext):
    """Context that registers and unregisters its wrapped object on the parent's dispatcher."""

    @property
    def dispatcher(self):
        """Dispatcher taken from the parent context."""
        parent = self.parent
        return parent.dispatcher

    def register(self):
        """Call ``register`` on the wrapped object with the dispatcher and return the result."""
        do_register = self.wrapped.register
        return do_register(self.dispatcher)

    def unregister(self):
        """Call ``unregister`` on the wrapped object with the dispatcher and return the result."""
        do_unregister = self.wrapped.unregister
        return do_unregister(self.dispatcher)
import List, Unicode 3 | 4 | 5 | @widgets.register("bonobo-widget.bonobo") 6 | class BonoboWidget(widgets.DOMWidget): 7 | _view_name = Unicode("BonoboView").tag(sync=True) 8 | _model_name = Unicode("BonoboModel").tag(sync=True) 9 | _view_module = Unicode("bonobo-jupyter").tag(sync=True) 10 | _model_module = Unicode("bonobo-jupyter").tag(sync=True) 11 | value = List().tag(sync=True) 12 | -------------------------------------------------------------------------------- /docs/_templates/sidebarlogo.html: -------------------------------------------------------------------------------- 1 | 2 |

3 | 4 | 5 | Bonobo 6 | 7 |

8 |
9 | 10 |

11 | Data processing for humans. 12 |

13 | -------------------------------------------------------------------------------- /docs/guide/_next.rst: -------------------------------------------------------------------------------- 1 | Where to jump next? 2 | ::::::::::::::::::: 3 | 4 | We suggest that you go through the :doc:`tutorial ` first. 5 | 6 | Then, you can read the guides, either using the order suggested or by picking the chapter that interests you the most at 7 | one given moment: 8 | 9 | * :doc:`introduction` 10 | * :doc:`transformations` 11 | * :doc:`graphs` 12 | * :doc:`services` 13 | * :doc:`environment` 14 | * :doc:`purity` 15 | * :doc:`debugging` 16 | * :doc:`plugins` 17 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -e . 2 | appdirs==1.4.3 3 | cached-property==1.5.1 4 | certifi==2019.6.16 5 | chardet==3.0.4 6 | colorama==0.3.9 7 | fs==2.4.8 8 | graphviz==0.8.4 9 | idna==2.8 10 | jinja2==2.10.1 11 | markupsafe==1.1.1 12 | mondrian==0.8.0 13 | packaging==19.0 14 | pbr==5.4.1 15 | psutil==5.6.3 16 | pyparsing==2.4.0 17 | python-slugify==1.2.6 18 | pytz==2019.1 19 | requests==2.22.0 20 | six==1.12.0 21 | stevedore==1.30.1 22 | typing==3.7.4 23 | unidecode==1.1.1 24 | urllib3==1.25.3 25 | whistle==1.0.1 26 | -------------------------------------------------------------------------------- /docs/reference/index.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | ========= 3 | 4 | Reference documents of all stable APIs and modules. If something is not here, please be careful about using it as it 5 | means that the API is not yet 1.0-proof. 6 | 7 | .. 
toctree:: 8 | :maxdepth: 2 9 | 10 | api/bonobo 11 | api/bonobo/config 12 | api/bonobo/constants 13 | api/bonobo/execution 14 | api/bonobo/nodes 15 | api/bonobo/structs/graphs 16 | api/bonobo/util 17 | commands 18 | settings 19 | examples 20 | -------------------------------------------------------------------------------- /tests/test_basicusage.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | import pytest 4 | 5 | import bonobo 6 | from bonobo.execution.contexts.graph import GraphExecutionContext 7 | 8 | 9 | @pytest.mark.timeout(2) 10 | def test_run_graph_noop(): 11 | graph = bonobo.Graph(bonobo.noop) 12 | assert len(graph) == 1 13 | 14 | with patch("bonobo._api._is_interactive_console", side_effect=lambda: False): 15 | result = bonobo.run(graph) 16 | 17 | assert isinstance(result, GraphExecutionContext) 18 | -------------------------------------------------------------------------------- /tests/util/test_compat.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bonobo.util.compat import deprecated, deprecated_alias 4 | 5 | 6 | def test_deprecated(): 7 | @deprecated 8 | def foo(): 9 | pass 10 | 11 | foo = deprecated(foo) 12 | with pytest.warns(DeprecationWarning): 13 | foo() 14 | 15 | 16 | def test_deprecated_alias(): 17 | def foo(): 18 | pass 19 | 20 | foo = deprecated_alias("bar", foo) 21 | 22 | with pytest.warns(DeprecationWarning): 23 | foo() 24 | -------------------------------------------------------------------------------- /tests/commands/test_convert.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import pytest 4 | 5 | from bonobo.util.environ import change_working_directory 6 | from bonobo.util.testing import all_runners 7 | 8 | 9 | @all_runners 10 | def test_convert(runner, tmpdir): 11 | csv_content = "id;name\n1;Romain" 12 | 
tmpdir.join("in.csv").write(csv_content) 13 | 14 | with change_working_directory(tmpdir): 15 | runner("convert", "in.csv", "out.csv") 16 | 17 | assert tmpdir.join("out.csv").read().strip() == csv_content 18 | -------------------------------------------------------------------------------- /bonobo/contrib/jupyter/js/src/embed.js: -------------------------------------------------------------------------------- 1 | // Entry point for the unpkg bundle containing custom model definitions. 2 | // 3 | // It differs from the notebook bundle in that it does not need to define a 4 | // dynamic baseURL for the static assets and may load some css that would 5 | // already be loaded by the notebook otherwise. 6 | 7 | // Export widget models and views, and the npm package version number. 8 | module.exports = require('./bonobo.js'); 9 | module.exports['version'] = require('../package.json').version; 10 | -------------------------------------------------------------------------------- /tests/features/test_not_modified.py: -------------------------------------------------------------------------------- 1 | from bonobo.constants import NOT_MODIFIED 2 | from bonobo.util.testing import BufferingNodeExecutionContext 3 | 4 | 5 | def useless(*args, **kwargs): 6 | return NOT_MODIFIED 7 | 8 | 9 | def test_not_modified(): 10 | input_messages = [("foo", "bar"), ("foo", "baz")] 11 | 12 | with BufferingNodeExecutionContext(useless) as context: 13 | context.write_sync(*input_messages) 14 | 15 | result = context.get_buffer() 16 | print(result) 17 | assert result == input_messages 18 | -------------------------------------------------------------------------------- /bonobo/nodes/io/__init__.py: -------------------------------------------------------------------------------- 1 | """ Readers and writers for common file formats. 
""" 2 | 3 | from .csv import CsvReader, CsvWriter 4 | from .file import FileReader, FileWriter 5 | from .json import JsonReader, JsonWriter, LdjsonReader, LdjsonWriter 6 | from .pickle import PickleReader, PickleWriter 7 | 8 | __all__ = [ 9 | "CsvReader", 10 | "CsvWriter", 11 | "FileReader", 12 | "FileWriter", 13 | "JsonReader", 14 | "JsonWriter", 15 | "LdjsonReader", 16 | "LdjsonWriter", 17 | "PickleReader", 18 | "PickleWriter", 19 | ] 20 | -------------------------------------------------------------------------------- /bonobo/examples/empty.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import time 3 | 4 | import bonobo 5 | 6 | 7 | def extract(): 8 | """Placeholder, change, rename, remove... """ 9 | for x in range(60): 10 | if x: 11 | time.sleep(1) 12 | yield datetime.datetime.now() 13 | 14 | 15 | def get_graph(): 16 | graph = bonobo.Graph() 17 | graph.add_chain() 18 | 19 | return graph 20 | 21 | 22 | if __name__ == "__main__": 23 | parser = bonobo.get_argument_parser() 24 | with bonobo.parse_args(parser): 25 | bonobo.run(get_graph()) 26 | -------------------------------------------------------------------------------- /requirements-sqlalchemy.txt: -------------------------------------------------------------------------------- 1 | -e .[sqlalchemy] 2 | -r requirements.txt 3 | appdirs==1.4.3 4 | bonobo-sqlalchemy==0.6.0 5 | certifi==2019.6.16 6 | chardet==3.0.4 7 | colorama==0.3.9 8 | fs==2.4.8 9 | graphviz==0.8.4 10 | idna==2.8 11 | jinja2==2.10.1 12 | markupsafe==1.1.1 13 | mondrian==0.8.0 14 | packaging==19.0 15 | pbr==5.4.1 16 | psutil==5.6.3 17 | pyparsing==2.4.0 18 | python-slugify==1.2.6 19 | pytz==2019.1 20 | requests==2.22.0 21 | six==1.12.0 22 | sqlalchemy==1.3.5 23 | stevedore==1.30.1 24 | typing==3.7.4 25 | unidecode==1.1.1 26 | urllib3==1.25.3 27 | whistle==1.0.1 28 | -------------------------------------------------------------------------------- /tests/test_registry.py: 
-------------------------------------------------------------------------------- 1 | from bonobo import create_reader, create_writer 2 | from bonobo.nodes import CsvReader, CsvWriter, JsonReader, JsonWriter 3 | 4 | 5 | def test_create_reader(): 6 | t = create_reader("foo.csv") 7 | assert isinstance(t, CsvReader) 8 | 9 | t = create_reader("foo.txt", format="json") 10 | assert isinstance(t, JsonReader) 11 | 12 | 13 | def test_create_writer(): 14 | t = create_writer("foo.csv") 15 | assert isinstance(t, CsvWriter) 16 | 17 | t = create_writer("foo.txt", format="json") 18 | assert isinstance(t, JsonWriter) 19 | -------------------------------------------------------------------------------- /bonobo/examples/clock.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import time 3 | 4 | import bonobo 5 | 6 | 7 | def extract(): 8 | """Placeholder, change, rename, remove... """ 9 | for x in range(60): 10 | if x: 11 | time.sleep(1) 12 | yield datetime.datetime.now() 13 | 14 | 15 | def get_graph(): 16 | graph = bonobo.Graph() 17 | graph.add_chain(extract, print) 18 | 19 | return graph 20 | 21 | 22 | if __name__ == "__main__": 23 | parser = bonobo.get_argument_parser() 24 | with bonobo.parse_args(parser): 25 | bonobo.run(get_graph()) 26 | -------------------------------------------------------------------------------- /bonobo/examples/files/csv_handlers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import bonobo 4 | from bonobo import examples 5 | from bonobo.examples.files.services import get_services 6 | 7 | 8 | def get_graph(*, _limit=None, _print=False): 9 | return bonobo.Graph( 10 | bonobo.CsvReader("coffeeshops.csv"), 11 | *((bonobo.Limit(_limit),) if _limit else ()), 12 | *((bonobo.PrettyPrinter(),) if _print else ()), 13 | bonobo.CsvWriter("coffeeshops.csv", fs="fs.output") 14 | ) 15 | 16 | 17 | if __name__ == "__main__": 18 | 
sys.exit(examples.run(get_graph, get_services)) 19 | -------------------------------------------------------------------------------- /bonobo/commands/inspect.py: -------------------------------------------------------------------------------- 1 | import bonobo 2 | from bonobo.commands import BaseGraphCommand 3 | 4 | 5 | class InspectCommand(BaseGraphCommand): 6 | handler = staticmethod(bonobo.inspect) 7 | 8 | def add_arguments(self, parser): 9 | super(InspectCommand, self).add_arguments(parser) 10 | parser.add_argument("--graph", "-g", dest="format", action="store_const", const="graph") 11 | 12 | def parse_options(self, **options): 13 | if not options.get("format"): 14 | raise RuntimeError("You must provide a format (try --graph).") 15 | return options 16 | -------------------------------------------------------------------------------- /bonobo/execution/contexts/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Execution Contexts are objects that wrap the stateless data-structures (graphs and nodes) during a job execution to 3 | keep an eye on their context/state (from the simplest things like i/o statistics to lifecycle and custom userland 4 | state). 
5 | 6 | """ 7 | 8 | from bonobo.execution.contexts.graph import GraphExecutionContext 9 | from bonobo.execution.contexts.node import NodeExecutionContext 10 | from bonobo.execution.contexts.plugin import PluginExecutionContext 11 | 12 | __all__ = ["GraphExecutionContext", "NodeExecutionContext", "PluginExecutionContext"] 13 | -------------------------------------------------------------------------------- /bonobo/nodes/aggregation.py: -------------------------------------------------------------------------------- 1 | from bonobo.config import Configurable, ContextProcessor, Method, Option, use_raw_input 2 | from bonobo.util import ValueHolder 3 | 4 | 5 | class Reduce(Configurable): 6 | function = Method() 7 | initializer = Option(required=False) 8 | 9 | @ContextProcessor 10 | def buffer(self, context): 11 | values = yield ValueHolder(self.initializer() if callable(self.initializer) else self.initializer) 12 | context.send(values.get()) 13 | 14 | @use_raw_input 15 | def __call__(self, values, bag): 16 | values.set(self.function(values.get(), bag)) 17 | -------------------------------------------------------------------------------- /tests/commands/test_version.py: -------------------------------------------------------------------------------- 1 | from bonobo import __version__ 2 | from bonobo.util.testing import all_runners 3 | 4 | 5 | @all_runners 6 | def test_version(runner): 7 | out, err = runner("version") 8 | out = out.strip() 9 | assert out.startswith("bonobo ") 10 | assert __version__ in out 11 | 12 | out, err = runner("version", "-q") 13 | out = out.strip() 14 | assert out.startswith("bonobo ") 15 | assert __version__ in out 16 | 17 | out, err = runner("version", "-qq") 18 | out = out.strip() 19 | assert not out.startswith("bonobo ") 20 | assert __version__ in out 21 | -------------------------------------------------------------------------------- /tests/commands/test_clibasics.py: 
-------------------------------------------------------------------------------- 1 | import pkg_resources 2 | 3 | from bonobo.util.testing import all_runners 4 | 5 | 6 | def test_entrypoint(): 7 | commands = {} 8 | 9 | for command in pkg_resources.iter_entry_points("bonobo.commands"): 10 | commands[command.name] = command 11 | 12 | assert not {"convert", "init", "inspect", "run", "version"}.difference(set(commands)) 13 | 14 | 15 | @all_runners 16 | def test_no_command(runner): 17 | _, err, exc = runner(catch_errors=True) 18 | assert type(exc) == SystemExit 19 | assert "error: the following arguments are required: command" in err 20 | -------------------------------------------------------------------------------- /bonobo/nodes/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`bonobo.nodes` module contains all builtin transformations that you can use out of the box in your ETL jobs. 3 | 4 | Please note that all objects from this package are also available directly through the root :mod:`bonobo` package. 
5 | 6 | """ 7 | 8 | from bonobo.nodes.basics import * 9 | from bonobo.nodes.basics import __all__ as _all_basics 10 | from bonobo.nodes.filter import Filter 11 | from bonobo.nodes.io import * 12 | from bonobo.nodes.io import __all__ as _all_io 13 | from bonobo.nodes.throttle import RateLimited 14 | 15 | __all__ = _all_basics + _all_io + ["Filter", "RateLimited"] 16 | -------------------------------------------------------------------------------- /requirements-docker.txt: -------------------------------------------------------------------------------- 1 | -e .[docker] 2 | -r requirements.txt 3 | appdirs==1.4.3 4 | bonobo-docker==0.6.0 5 | certifi==2019.6.16 6 | chardet==3.0.4 7 | colorama==0.3.9 8 | docker-pycreds==0.4.0 9 | docker==2.7.0 10 | fs==2.4.8 11 | graphviz==0.8.4 12 | idna==2.8 13 | jinja2==2.10.1 14 | markupsafe==1.1.1 15 | mondrian==0.8.0 16 | packaging==19.0 17 | pbr==5.4.1 18 | psutil==5.6.3 19 | pyparsing==2.4.0 20 | python-slugify==1.2.6 21 | pytz==2019.1 22 | requests==2.22.0 23 | semantic-version==2.6.0 24 | six==1.12.0 25 | stevedore==1.30.1 26 | typing==3.7.4 27 | unidecode==1.1.1 28 | urllib3==1.25.3 29 | websocket-client==0.56.0 30 | whistle==1.0.1 31 | -------------------------------------------------------------------------------- /tests/execution/test_events.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock 2 | 3 | from bonobo.execution import events 4 | 5 | 6 | def test_names(): 7 | # This test looks useless, but as it's becoming the plugin API, I want to make sure that nothing changes here, or 8 | # notice it otherwise. 9 | for name in "start", "started", "tick", "stop", "stopped", "kill": 10 | event_name = getattr(events, name.upper()) 11 | assert event_name == ".".join(("execution", name)) 12 | 13 | 14 | def test_event_object(): 15 | # Same logic as above. 
16 | c = Mock() 17 | e = events.ExecutionEvent(c) 18 | assert e.context is c 19 | -------------------------------------------------------------------------------- /bonobo/contrib/jupyter/js/src/index.js: -------------------------------------------------------------------------------- 1 | // Entry point for the notebook bundle containing custom model definitions. 2 | // 3 | // Setup notebook base URL 4 | // 5 | // Some static assets may be required by the custom widget javascript. The base 6 | // url for the notebook is not known at build time and is therefore computed 7 | // dynamically. 8 | __webpack_public_path__ = document.querySelector('body').getAttribute('data-base-url') + 'nbextensions/bonobo/'; 9 | 10 | // Export widget models and views, and the npm package version number. 11 | module.exports = require('./bonobo.js'); 12 | module.exports['version'] = require('../package.json').version; 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *$py.class 2 | *,cover 3 | *.egg 4 | *.egg-info/ 5 | *.iml 6 | *.log 7 | *.manifest 8 | *.mo 9 | *.pot 10 | *.py[cod] 11 | *.so 12 | *.spec 13 | .*.sw? 
14 | .DS_Store 15 | .Python 16 | .cache 17 | .coverage 18 | .coverage.* 19 | .eggs/ 20 | .env 21 | .hypothesis/ 22 | .installed.cfg 23 | .ipynb_checkpoints 24 | .python-version 25 | /.idea 26 | /.medikit 27 | /.pytest_cache 28 | /.release 29 | /bonobo/contrib/jupyter/js/node_modules/ 30 | /bonobo/examples/work_in_progress/ 31 | /build/ 32 | /coverage.xml 33 | /dist/ 34 | /docs/_build/ 35 | /eggs/ 36 | /examples/private 37 | /sdist/ 38 | /tags 39 | pip-delete-this-directory.txt 40 | pip-log.txt 41 | -------------------------------------------------------------------------------- /bonobo/commands/examples.py: -------------------------------------------------------------------------------- 1 | from bonobo.commands import BaseCommand 2 | 3 | all_examples = ( 4 | "clock", 5 | "datasets", 6 | "environ", 7 | "files.csv_handlers", 8 | "files.json_handlers", 9 | "files.pickle_handlers", 10 | "files.text_handlers", 11 | "types", 12 | ) 13 | 14 | 15 | class ExamplesCommand(BaseCommand): 16 | def handle(self): 17 | print("You can run the following examples:") 18 | print() 19 | for example in all_examples: 20 | print(" $ python -m bonobo.examples.{}".format(example)) 21 | print() 22 | 23 | def add_arguments(self, parser): 24 | pass 25 | -------------------------------------------------------------------------------- /wercker.yml: -------------------------------------------------------------------------------- 1 | box: python:3.5 2 | build: 3 | steps: 4 | - script: 5 | name: install virtualenv 6 | code: | 7 | pip install virtualenv 8 | 9 | - virtualenv: 10 | name: setup virtual environment 11 | install_wheel: true 12 | python_location: /usr/local/bin/python3.5 13 | 14 | - pip-install 15 | 16 | - script: 17 | name: echo python information 18 | code: | 19 | echo "python version $(python --version) running" 20 | echo "pip version $(pip --version) running" 21 | 22 | deploy: 23 | steps: 24 | - script: 25 | name: deploy 26 | code: python -m bonobo 27 | 28 | 
-------------------------------------------------------------------------------- /docs/tutorial/_wip_note.rst: -------------------------------------------------------------------------------- 1 | .. warning:: 2 | 3 | This section is being rewritten for |bonobo| 0.6, and it's now in a "work in progress" state. 4 | 5 | You can read :doc:`the tutorial for the previous version (0.5) <0.5/index>`. Please note that things changed a bit 6 | since then and you'll have quirks here and there. 7 | 8 | You can also read the `migration guide from 0.5 to 0.6 `_ 9 | that will give you a good overview of the changes. 10 | 11 | Hopefully, this document will be updated soon, and please accept our apologies about this doc status until then. 12 | 13 | -------------------------------------------------------------------------------- /bonobo/examples/environ.py: -------------------------------------------------------------------------------- 1 | """ 2 | This transformation extracts the environment and prints it, sorted alphabetically, one item per line. 3 | 4 | Used in the bonobo tests around environment management. 5 | 6 | """ 7 | import os 8 | 9 | import bonobo 10 | 11 | 12 | def extract_environ(): 13 | """Yield all the system environment.""" 14 | yield from sorted(os.environ.items()) 15 | 16 | 17 | def get_graph(): 18 | graph = bonobo.Graph() 19 | graph.add_chain(extract_environ, print) 20 | 21 | return graph 22 | 23 | 24 | if __name__ == "__main__": 25 | parser = bonobo.get_argument_parser() 26 | with bonobo.parse_args(parser): 27 | bonobo.run(get_graph()) 28 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = Bonobo 8 | SOURCEDIR = . 
9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /tests/util/test_resolvers.py: -------------------------------------------------------------------------------- 1 | import bonobo 2 | from bonobo.util.resolvers import _parse_option, _resolve_options, _resolve_transformations 3 | 4 | 5 | def test_parse_option(): 6 | assert _parse_option("foo=bar") == ("foo", "bar") 7 | assert _parse_option('foo="bar"') == ("foo", "bar") 8 | assert _parse_option('sep=";"') == ("sep", ";") 9 | assert _parse_option("foo") == ("foo", True) 10 | 11 | 12 | def test_resolve_options(): 13 | assert _resolve_options(("foo=bar", 'bar="baz"')) == {"foo": "bar", "bar": "baz"} 14 | assert _resolve_options() == {} 15 | 16 | 17 | def test_resolve_transformations(): 18 | assert _resolve_transformations(("PrettyPrinter",)) == (bonobo.PrettyPrinter,) 19 | -------------------------------------------------------------------------------- /bonobo/contrib/jupyter/js/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bonobo-jupyter", 3 | "version": "0.0.1", 4 | "description": "Jupyter integration for Bonobo", 5 | "author": "", 6 | "main": "src/index.js", 7 | "repository": { 8 | "type": "git", 9 | "url": "" 10 | }, 11 | "keywords": [ 12 | "jupyter", 13 | "widgets", 14 | "ipython", 15 | "ipywidgets" 16 | ], 17 | "scripts": { 18 | "prepublish": "webpack", 19 | "test": "echo \"Error: no test specified\" && exit 1" 20 | }, 21 | "devDependencies": { 22 | 
"json-loader": "^0.5.4", 23 | "webpack": "^1.12.14" 24 | }, 25 | "dependencies": { 26 | "jupyter-js-widgets": "^2.0.9", 27 | "underscore": "^1.8.3" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /bonobo/examples/files/text_handlers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import bonobo 4 | from bonobo import examples 5 | from bonobo.examples.files.services import get_services 6 | 7 | 8 | def skip_comments(line): 9 | line = line.strip() 10 | if not line.startswith("#"): 11 | yield line 12 | 13 | 14 | def get_graph(*, _limit=(), _print=()): 15 | return bonobo.Graph( 16 | bonobo.FileReader("passwd.txt", fs="fs.static"), 17 | skip_comments, 18 | *_limit, 19 | lambda s: s.split(":")[0], 20 | *_print, 21 | bonobo.FileWriter("usernames.txt", fs="fs.output"), 22 | ) 23 | 24 | 25 | if __name__ == "__main__": 26 | sys.exit(examples.run(get_graph, get_services)) 27 | -------------------------------------------------------------------------------- /bonobo/contrib/jupyter/js/src/extension.js: -------------------------------------------------------------------------------- 1 | // This file contains the javascript that is run when the notebook is loaded. 2 | // It contains some requirejs configuration and the `load_ipython_extension` 3 | // which is required for any notebook extension. 
4 | 5 | // Configure requirejs 6 | if (window.require) { 7 | window.require.config({ 8 | map: { 9 | "*" : { 10 | "bonobo-jupyter": "nbextensions/bonobo-jupyter/index", 11 | "jupyter-js-widgets": "nbextensions/jupyter-js-widgets/extension" 12 | } 13 | } 14 | }); 15 | } 16 | 17 | // Export the required load_ipython_extension 18 | module.exports = { 19 | load_ipython_extension: function() {} 20 | }; 21 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | omit = 4 | bonobo/examples/** 5 | bonobo/ext/** 6 | 7 | [report] 8 | # Regexes for lines to exclude from consideration 9 | exclude_lines = 10 | # Have to re-enable the standard pragma 11 | pragma: no cover 12 | 13 | # Don't complain about missing debug-only code: 14 | def __repr__ 15 | if self\.debug 16 | 17 | # Don't complain if tests don't hit defensive assertion code: 18 | raise AbstractError 19 | raise AssertionError 20 | raise NotImplementedError 21 | 22 | # Don't complain if non-runnable code isn't run: 23 | if 0: 24 | if __name__ == .__main__.: 25 | 26 | ignore_errors = True 27 | 28 | [html] 29 | directory = docs/_build/html/coverage 30 | -------------------------------------------------------------------------------- /docs/_templates/alabaster/relations.html: -------------------------------------------------------------------------------- 1 |
2 |

Related Topics

3 | 21 |
22 | -------------------------------------------------------------------------------- /bonobo/contrib/django/utils.py: -------------------------------------------------------------------------------- 1 | def create_or_update(model, *, defaults=None, save=True, **kwargs): 2 | """ 3 | Create or update a django model instance. 4 | 5 | :param model: 6 | :param defaults: 7 | :param kwargs: 8 | :return: object, created, updated 9 | 10 | """ 11 | obj, created = model._default_manager.get_or_create(defaults=defaults, **kwargs) 12 | 13 | updated = False 14 | if not created: 15 | if defaults: 16 | for k, v in defaults.items(): 17 | if getattr(obj, k) != v: 18 | setattr(obj, k, v) 19 | updated = True 20 | 21 | if updated and save: 22 | obj.save() 23 | 24 | return obj, created, updated 25 | -------------------------------------------------------------------------------- /bonobo/nodes/filter.py: -------------------------------------------------------------------------------- 1 | from bonobo.config import Configurable, Method 2 | from bonobo.constants import NOT_MODIFIED 3 | 4 | 5 | class Filter(Configurable): 6 | """Filter out hashes from the stream depending on the :attr:`filter` callable return value, when called with the 7 | current hash as parameter. 8 | 9 | Can be used as a decorator on a filter callable. 10 | 11 | .. attribute:: filter 12 | 13 | A callable used to filter lines. 14 | 15 | If the callable returns a true-ish value, the input will be passed unmodified to the next items. 16 | 17 | Otherwise, it'll be burnt. 
18 | 19 | """ 20 | 21 | filter = Method() 22 | 23 | def __call__(self, *args, **kwargs): 24 | if self.filter(*args, **kwargs): 25 | return NOT_MODIFIED 26 | -------------------------------------------------------------------------------- /tests/ext/test_ods.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import patch 2 | 3 | from bonobo.contrib.opendatasoft import OpenDataSoftAPI 4 | from bonobo.util.objects import ValueHolder 5 | 6 | 7 | class ResponseMock: 8 | def __init__(self, json_value): 9 | self.json_value = json_value 10 | self.count = 0 11 | 12 | def json(self): 13 | if self.count: 14 | return {} 15 | else: 16 | self.count += 1 17 | return {"records": self.json_value} 18 | 19 | 20 | def test_read_from_opendatasoft_api(): 21 | extract = OpenDataSoftAPI(dataset="test-a-set") 22 | with patch("requests.get", return_value=ResponseMock([{"fields": {"foo": "bar"}}, {"fields": {"foo": "zab"}}])): 23 | for line in extract("http://example.com/", ValueHolder(0)): 24 | assert "foo" in line 25 | -------------------------------------------------------------------------------- /tests/features/test_inherit.py: -------------------------------------------------------------------------------- 1 | from bonobo.util.envelopes import AppendingEnvelope 2 | from bonobo.util.testing import BufferingNodeExecutionContext 3 | 4 | messages = [("Hello",), ("Goodbye",)] 5 | 6 | 7 | def append(*args): 8 | return AppendingEnvelope("!") 9 | 10 | 11 | def test_inherit(): 12 | with BufferingNodeExecutionContext(append) as context: 13 | context.write_sync(*messages) 14 | 15 | assert context.get_buffer() == list(map(lambda x: x + ("!",), messages)) 16 | 17 | 18 | def test_inherit_bag_tuple(): 19 | with BufferingNodeExecutionContext(append) as context: 20 | context.set_input_fields(["message"]) 21 | context.write_sync(*messages) 22 | 23 | assert context.get_output_fields() == ("message", "0") 24 | assert context.get_buffer() == 
list(map(lambda x: x + ("!",), messages)) 25 | -------------------------------------------------------------------------------- /bonobo/examples/files/json_handlers.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import bonobo 4 | from bonobo import examples 5 | from bonobo.examples.files.services import get_services 6 | 7 | 8 | def get_graph(*, _limit=None, _print=False): 9 | graph = bonobo.Graph() 10 | 11 | trunk = graph.add_chain( 12 | bonobo.JsonReader("theaters.json", fs="fs.static"), *((bonobo.Limit(_limit),) if _limit else ()) 13 | ) 14 | 15 | if _print: 16 | graph.add_chain(bonobo.PrettyPrinter(), _input=trunk.output) 17 | 18 | graph.add_chain(bonobo.JsonWriter("theaters.output.json", fs="fs.output"), _input=trunk.output) 19 | graph.add_chain(bonobo.LdjsonWriter("theaters.output.ldjson", fs="fs.output"), _input=trunk.output) 20 | 21 | return graph 22 | 23 | 24 | if __name__ == "__main__": 25 | sys.exit(examples.run(get_graph, get_services)) 26 | -------------------------------------------------------------------------------- /bonobo/config/functools.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import itertools 3 | 4 | from bonobo.config.services import use 5 | from bonobo.util import get_name 6 | 7 | 8 | def transformation_factory(f): 9 | @functools.wraps(f) 10 | def _transformation_factory(*args, **kwargs): 11 | retval = f(*args, **kwargs) 12 | retval.__name__ = f.__name__ + "({})".format( 13 | ", ".join(itertools.chain(map(repr, args), ("{}={!r}".format(k, v) for k, v in kwargs.items()))) 14 | ) 15 | return retval 16 | 17 | _transformation_factory._partial = True 18 | 19 | return _transformation_factory 20 | 21 | 22 | class partial(functools.partial): 23 | @property 24 | def __name__(self): 25 | return get_name(self.func) 26 | 27 | def using(self, *service_names): 28 | return use(*service_names)(self) 29 | 
-------------------------------------------------------------------------------- /docs/_templates/alabaster/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from alabaster import _version as version 4 | 5 | 6 | def get_path(): 7 | """ 8 | Shortcut for users whose theme is next to their conf.py. 9 | """ 10 | # Theme directory is defined as our parent directory 11 | return os.path.abspath(os.path.dirname(os.path.dirname(__file__))) 12 | 13 | 14 | def update_context(app, pagename, templatename, context, doctree): 15 | context["alabaster_version"] = version.__version__ 16 | 17 | 18 | def setup(app): 19 | # add_html_theme is new in Sphinx 1.6+ 20 | if hasattr(app, "add_html_theme"): 21 | theme_path = os.path.abspath(os.path.dirname(__file__)) 22 | app.add_html_theme("alabaster", theme_path) 23 | app.connect("html-page-context", update_context) 24 | return {"version": version.__version__, "parallel_read_safe": True} 25 | -------------------------------------------------------------------------------- /bonobo/execution/strategies/base.py: -------------------------------------------------------------------------------- 1 | from bonobo.execution.contexts.graph import GraphExecutionContext 2 | 3 | 4 | class Strategy: 5 | """ 6 | Base class for execution strategies. 7 | 8 | """ 9 | 10 | GraphExecutionContextType = GraphExecutionContext 11 | 12 | def __init__(self, GraphExecutionContextType=None): 13 | self.GraphExecutionContextType = GraphExecutionContextType or self.GraphExecutionContextType 14 | 15 | def create_graph_execution_context(self, graph, *args, GraphExecutionContextType=None, **kwargs): 16 | if not len(graph): 17 | raise ValueError("You provided an empty graph, which does not really make sense. 
Please add some nodes.") 18 | return (GraphExecutionContextType or self.GraphExecutionContextType)(graph, *args, **kwargs) 19 | 20 | def execute(self, graph, *args, **kwargs): 21 | raise NotImplementedError 22 | -------------------------------------------------------------------------------- /bonobo/execution/events.py: -------------------------------------------------------------------------------- 1 | """ 2 | .. data:: START 3 | 4 | Event dispatched before execution starts. 5 | 6 | .. data:: STARTED 7 | 8 | Event dispatched after execution starts. 9 | 10 | .. data:: TICK 11 | 12 | Event dispatched while execution runs, on a regular basis (on each "tick"). 13 | 14 | .. data:: STOP 15 | 16 | Event dispatched before execution stops. 17 | 18 | .. data:: STOPPED 19 | 20 | Event dispatched after execution stops. 21 | 22 | .. data:: KILL 23 | 24 | Event dispatched when execution is killed. 25 | 26 | """ 27 | 28 | from whistle import Event 29 | 30 | START = "execution.start" 31 | STARTED = "execution.started" 32 | TICK = "execution.tick" 33 | STOP = "execution.stop" 34 | STOPPED = "execution.stopped" 35 | KILL = "execution.kill" 36 | 37 | 38 | class ExecutionEvent(Event): 39 | def __init__(self, context): 40 | self.context = context 41 | -------------------------------------------------------------------------------- /docs/_templates/sidebarinfos.html: -------------------------------------------------------------------------------- 1 |

Stay Informed

2 | 3 |

Join announcements list.

4 | 5 |

6 | 7 |

8 | 10 | 11 |

12 | -------------------------------------------------------------------------------- /bonobo/examples/types/strings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Example on how to use symple python strings to communicate between transformations. 3 | 4 | .. graphviz:: 5 | 6 | digraph { 7 | rankdir = LR; 8 | stylesheet = "../_static/graphs.css"; 9 | 10 | BEGIN [shape="point"]; 11 | BEGIN -> "extract()" -> "transform(s: str)" -> "load(s: str)"; 12 | } 13 | 14 | """ 15 | from random import randint 16 | 17 | import bonobo 18 | 19 | 20 | def extract(): 21 | yield "foo" 22 | yield "bar" 23 | yield "baz" 24 | 25 | 26 | def transform(s): 27 | return "{} ({})".format(s.title(), randint(10, 99)) 28 | 29 | 30 | def load(s): 31 | print(s) 32 | 33 | 34 | def get_graph(): 35 | return bonobo.Graph(extract, transform, load) 36 | 37 | 38 | if __name__ == "__main__": 39 | parser = bonobo.get_argument_parser() 40 | with bonobo.parse_args(parser): 41 | bonobo.run(get_graph()) 42 | -------------------------------------------------------------------------------- /tests/commands/test_init.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from bonobo.commands.init import InitCommand 6 | from bonobo.util.testing import all_runners 7 | 8 | 9 | @all_runners 10 | def test_init_file(runner, tmpdir): 11 | target = tmpdir.join("foo.py") 12 | target_filename = str(target) 13 | runner("init", target_filename) 14 | assert os.path.exists(target_filename) 15 | 16 | out, err = runner("run", target_filename) 17 | assert out.replace("\n", " ").strip() == "Hello World" 18 | assert not err 19 | 20 | 21 | @all_runners 22 | @pytest.mark.parametrize("template", InitCommand.TEMPLATES) 23 | def test_init_file_templates(runner, template, tmpdir): 24 | target = tmpdir.join("foo.py") 25 | target_filename = str(target) 26 | runner("init", target_filename) 27 | assert 
os.path.exists(target_filename) 28 | out, err = runner("run", target_filename) 29 | assert not err 30 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -e .[dev] 2 | -r requirements.txt 3 | alabaster==0.7.12 4 | arrow==0.14.2 5 | atomicwrites==1.3.0 6 | attrs==19.1.0 7 | babel==2.7.0 8 | binaryornot==0.4.4 9 | certifi==2019.6.16 10 | chardet==3.0.4 11 | click==7.0 12 | cookiecutter==1.5.1 13 | coverage==4.5.3 14 | docutils==0.14 15 | future==0.17.1 16 | idna==2.8 17 | imagesize==1.1.0 18 | importlib-metadata==0.18 19 | jinja2-time==0.2.0 20 | jinja2==2.10.1 21 | markupsafe==1.1.1 22 | more-itertools==7.1.0 23 | packaging==19.0 24 | pathlib2==2.3.4 25 | pluggy==0.12.0 26 | poyo==0.4.2 27 | py==1.8.0 28 | pygments==2.4.2 29 | pyparsing==2.4.0 30 | pytest-cov==2.7.1 31 | pytest-timeout==1.3.3 32 | pytest==4.6.4 33 | python-dateutil==2.8.0 34 | pytz==2019.1 35 | requests==2.22.0 36 | six==1.12.0 37 | snowballstemmer==1.9.0 38 | sphinx-sitemap==0.2 39 | sphinx==1.8.5 40 | sphinxcontrib-websupport==1.1.2 41 | urllib3==1.25.3 42 | wcwidth==0.1.7 43 | whichcraft==0.6.0 44 | zipp==0.5.2 45 | -------------------------------------------------------------------------------- /bonobo/util/envelopes.py: -------------------------------------------------------------------------------- 1 | from bonobo.structs.tokens import Flag 2 | 3 | F_INHERIT = Flag("Inherit") 4 | 5 | F_NOT_MODIFIED = Flag("NotModified") 6 | F_NOT_MODIFIED.must_be_first = True 7 | F_NOT_MODIFIED.must_be_last = True 8 | F_NOT_MODIFIED.allows_data = False 9 | 10 | 11 | class Envelope: 12 | def __init__(self, content, *, flags=None, **options): 13 | self._content = content 14 | self._flags = set(flags or ()) 15 | self._options = options 16 | 17 | def unfold(self): 18 | return self._content, self._flags, self._options 19 | 20 | 21 | class AppendingEnvelope(Envelope): 22 | def 
__init__(self, content, **options): 23 | super().__init__(content, flags={F_INHERIT}, **options) 24 | 25 | 26 | class UnchangedEnvelope(Envelope): 27 | def __init__(self, **options): 28 | super().__init__(None, flags={F_NOT_MODIFIED}, **options) 29 | 30 | 31 | def isenvelope(mixed): 32 | return isinstance(mixed, Envelope) 33 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=Bonobo 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /readthedocs-conda.yml: -------------------------------------------------------------------------------- 1 | name: py35 2 | dependencies: 3 | - pip=9.0.1 4 | - python=3.5 5 | - setuptools=36.5.0 6 | - wheel=0.29.0 7 | - pip: 8 | - appdirs==1.4.3 9 | - certifi==2017.11.5 10 | - chardet==3.0.4 11 | - colorama==0.3.9 12 | - fs==2.0.17 13 | - graphviz==0.8.2 14 | - idna==2.6 15 | - jinja2==2.10 16 | - markupsafe==1.0 17 | - mondrian==0.6.1 18 | - packaging==16.8 19 | - pbr==3.1.1 20 | - psutil==5.4.3 21 | - pyparsing==2.2.0 22 | - python-slugify==1.2.4 23 | - pytz==2017.3 24 | - requests==2.18.4 25 | - six==1.11.0 26 | - stevedore==1.28.0 27 | - unidecode==1.0.22 28 | - urllib3==1.22 29 | - whistle==1.0.0 30 | # for contribs 31 | - django>=2,<3 32 | # for extensions 33 | - bonobo-docker>=0.6,<0.7 34 | - bonobo-sqlalchemy>=0.6,<0.7 35 | # for docs 36 | - alabaster==0.7.10 37 | - sphinx-sitemap==0.2 38 | - sphinx==1.6.5 39 | - sphinxcontrib-websupport==1.0.1 40 | # for examples 41 | - pycountry ==17.9.23 42 | 43 | -------------------------------------------------------------------------------- /docs/_templates/sidebarintro.html: -------------------------------------------------------------------------------- 1 |

About Bonobo

2 |

3 | Bonobo is a data-processing toolkit for Python 3.5+, your Swiss Army knife for everyday data. 4 |

5 | 6 |

Other Formats

7 |

8 | Download the docs... 9 |

10 | 15 | 16 |

Useful Links

17 | 22 | -------------------------------------------------------------------------------- /requirements-jupyter.txt: -------------------------------------------------------------------------------- 1 | -e .[jupyter] 2 | -r requirements.txt 3 | appnope==0.1.0 4 | attrs==19.1.0 5 | backcall==0.1.0 6 | bleach==3.1.0 7 | decorator==4.4.0 8 | defusedxml==0.6.0 9 | entrypoints==0.3 10 | ipykernel==5.1.1 11 | ipython-genutils==0.2.0 12 | ipython==7.6.1 13 | ipywidgets==6.0.1 14 | jedi==0.14.1 15 | jinja2==2.10.1 16 | jsonschema==3.0.1 17 | jupyter-client==5.3.1 18 | jupyter-console==6.0.0 19 | jupyter-core==4.5.0 20 | jupyter==1.0.0 21 | markupsafe==1.1.1 22 | mistune==0.8.4 23 | nbconvert==5.5.0 24 | nbformat==4.4.0 25 | notebook==6.0.0 26 | pandocfilters==1.4.2 27 | parso==0.5.1 28 | pexpect==4.7.0 29 | pickleshare==0.7.5 30 | prometheus-client==0.7.1 31 | prompt-toolkit==2.0.9 32 | ptyprocess==0.6.0 33 | pygments==2.4.2 34 | pyrsistent==0.15.3 35 | python-dateutil==2.8.0 36 | pyzmq==18.0.2 37 | qtconsole==4.5.1 38 | send2trash==1.5.0 39 | six==1.12.0 40 | terminado==0.8.2 41 | testpath==0.4.2 42 | tornado==6.0.3 43 | traitlets==4.3.2 44 | wcwidth==0.1.7 45 | webencodings==0.5.1 46 | widgetsnbextension==2.0.1 47 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Thanks for submitting an issue! 2 | 3 | * If this is a **feature request**, please make sure you explain the context, the goal, and why it is something that would go into bonobo core. Drafting some bits of spec is a good idea too, even if it's very draft-y. 4 | 5 | * If this is a **bug report**, make sure you describe the expected and actual behaviour, and if possible include some minimal code or steps to reproduce the problem. 6 | 7 | * If this is a **question**, please post it on Stack Overflow, and simply paste the question URL in an issue or in the Slack channel. 
Also, when you get an answer, please consider contributing back the bits of documentation you would have loved to find in the first place. 8 | 9 | ## Versions 10 | 11 | * Bonobo version: 12 | 13 | `bonobo version --all` 14 | 15 | * Python version: 16 | 17 | `python -c "import platform,sys; print(platform.python_implementation() + ' ' + sys.version)"` 18 | 19 | * Platform: 20 | 21 | `uname -a` 22 | `/etc/lsb-release` 23 | ... 24 | 25 | -------------------------------------------------------------------------------- /bonobo/util/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The Util API, located under the :mod:`bonobo.util` namespace, contains helpers functions and decorators to work with 3 | and inspect transformations, graphs, and nodes. 4 | 5 | """ 6 | from bonobo.util.collections import cast, ensure_tuple, sortedlist, tuplize 7 | from bonobo.util.compat import deprecated, deprecated_alias 8 | from bonobo.util.inspect import ( 9 | inspect_node, isconfigurable, isconfigurabletype, iscontextprocessor, isdict, ismethod, isoption, istuple, istype 10 | ) 11 | from bonobo.util.objects import ValueHolder, get_attribute_or_create, get_name 12 | 13 | # Bonobo's util API 14 | __all__ = [ 15 | "ValueHolder", 16 | "cast", 17 | "deprecated", 18 | "deprecated_alias", 19 | "ensure_tuple", 20 | "get_attribute_or_create", 21 | "get_name", 22 | "inspect_node", 23 | "isconfigurable", 24 | "isconfigurabletype", 25 | "iscontextprocessor", 26 | "isdict", 27 | "ismethod", 28 | "isoption", 29 | "istype", 30 | "sortedlist", 31 | "tuplize", 32 | ] 33 | -------------------------------------------------------------------------------- /docs/extension/selenium.rst: -------------------------------------------------------------------------------- 1 | Working with Selenium 2 | ===================== 3 | 4 | .. include:: _alpha.rst 5 | 6 | Writing web crawlers with Bonobo and Selenium is easy. 
7 | 8 | First, install **bonobo-selenium**: 9 | 10 | .. code-block:: shell-session 11 | 12 | $ pip install bonobo-selenium 13 | 14 | The idea is to have one callable crawl one thing and delegate drill downs to callables further away in the chain. 15 | 16 | An example chain could be: 17 | 18 | .. graphviz:: 19 | 20 | digraph { 21 | rankdir = LR; 22 | login -> paginate -> list -> details -> "ExcelWriter(...)"; 23 | } 24 | 25 | Where each step would do the following: 26 | 27 | * `login()` is in charge of opening an authenticated session in the browser. 28 | * `paginate()` opens each page of a fictitious list and passes it to the next step. 29 | * `list()` takes every list item and yields it. 30 | * `details()` extracts the data you're interested in. 31 | * ... and the writer saves it somewhere. 32 | 33 | Installation 34 | :::::::::::: 35 | 36 | Overview 37 | :::::::: 38 | 39 | Details 40 | ::::::: 41 | -------------------------------------------------------------------------------- /bonobo/constants.py: -------------------------------------------------------------------------------- 1 | """ 2 | .. data:: BEGIN 3 | 4 | **BEGIN** token marks the entrypoint of graphs, and all extractors will be connected to this node. 5 | 6 | Without this, it would be impossible for an execution to actually start anything, as it's the marker that tells 7 | |bonobo| which node to actually call when the execution starts. 8 | 9 | .. data:: NOT_MODIFIED 10 | 11 | **NOT_MODIFIED** is a special value you can return or yield from a transformation to tell bonobo to reuse 12 | the input data as output. 13 | 14 | As a convention, all loaders should return this, so loaders can be chained. 15 | 16 | .. data:: EMPTY 17 | 18 | Shortcut for "empty tuple". It's often much more clear to write (especially in a test) `write(EMPTY)` than 19 | `write(())`, although strictly equivalent. 
20 | 21 | 22 | """ 23 | from bonobo.structs.tokens import Token 24 | from bonobo.util.envelopes import UnchangedEnvelope 25 | 26 | BEGIN = Token("Begin") 27 | END = Token("End") 28 | 29 | NOT_MODIFIED = UnchangedEnvelope() 30 | 31 | EMPTY = tuple() 32 | 33 | TICK_PERIOD = 0.2 34 | -------------------------------------------------------------------------------- /docs/contribute/release.rst: -------------------------------------------------------------------------------- 1 | Releases 2 | ======== 3 | 4 | WORK IN PROGRESS, THIS DOC IS UNFINISHED AND JUST RAW NOTES TO HELP ME RELEASE STUFF. 5 | 6 | How to make a patch release? 7 | :::::::::::::::::::::::::::: 8 | 9 | For now, reference at http://rdc.li/r 10 | 11 | Additional checklist: 12 | 13 | * make format 14 | 15 | How to make a minor or major release? 16 | ::::::::::::::::::::::::::::::::::::: 17 | 18 | Releases above patch level are more complex, because we did not find a way not to hardcode the version number in a bunch 19 | of files, and because a few dependent services (source control, continuous integration, code coverage, documentation 20 | builder ...) also depend on version numbers. 21 | 22 | Checklist: 23 | 24 | * Files 25 | * Github 26 | 27 | 28 | Recipes 29 | ::::::: 30 | 31 | Get current minor:: 32 | 33 | git semver | python -c 'import sys; print(".".join(sys.stdin.read().strip().split(".")[0:2]))' 34 | 35 | Open vim with all files containing current minor:: 36 | 37 | ack `git semver | python -c 'import sys; print("\\\\.".join(sys.stdin.read().strip().split(".")[0:2]))'` | vim - 38 | 39 | -------------------------------------------------------------------------------- /bonobo/config/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The Config API, located under the :mod:`bonobo.config` namespace, contains all the tools you need to create 3 | configurable transformations, either class-based or function-based. 
4 | 5 | """ 6 | 7 | from bonobo.config.configurables import Configurable 8 | from bonobo.config.functools import partial, transformation_factory 9 | from bonobo.config.options import Method, Option 10 | from bonobo.config.processors import ContextProcessor, use_context, use_context_processor, use_no_input, use_raw_input 11 | from bonobo.config.services import Container, Exclusive, Service, create_container, use 12 | from bonobo.util import deprecated_alias 13 | 14 | requires = deprecated_alias("requires", use) 15 | 16 | # Bonobo's Config API 17 | __all__ = [ 18 | "Configurable", 19 | "Container", 20 | "ContextProcessor", 21 | "Exclusive", 22 | "Method", 23 | "Option", 24 | "Service", 25 | "create_container", 26 | "partial", 27 | "requires", 28 | "transformation_factory", 29 | "use", 30 | "use_context", 31 | "use_context_processor", 32 | "use_no_input", 33 | "use_raw_input", 34 | ] 35 | -------------------------------------------------------------------------------- /bonobo/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | class Plugin: 2 | """ 3 | A plugin is an extension to the core behavior of bonobo. If you're writing transformations, you should not need 4 | to use this interface. 5 | 6 | For examples, you can read bonobo.plugins.console.ConsoleOutputPlugin, or bonobo.plugins.jupyter.JupyterOutputPlugin 7 | that respectively permits an interactive output on an ANSI console and a rich output in a jupyter notebook. Note 8 | that you most probably won't instanciate them by yourself at runtime, as it's the default behaviour of bonobo to use 9 | them if your in a compatible context (aka an interactive terminal for the console plugin, or a jupyter notebook for 10 | the notebook plugin.) 11 | 12 | Warning: THE PLUGIN API IS PRE-ALPHA AND WILL EVOLVE BEFORE 1.0, DO NOT RELY ON IT BEING STABLE! 
13 | 14 | """ 15 | 16 | def register(self, dispatcher): 17 | """ 18 | :param dispatcher: whistle.EventDispatcher 19 | """ 20 | pass 21 | 22 | def unregister(self, dispatcher): 23 | """ 24 | :param dispatcher: whistle.EventDispatcher 25 | """ 26 | pass 27 | -------------------------------------------------------------------------------- /tests/util/test_collections.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bonobo.util import ensure_tuple, sortedlist 4 | from bonobo.util.collections import cast, tuple_or_const, tuplize 5 | 6 | 7 | def test_sortedlist(): 8 | l = sortedlist() 9 | l.insort(2) 10 | l.insort(1) 11 | l.insort(3) 12 | l.insort(2) 13 | assert l == [1, 2, 2, 3] 14 | 15 | 16 | def test_tuple_or_const(): 17 | assert tuple_or_const(()) == () 18 | assert tuple_or_const((1,)) == (1,) 19 | assert tuple_or_const((1, 2)) == (1, 2) 20 | assert tuple_or_const([1, 2]) == (1, 2) 21 | assert tuple_or_const("aaa") == ("aaa",) 22 | 23 | 24 | def test_ensure_tuple(): 25 | assert ensure_tuple("a") == ("a",) 26 | assert ensure_tuple(("a",)) == ("a",) 27 | assert ensure_tuple(()) is () 28 | 29 | 30 | @pytest.mark.parametrize("tuplize", [tuplize, cast(tuple)]) 31 | def test_tuplize(tuplize): 32 | tuplized_lambda = tuplize(lambda: [1, 2, 3]) 33 | assert tuplized_lambda() == (1, 2, 3) 34 | 35 | @tuplize 36 | def some_generator(): 37 | yield "c" 38 | yield "b" 39 | yield "a" 40 | 41 | assert some_generator() == ("c", "b", "a") 42 | -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | svg { 2 | border: 2px solid green 3 | } 4 | 5 | div.related { 6 | width: 940px; 7 | margin: 30px auto 0 auto; 8 | } 9 | 10 | @media screen and (max-width: 875px) { 11 | div.related { 12 | visibility: hidden; 13 | display: none; 14 | } 15 | } 16 | 17 | .brand { 18 | font-family: 'Ubuntu', 
'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif; 19 | font-size: 0.9em; 20 | } 21 | 22 | div.sphinxsidebar h3 { 23 | margin: 30px 0 10px 0; 24 | } 25 | 26 | div.admonition p.admonition-title { 27 | font-family: 'Ubuntu', 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif; 28 | } 29 | 30 | div.sphinxsidebarwrapper { 31 | padding: 0; 32 | } 33 | 34 | div.note { 35 | border: 0; 36 | } 37 | 38 | .last { 39 | margin-bottom: 0 !important; 40 | } 41 | 42 | div.admonition { 43 | padding: 16px; 44 | } 45 | 46 | pre { 47 | padding: 16px; 48 | border: 1px solid #ddd; 49 | background-color: #fafafa; 50 | } 51 | 52 | .section > dl { 53 | border: 1px solid #ddd; 54 | background-color: #fafafa; 55 | margin: 16px 0; 56 | padding: 16px; 57 | } -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | History 2 | ======= 3 | 4 | |bonobo| is a full rewrite of **rdc.etl**, aimed at modern python versions (3.5+). 5 | 6 | **rdc.etl** is a now-deprecated python 2.7+ ETL library for which development started in 2012, and which was open-sourced in 7 | 2013 (see `first commit `_). 8 | 9 | Although the first commit in |bonobo| happened late 2016, it's based on a lot of code, learnings and experience that 10 | happened because of **rdc.etl**. 11 | 12 | It would have been counterproductive to migrate the same codebase: 13 | 14 | * a lot of mistakes were impossible to fix in a backward-compatible way (for example, transformations were stateful, 15 | making them more complicated to write and impossible to reuse, a lot of effort was used to make the components have 16 | multi-inputs and multi-outputs, although in 99% of cases it's useless, etc.). 17 | * we also wanted to develop something that took advantage of modern python versions, hence the choice of 3.5+. 
18 | 19 | **rdc.etl** still runs data transformation jobs, in both python 2.7 and 3, and we reuse whatever is possible to 20 | continue building |bonobo|. 21 | 22 | -------------------------------------------------------------------------------- /benchmarks/parameters.py: -------------------------------------------------------------------------------- 1 | """ 2 | Compare passing a dict to passing a dict as kwargs to a stupid transformation 3 | 4 | Last results (1 mill calls): 5 | 6 | j1 1.5026444319955772 7 | k1 1.8377482700016117 8 | j2 1.1962292949901894 9 | k2 1.5545833489886718 10 | j3 1.0014333260041894 11 | k3 1.353256585993222 12 | 13 | """ 14 | import json 15 | import timeit 16 | 17 | 18 | def j1(d): 19 | return {"prepend": "foo", **d, "append": "bar"} 20 | 21 | 22 | def k1(**d): 23 | return {"prepend": "foo", **d, "append": "bar"} 24 | 25 | 26 | def j2(d): 27 | return {**d} 28 | 29 | 30 | def k2(**d): 31 | return {**d} 32 | 33 | 34 | def j3(d): 35 | return None 36 | 37 | 38 | def k3(**d): 39 | return None 40 | 41 | 42 | if __name__ == "__main__": 43 | import timeit 44 | 45 | with open("person.json") as f: 46 | json_data = json.load(f) 47 | 48 | for i in 1, 2, 3: 49 | print( 50 | "j{}".format(i), timeit.timeit("j{}({!r})".format(i, json_data), setup="from __main__ import j{}".format(i)) 51 | ) 52 | print( 53 | "k{}".format(i), 54 | timeit.timeit("k{}(**{!r})".format(i, json_data), setup="from __main__ import k{}".format(i)), 55 | ) 56 | -------------------------------------------------------------------------------- /bonobo/util/compat.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import warnings 3 | 4 | 5 | def deprecated_alias(alias, func): 6 | @functools.wraps(func) 7 | def new_func(*args, **kwargs): 8 | warnings.simplefilter("always", DeprecationWarning) # turn off filter 9 | warnings.warn( 10 | "Call to deprecated function alias {}, use {} instead.".format(alias, func.__name__), 11 | 
category=DeprecationWarning, 12 | stacklevel=2, 13 | ) 14 | warnings.simplefilter("default", DeprecationWarning) # reset filter 15 | return func(*args, **kwargs) 16 | 17 | return new_func 18 | 19 | 20 | def deprecated(func): 21 | """This is a decorator which can be used to mark functions 22 | as deprecated. It will result in a warning being emmitted 23 | when the function is used.""" 24 | 25 | @functools.wraps(func) 26 | def new_func(*args, **kwargs): 27 | warnings.simplefilter("always", DeprecationWarning) # turn off filter 28 | warnings.warn( 29 | "Call to deprecated function {}.".format(func.__name__), category=DeprecationWarning, stacklevel=2 30 | ) 31 | warnings.simplefilter("default", DeprecationWarning) # reset filter 32 | return func(*args, **kwargs) 33 | 34 | return new_func 35 | -------------------------------------------------------------------------------- /bonobo/util/statistics.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | class WithStatistics: 5 | def __init__(self, *names): 6 | self.statistics_names = names 7 | self.statistics = {name: 0 for name in names} 8 | 9 | def get_statistics(self, *args, **kwargs): 10 | return ((name, self.statistics[name]) for name in self.statistics_names) 11 | 12 | def get_statistics_as_string(self, *args, **kwargs): 13 | stats = tuple("{0}={1}".format(name, cnt) for name, cnt in self.get_statistics(*args, **kwargs) if cnt > 0) 14 | return (kwargs.get("prefix", "") + " ".join(stats)) if len(stats) else "" 15 | 16 | def increment(self, name, *, amount=1): 17 | self.statistics[name] += amount 18 | 19 | 20 | class Timer: 21 | """ 22 | Context manager used to time execution of stuff. 
23 | """ 24 | 25 | def __enter__(self): 26 | self.__start = time.time() 27 | return self 28 | 29 | def __exit__(self, type=None, value=None, traceback=None): # lgtm [py/special-method-wrong-signature] 30 | # Error handling here 31 | self.__finish = time.time() 32 | 33 | @property 34 | def duration(self): 35 | return self.__finish - self.__start 36 | 37 | def __str__(self): 38 | return str(int(self.duration * 1000) / 1000.0) + "s" 39 | -------------------------------------------------------------------------------- /bonobo/plugins/jupyter.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from bonobo.contrib.jupyter.widget import BonoboWidget 4 | from bonobo.execution import events 5 | from bonobo.plugins import Plugin 6 | 7 | try: 8 | import IPython.core.display 9 | except ImportError as e: 10 | logging.exception( 11 | "You must install Jupyter to use the bonobo Jupyter extension. Easiest way is to install the " 12 | 'optional "jupyter" dependencies with «pip install bonobo[jupyter]», but you can also install a ' 13 | "specific version by yourself." 
14 | ) 15 | 16 | 17 | class JupyterOutputPlugin(Plugin): 18 | def register(self, dispatcher): 19 | dispatcher.add_listener(events.START, self.setup) 20 | dispatcher.add_listener(events.TICK, self.tick) 21 | dispatcher.add_listener(events.STOPPED, self.tick) 22 | 23 | def unregister(self, dispatcher): 24 | dispatcher.remove_listener(events.STOPPED, self.tick) 25 | dispatcher.remove_listener(events.TICK, self.tick) 26 | dispatcher.remove_listener(events.START, self.setup) 27 | 28 | def setup(self, event): 29 | self.widget = BonoboWidget() 30 | IPython.core.display.display(self.widget) 31 | 32 | def tick(self, event): 33 | self.widget.value = [event.context[i].as_dict() for i in event.context.graph.topologically_sorted_indexes] 34 | -------------------------------------------------------------------------------- /bonobo/commands/download.py: -------------------------------------------------------------------------------- 1 | import io 2 | import re 3 | 4 | import requests 5 | 6 | import bonobo 7 | from bonobo.commands import BaseCommand 8 | 9 | EXAMPLES_BASE_URL = "https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/" 10 | """The URL to our git repository, in raw mode.""" 11 | 12 | 13 | class DownloadCommand(BaseCommand): 14 | def handle(self, *, path, **options): 15 | if not path.startswith("examples"): 16 | raise ValueError("Download command currently supports examples only") 17 | examples_path = re.sub("^examples/", "", path) 18 | output_path = bonobo.get_examples_path(examples_path) 19 | with _open_url(EXAMPLES_BASE_URL + examples_path) as response, open(output_path, "wb") as fout: 20 | for chunk in response.iter_content(io.DEFAULT_BUFFER_SIZE): 21 | fout.write(chunk) 22 | self.logger.info("Download saved to {}".format(output_path)) 23 | 24 | def add_arguments(self, parser): 25 | parser.add_argument("path", help="The relative path of the thing to download.") 26 | 27 | 28 | def _open_url(url): 29 | """Open a HTTP connection to the URL and 
return a file-like object.""" 30 | response = requests.get(url, stream=True) 31 | if response.status_code != 200: 32 | raise IOError("Unable to download {}, HTTP {}".format(url, response.status_code)) 33 | return response 34 | -------------------------------------------------------------------------------- /benchmarks/person.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "http://schema.org", 3 | "@type": "MusicEvent", 4 | "location": { 5 | "@type": "MusicVenue", 6 | "name": "Chicago Symphony Center", 7 | "address": "220 S. Michigan Ave, Chicago, Illinois, USA" 8 | }, 9 | "name": "Shostakovich Leningrad", 10 | "offers": { 11 | "@type": "Offer", 12 | "url": "/examples/ticket/12341234", 13 | "price": "40", 14 | "priceCurrency": "USD", 15 | "availability": "http://schema.org/InStock" 16 | }, 17 | "performer": [ 18 | { 19 | "@type": "MusicGroup", 20 | "name": "Chicago Symphony Orchestra", 21 | "sameAs": [ 22 | "http://cso.org/", 23 | "http://en.wikipedia.org/wiki/Chicago_Symphony_Orchestra" 24 | ] 25 | }, 26 | { 27 | "@type": "Person", 28 | "image": "/examples/jvanzweden_s.jpg", 29 | "name": "Jaap van Zweden", 30 | "sameAs": "http://www.jaapvanzweden.com/" 31 | } 32 | ], 33 | "startDate": "2014-05-23T20:00", 34 | "workPerformed": [ 35 | { 36 | "@type": "CreativeWork", 37 | "name": "Britten Four Sea Interludes and Passacaglia from Peter Grimes", 38 | "sameAs": "http://en.wikipedia.org/wiki/Peter_Grimes" 39 | }, 40 | { 41 | "@type": "CreativeWork", 42 | "name": "Shostakovich Symphony No. 
7 (Leningrad)", 43 | "sameAs": "http://en.wikipedia.org/wiki/Symphony_No._7_(Shostakovich)" 44 | } 45 | ] 46 | } 47 | -------------------------------------------------------------------------------- /tests/commands/test_download.py: -------------------------------------------------------------------------------- 1 | import io 2 | from unittest.mock import patch 3 | 4 | import pytest 5 | 6 | from bonobo.commands.download import EXAMPLES_BASE_URL 7 | from bonobo.util.testing import all_runners 8 | 9 | 10 | @all_runners 11 | def test_download_works_for_examples(runner): 12 | expected_bytes = b"hello world" 13 | 14 | class MockResponse(object): 15 | def __init__(self): 16 | self.status_code = 200 17 | 18 | def iter_content(self, *args, **kwargs): 19 | return [expected_bytes] 20 | 21 | def __enter__(self): 22 | return self 23 | 24 | def __exit__(self, *args, **kwargs): 25 | pass 26 | 27 | fout = io.BytesIO() 28 | fout.close = lambda: None 29 | 30 | with patch("bonobo.commands.download._open_url") as mock_open_url, patch( 31 | "bonobo.commands.download.open" 32 | ) as mock_open: 33 | mock_open_url.return_value = MockResponse() 34 | mock_open.return_value = fout 35 | runner("download", "examples/datasets/coffeeshops.txt") 36 | expected_url = EXAMPLES_BASE_URL + "datasets/coffeeshops.txt" 37 | mock_open_url.assert_called_once_with(expected_url) 38 | 39 | assert fout.getvalue() == expected_bytes 40 | 41 | 42 | @all_runners 43 | def test_download_fails_non_example(runner): 44 | with pytest.raises(ValueError): 45 | runner("download", "something/entirely/different.txt") 46 | -------------------------------------------------------------------------------- /tests/plugins/test_console.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | from whistle import EventDispatcher 4 | 5 | import bonobo 6 | from bonobo.execution import events 7 | from bonobo.execution.contexts.graph import GraphExecutionContext 
8 | from bonobo.plugins.console import ConsoleOutputPlugin 9 | 10 | 11 | def test_register_unregister(): 12 | plugin = ConsoleOutputPlugin() 13 | dispatcher = EventDispatcher() 14 | 15 | plugin.register(dispatcher) 16 | assert plugin.setup in dispatcher.get_listeners(events.START) 17 | assert plugin.tick in dispatcher.get_listeners(events.TICK) 18 | assert plugin.teardown in dispatcher.get_listeners(events.STOPPED) 19 | plugin.unregister(dispatcher) 20 | assert plugin.setup not in dispatcher.get_listeners(events.START) 21 | assert plugin.tick not in dispatcher.get_listeners(events.TICK) 22 | assert plugin.teardown not in dispatcher.get_listeners(events.STOPPED) 23 | 24 | 25 | def test_one_pass(): 26 | plugin = ConsoleOutputPlugin() 27 | dispatcher = EventDispatcher() 28 | plugin.register(dispatcher) 29 | 30 | graph = bonobo.Graph() 31 | context = MagicMock(spec=GraphExecutionContext(graph)) 32 | 33 | dispatcher.dispatch(events.START, events.ExecutionEvent(context)) 34 | dispatcher.dispatch(events.TICK, events.ExecutionEvent(context)) 35 | dispatcher.dispatch(events.STOPPED, events.ExecutionEvent(context)) 36 | 37 | plugin.unregister(dispatcher) 38 | -------------------------------------------------------------------------------- /tests/nodes/io/test_pickle.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | 3 | import pytest 4 | 5 | from bonobo import PickleReader, PickleWriter 6 | from bonobo.constants import EMPTY 7 | from bonobo.execution.contexts.node import NodeExecutionContext 8 | from bonobo.util.testing import BufferingNodeExecutionContext, FilesystemTester 9 | 10 | pickle_tester = FilesystemTester("pkl", mode="wb") 11 | pickle_tester.input_data = pickle.dumps([["a", "b", "c"], ["a foo", "b foo", "c foo"], ["a bar", "b bar", "c bar"]]) 12 | 13 | 14 | def test_write_pickled_dict_to_file(tmpdir): 15 | fs, filename, services = pickle_tester.get_services_for_writer(tmpdir) 16 | 17 | with 
NodeExecutionContext(PickleWriter(filename), services=services) as context: 18 | context.write_sync({"foo": "bar"}, {"foo": "baz", "ignore": "this"}) 19 | 20 | with fs.open(filename, "rb") as fp: 21 | assert pickle.loads(fp.read()) == {"foo": "bar"} 22 | 23 | with pytest.raises(AttributeError): 24 | getattr(context, "file") 25 | 26 | 27 | def test_read_pickled_list_from_file(tmpdir): 28 | fs, filename, services = pickle_tester.get_services_for_reader(tmpdir) 29 | 30 | with BufferingNodeExecutionContext(PickleReader(filename), services=services) as context: 31 | context.write_sync(EMPTY) 32 | 33 | output = context.get_buffer() 34 | assert context.get_output_fields() == ("a", "b", "c") 35 | assert output == [("a foo", "b foo", "c foo"), ("a bar", "b bar", "c bar")] 36 | -------------------------------------------------------------------------------- /bonobo/util/api.py: -------------------------------------------------------------------------------- 1 | from bonobo.util import get_name 2 | 3 | 4 | class ApiHelper: 5 | # TODO __all__ kwarg only 6 | def __init__(self, __all__): 7 | self.__all__ = __all__ 8 | 9 | def register(self, x, graph=False): 10 | """Register a function as being part of an API, then returns the original function.""" 11 | 12 | if graph: 13 | # This function must comply to the "graph" API interface, meaning it can bahave like bonobo.run. 14 | from inspect import signature 15 | 16 | parameters = list(signature(x).parameters) 17 | required_parameters = {"plugins", "services", "strategy"} 18 | assert ( 19 | len(parameters) > 0 and parameters[0] == "graph" 20 | ), 'First parameter of a graph api function must be "graph".' 
21 | assert ( 22 | required_parameters.intersection(parameters) == required_parameters 23 | ), "Graph api functions must define the following parameters: " + ", ".join(sorted(required_parameters)) 24 | 25 | self.__all__.append(get_name(x)) 26 | return x 27 | 28 | def register_graph(self, x): 29 | return self.register(x, graph=True) 30 | 31 | def register_group(self, *args, check=None): 32 | check = set(check) if check else None 33 | for attr in args: 34 | self.register(attr) 35 | if check: 36 | check.remove(get_name(attr)) 37 | assert not (check and len(check)) 38 | -------------------------------------------------------------------------------- /docs/reference/commands.rst: -------------------------------------------------------------------------------- 1 | Command-line 2 | ============ 3 | 4 | 5 | Bonobo Convert 6 | :::::::::::::: 7 | 8 | Build a simple bonobo graph with one reader and one writer, then execute it, allowing you to use bonobo in "no code" mode 9 | for simple file format conversions. 10 | 11 | Syntax: `bonobo convert [-r reader] input_filename [-w writer] output_filename` 12 | 13 | .. todo:: 14 | 15 | add a way to override default options of reader/writers, add a way to add "filters", for example this could be used 16 | to read from csv and write to csv too (or other format) but adding a geocoder filter that would add some fields. 17 | 18 | 19 | Bonobo Inspect 20 | :::::::::::::: 21 | 22 | Inspects a bonobo graph source file. For now, only supports graphviz output. 23 | 24 | Syntax: `bonobo inspect [--graph|-g] filename` 25 | 26 | Requires graphviz if you want to generate an actual graph picture, although the command itself depends on nothing. 27 | 28 | 29 | Bonobo Run 30 | :::::::::: 31 | 32 | Run a transformation graph. 33 | 34 | Syntax: `bonobo run [-c cmd | -m mod | file | -] [arg]` 35 | 36 | .. todo:: implement -m, check if -c is of any use and if yes, implement it too. Implement args, too. 
37 | 38 | 39 | Bonobo RunC 40 | ::::::::::: 41 | 42 | Run a transformation graph in a docker container. 43 | 44 | Syntax: `bonobo runc [-c cmd | -m mod | file | -] [arg]` 45 | 46 | .. todo:: implement -m, check if -c is of any use and if yes, implement it too. Implement args, too. 47 | 48 | Requires `bonobo-docker`, install with `docker` extra: `pip install bonobo[docker]`. 49 | 50 | -------------------------------------------------------------------------------- /bonobo/nodes/throttle.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import time 3 | 4 | from bonobo.config import Configurable, ContextProcessor, Method, Option 5 | 6 | 7 | class RateLimitBucket(threading.Thread): 8 | daemon = True 9 | 10 | @property 11 | def stopped(self): 12 | return self._stop_event.is_set() 13 | 14 | def __init__(self, initial=1, period=1, amount=1): 15 | super(RateLimitBucket, self).__init__() 16 | self.semaphore = threading.BoundedSemaphore(initial) 17 | self.amount = amount 18 | self.period = period 19 | 20 | self._stop_event = threading.Event() 21 | 22 | def stop(self): 23 | self._stop_event.set() 24 | 25 | def run(self): 26 | while not self.stopped: 27 | time.sleep(self.period) 28 | for _ in range(self.amount): 29 | self.semaphore.release() 30 | 31 | def wait(self): 32 | return self.semaphore.acquire() 33 | 34 | 35 | class RateLimited(Configurable): 36 | handler = Method() 37 | 38 | initial = Option(int, positional=True, default=1) 39 | period = Option(int, positional=True, default=1) 40 | amount = Option(int, positional=True, default=1) 41 | 42 | @ContextProcessor 43 | def bucket(self, context): 44 | bucket = RateLimitBucket(self.initial, self.amount, self.period) 45 | bucket.start() 46 | yield bucket 47 | bucket.stop() 48 | bucket.join() 49 | 50 | def __call__(self, bucket, *args, **kwargs): 51 | bucket.wait() 52 | return self.handler(*args, **kwargs) 53 | 
-------------------------------------------------------------------------------- /bonobo/contrib/jupyter/js/src/bonobo.js: -------------------------------------------------------------------------------- 1 | var widgets = require('jupyter-js-widgets'); 2 | var _ = require('underscore'); 3 | 4 | // Custom Model. Custom widgets models must at least provide default values 5 | // for model attributes, including `_model_name`, `_view_name`, `_model_module` 6 | // and `_view_module` when different from the base class. 7 | // 8 | // When serialiazing entire widget state for embedding, only values different from the 9 | // defaults will be specified. 10 | 11 | const BonoboModel = widgets.DOMWidgetModel.extend({ 12 | defaults: _.extend({}, widgets.DOMWidgetModel.prototype.defaults, { 13 | _model_name: 'BonoboModel', 14 | _view_name: 'BonoboView', 15 | _model_module: 'bonobo', 16 | _view_module: 'bonobo', 17 | value: [] 18 | }) 19 | }); 20 | 21 | 22 | // Custom View. Renders the widget model. 23 | const BonoboView = widgets.DOMWidgetView.extend({ 24 | render: function () { 25 | this.value_changed(); 26 | this.model.on('change:value', this.value_changed, this); 27 | }, 28 | 29 | value_changed: function () { 30 | this.$el.html( 31 | '
' + this.model.get('value').map((key, i) => { 32 | return `` 33 | }).join('\n') + '
${key.status}${key.name}${key.stats}${key.flags}
' 34 | ); 35 | }, 36 | }); 37 | 38 | 39 | module.exports = { 40 | BonoboModel: BonoboModel, 41 | BonoboView: BonoboView 42 | }; 43 | -------------------------------------------------------------------------------- /bonobo/commands/templates/default.py-tpl: -------------------------------------------------------------------------------- 1 | import bonobo 2 | 3 | 4 | def extract(): 5 | """Placeholder, change, rename, remove... """ 6 | yield 'hello' 7 | yield 'world' 8 | 9 | 10 | def transform(*args): 11 | """Placeholder, change, rename, remove... """ 12 | yield tuple( 13 | map(str.title, args) 14 | ) 15 | 16 | 17 | def load(*args): 18 | """Placeholder, change, rename, remove... """ 19 | print(*args) 20 | 21 | 22 | def get_graph(**options): 23 | """ 24 | This function builds the graph that needs to be executed. 25 | 26 | :return: bonobo.Graph 27 | 28 | """ 29 | graph = bonobo.Graph() 30 | ( 31 | graph 32 | >> extract 33 | >> transform 34 | >> load 35 | ) 36 | return graph 37 | 38 | 39 | def get_services(**options): 40 | """ 41 | This function builds the services dictionary, which is a simple dict of names-to-implementation used by bonobo 42 | for runtime injection. 43 | 44 | It will be used on top of the defaults provided by bonobo (fs, http, ...). You can override those defaults, or just 45 | let the framework define them. You can also define your own services and naming is up to you. 46 | 47 | :return: dict 48 | """ 49 | return {} 50 | 51 | 52 | # The __main__ block actually execute the graph. 53 | if __name__ == '__main__': 54 | parser = bonobo.get_argument_parser() 55 | with bonobo.parse_args(parser) as options: 56 | bonobo.run( 57 | get_graph(**options), 58 | services=get_services(**options) 59 | ) 60 | -------------------------------------------------------------------------------- /docs/extension/jupyter.rst: -------------------------------------------------------------------------------- 1 | Working with Jupyter 2 | ==================== 3 | 4 | .. 
include:: _beta.rst 5 | 6 | There is a builtin plugin that integrates (somewhat minimalistically, for now) bonobo within jupyter notebooks, so 7 | you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget. 8 | 9 | Installation 10 | :::::::::::: 11 | 12 | Install `bonobo` with the **jupyter** extra:: 13 | 14 | pip install bonobo[jupyter] 15 | 16 | Install the jupyter extension:: 17 | 18 | jupyter nbextension enable --py --sys-prefix widgetsnbextension 19 | jupyter nbextension enable --py --sys-prefix bonobo.contrib.jupyter 20 | 21 | Development 22 | ::::::::::: 23 | 24 | You should favor yarn over npm to install node packages. If you prefer to use npm, it's up to you to adapt the code. 25 | 26 | To install the widget for development, make sure you're using an editable install of bonobo (see install document):: 27 | 28 | jupyter nbextension install --py --symlink --sys-prefix bonobo.contrib.jupyter 29 | jupyter nbextension enable --py --sys-prefix bonobo.contrib.jupyter 30 | 31 | If you want to change the javascript, you should run webpack in watch mode in some terminal:: 32 | 33 | cd bonobo/contrib/jupyter/js 34 | yarn install 35 | ./node_modules/.bin/webpack --watch 36 | 37 | To compile the widget into a distributable version (which gets packaged on PyPI when a release is made), just run 38 | webpack:: 39 | 40 | ./node_modules/.bin/webpack 41 | 42 | 43 | Source code 44 | ::::::::::: 45 | 46 | https://github.com/python-bonobo/bonobo/tree/master/bonobo/contrib/jupyter 47 | -------------------------------------------------------------------------------- /tests/nodes/io/test_file.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from bonobo import FileReader, FileWriter 4 | from bonobo.constants import EMPTY 5 | from bonobo.execution.contexts.node import NodeExecutionContext 6 | from bonobo.util.testing import BufferingNodeExecutionContext, FilesystemTester 7 | 8 | 
txt_tester = FilesystemTester("txt") 9 | txt_tester.input_data = "Hello\nWorld\n" 10 | 11 | 12 | def test_file_writer_contextless(tmpdir): 13 | fs, filename, services = txt_tester.get_services_for_writer(tmpdir) 14 | 15 | with FileWriter(path=filename).open(fs) as fp: 16 | fp.write("Yosh!") 17 | 18 | with fs.open(filename) as fp: 19 | assert fp.read() == "Yosh!" 20 | 21 | 22 | @pytest.mark.parametrize( 23 | "lines,output", 24 | [(("ACME",), "ACME"), (("Foo", "Bar", "Baz"), "Foo\nBar\nBaz")], # one line... # more than one line... 25 | ) 26 | def test_file_writer_in_context(tmpdir, lines, output): 27 | fs, filename, services = txt_tester.get_services_for_writer(tmpdir) 28 | 29 | with NodeExecutionContext(FileWriter(path=filename), services=services) as context: 30 | context.write_sync(*lines) 31 | 32 | with fs.open(filename) as fp: 33 | assert fp.read() == output 34 | 35 | 36 | def test_file_reader(tmpdir): 37 | fs, filename, services = txt_tester.get_services_for_reader(tmpdir) 38 | 39 | with BufferingNodeExecutionContext(FileReader(path=filename), services=services) as context: 40 | context.write_sync(EMPTY) 41 | 42 | output = context.get_buffer() 43 | assert len(output) == 2 44 | assert output[0] == ("Hello",) 45 | assert output[1] == ("World",) 46 | -------------------------------------------------------------------------------- /bonobo/examples/datasets/coffeeshops.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import bonobo 4 | from bonobo import examples 5 | from bonobo.contrib.opendatasoft import OpenDataSoftAPI as ODSReader 6 | from bonobo.examples import get_services 7 | from bonobo.structs.graphs import PartialGraph 8 | 9 | 10 | def get_graph(graph=None, *, _limit=(), _print=()): 11 | """ 12 | Extracts a list of cafes with on euro in Paris, renames the name, address and zipcode fields, 13 | reorders the fields and formats to json and csv files. 
14 | 15 | """ 16 | graph = graph or bonobo.Graph() 17 | 18 | producer = ( 19 | graph.get_cursor() 20 | >> ODSReader(dataset="liste-des-cafes-a-un-euro", netloc="opendata.paris.fr") 21 | >> PartialGraph(*_limit) 22 | >> bonobo.UnpackItems(0) 23 | >> bonobo.Rename(name="nom_du_cafe", address="adresse", zipcode="arrondissement") 24 | >> bonobo.Format(city="Paris", country="France") 25 | >> bonobo.OrderFields(["name", "address", "zipcode", "city", "country", "geometry", "geoloc"]) 26 | >> PartialGraph(*_print) 27 | ) 28 | 29 | # Comma separated values. 30 | graph.get_cursor(producer.output) >> bonobo.CsvWriter( 31 | "coffeeshops.csv", fields=["name", "address", "zipcode", "city"], delimiter="," 32 | ) 33 | 34 | # Standard JSON 35 | graph.get_cursor(producer.output) >> bonobo.JsonWriter(path="coffeeshops.json") 36 | 37 | # Line-delimited JSON 38 | graph.get_cursor(producer.output) >> bonobo.LdjsonWriter(path="coffeeshops.ldjson") 39 | 40 | return graph 41 | 42 | 43 | if __name__ == "__main__": 44 | sys.exit(examples.run(get_graph, get_services)) 45 | -------------------------------------------------------------------------------- /bonobo/examples/datasets/fablabs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extracts a list of fablabs in the world, restricted to the ones in france, then format its both for a nice console output 3 | and a flat txt file. 4 | 5 | .. 
graphviz:: 6 | 7 | digraph { 8 | rankdir = LR; 9 | stylesheet = "../_static/graphs.css"; 10 | 11 | BEGIN [shape="point"]; 12 | BEGIN -> "ODS()" -> "normalize" -> "filter_france" -> "Tee()" -> "JsonWriter()"; 13 | } 14 | 15 | """ 16 | 17 | import json 18 | import sys 19 | 20 | import bonobo 21 | from bonobo import examples 22 | from bonobo.contrib.opendatasoft import OpenDataSoftAPI 23 | from bonobo.examples import get_services 24 | 25 | try: 26 | import pycountry 27 | except ImportError as exc: 28 | raise ImportError('You must install package "pycountry" to run this example.') from exc 29 | 30 | API_DATASET = "fablabs@public-us" 31 | ROWS = 100 32 | 33 | 34 | def _getlink(x): 35 | return x.get("url", None) 36 | 37 | 38 | def normalize(row): 39 | result = { 40 | **row, 41 | "links": list(filter(None, map(_getlink, json.loads(row.get("links"))))), 42 | "country": pycountry.countries.get(alpha_2=row.get("country_code", "").upper()).name, 43 | } 44 | return result 45 | 46 | 47 | def get_graph(graph=None, *, _limit=(), _print=()): 48 | graph = graph or bonobo.Graph() 49 | graph.add_chain( 50 | OpenDataSoftAPI(dataset=API_DATASET), 51 | *_limit, 52 | normalize, 53 | bonobo.UnpackItems(0), 54 | *_print, 55 | bonobo.JsonWriter(path="fablabs.json"), 56 | ) 57 | return graph 58 | 59 | 60 | if __name__ == "__main__": 61 | sys.exit(examples.run(get_graph, get_services)) 62 | -------------------------------------------------------------------------------- /docs/guide/debugging.rst: -------------------------------------------------------------------------------- 1 | Debugging 2 | ========= 3 | 4 | .. note:: 5 | 6 | This document writing is in progress, but its content should be correct (but succint). 7 | 8 | Using a debugger (pdb...) 
9 | ::::::::::::::::::::::::: 10 | 11 | Using a debugger works (as in any piece of python code), but you must be aware that each node runs in a separate thread, 12 | which means a few things: 13 | 14 | * If a breakpoint happens in a thread, then this thread will stop, but all other threads will continue running. This 15 | can be especially annoying if you try to use the pdb REPL for example, as your prompt will be overridden a few 16 | times/second by the current execution statistics. 17 | 18 | To avoid that, you can run bonobo with `QUIET=1` in the environment, to hide statistics. 19 | 20 | * If your breakpoint never happens (although it's at the very beginning of your transformation), it may mean that 21 | something happens out of the transform. The :class:`bonobo.execution.contexts.NodeExecutionContext` instance that 22 | surrounds your transformation may be stuck in its `while True: transform()` loop. 23 | 24 | Break one level higher. 25 | 26 | 27 | Using printing statements 28 | ::::::::::::::::::::::::: 29 | 30 | Of course, you can :obj:`print` things. 31 | 32 | You can even add :obj:`print` statements in graphs, to :obj:`print` once per row. 33 | 34 | A better :obj:`print` is available though, suitable for both flow-based data processing and human eyes. 35 | Check :class:`bonobo.PrettyPrinter`. 36 | 37 | 38 | Inspecting graphs 39 | ::::::::::::::::: 40 | 41 | * Using the console: `bonobo inspect --graph`. 42 | * Using Jupyter notebook: install the extension and just display a graph. 43 | 44 | 45 | .. 
include:: _next.rst 46 | -------------------------------------------------------------------------------- /tests/commands/test_run.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest.mock import patch 3 | 4 | from bonobo import get_examples_path 5 | from bonobo.util.testing import all_runners 6 | 7 | 8 | @all_runners 9 | def test_run(runner): 10 | out, err = runner("run", "--quiet", get_examples_path("types/strings.py")) 11 | out = out.split("\n") 12 | assert out[0].startswith("Foo ") 13 | assert out[1].startswith("Bar ") 14 | assert out[2].startswith("Baz ") 15 | 16 | 17 | @all_runners 18 | def test_run_module(runner): 19 | out, err = runner("run", "--quiet", "-m", "bonobo.examples.types.strings") 20 | out = out.split("\n") 21 | assert out[0].startswith("Foo ") 22 | assert out[1].startswith("Bar ") 23 | assert out[2].startswith("Baz ") 24 | 25 | 26 | @all_runners 27 | def test_run_path(runner): 28 | out, err = runner("run", "--quiet", get_examples_path("types")) 29 | out = out.split("\n") 30 | assert out[0].startswith("Foo ") 31 | assert out[1].startswith("Bar ") 32 | assert out[2].startswith("Baz ") 33 | 34 | 35 | @all_runners 36 | def test_install_requirements_for_dir(runner): 37 | dirname = get_examples_path("types") 38 | with patch("bonobo.commands.run._install_requirements") as install_mock: 39 | runner("run", "--install", dirname) 40 | install_mock.assert_called_once_with(os.path.join(dirname, "requirements.txt")) 41 | 42 | 43 | @all_runners 44 | def test_install_requirements_for_file(runner): 45 | dirname = get_examples_path("types") 46 | with patch("bonobo.commands.run._install_requirements") as install_mock: 47 | runner("run", "--install", os.path.join(dirname, "strings.py")) 48 | install_mock.assert_called_once_with(os.path.join(dirname, "requirements.txt")) 49 | -------------------------------------------------------------------------------- /bonobo/execution/strategies/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Execution strategies define how an actual job execution will happen. Default and recommended strategy is "threadpool", 3 | for now, which leverage a :obj:`concurrent.futures.ThreadPoolExecutor` to run each node in a separate thread. 4 | 5 | In the future, the two strategies that would really benefit bonobo are subprocess and dask/dask.distributed. Please be 6 | at home if you want to give it a shot. 7 | 8 | """ 9 | from bonobo.execution.strategies.executor import ( 10 | AsyncThreadPoolExecutorStrategy, ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy 11 | ) 12 | from bonobo.execution.strategies.naive import NaiveStrategy 13 | 14 | __all__ = ["create_strategy"] 15 | 16 | STRATEGIES = { 17 | "naive": NaiveStrategy, 18 | "processpool": ProcessPoolExecutorStrategy, 19 | "threadpool": ThreadPoolExecutorStrategy, 20 | "aio_threadpool": AsyncThreadPoolExecutorStrategy, 21 | } 22 | 23 | DEFAULT_STRATEGY = "threadpool" 24 | 25 | 26 | def create_strategy(name=None): 27 | """ 28 | Create a strategy, or just returns it if it's already one. 29 | 30 | :param name: 31 | :return: Strategy 32 | """ 33 | import logging 34 | from bonobo.execution.strategies.base import Strategy 35 | 36 | if isinstance(name, Strategy): 37 | return name 38 | 39 | if name is None: 40 | name = DEFAULT_STRATEGY 41 | 42 | logging.debug("Creating execution strategy {!r}...".format(name)) 43 | 44 | try: 45 | factory = STRATEGIES[name] 46 | except KeyError as exc: 47 | raise RuntimeError( 48 | "Invalid strategy {}. 
Available choices: {}.".format(repr(name), ", ".join(sorted(STRATEGIES.keys()))) 49 | ) from exc 50 | 51 | return factory() 52 | -------------------------------------------------------------------------------- /bonobo/examples/datasets/static/passwd.txt: -------------------------------------------------------------------------------- 1 | root:x:0:0:root:/root:/bin/bash 2 | daemon:x:105:1:daemon:/usr/sbin:/usr/sbin/nologin 3 | bin:x:2:2:bin:/bin:/usr/sbin/nologin 4 | sys:x:3:3:sys:/dev:/usr/sbin/nologin 5 | sync:x:4:65534:sync:/bin:/bin/sync 6 | games:x:5:60:games:/usr/games:/usr/sbin/nologin 7 | man:x:6:12:man:/var/cache/man:/usr/sbin/nologin 8 | lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin 9 | mail:x:0:8:mail:/var/mail:/usr/sbin/nologin 10 | news:x:9:9:news:/var/spool/news:/usr/sbin/nologin 11 | uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin 12 | proxy:x:13:13:proxy:/bin:/usr/sbin/nologin 13 | www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin 14 | backup:x:33:34:backup:/var/backups:/usr/sbin/nologin 15 | list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin 16 | irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin 17 | gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin 18 | nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin 19 | systemd-timesync:x:33:103:systemd Time Synchronization,,,:/run/systemd:/bin/false 20 | systemd-network:x:101:104:systemd Network Management,,,:/run/systemd/netif:/bin/false 21 | systemd-resolve:x:102:105:systemd Resolver,,,:/run/systemd/resolve:/bin/false 22 | systemd-bus-proxy:x:103:106:systemd Bus Proxy,,,:/run/systemd:/bin/false 23 | sshd:x:104:65534::/var/run/sshd:/usr/sbin/nologin 24 | ntp:x:105:110::/home/ntp:/bin/false 25 | postfix:x:105:112::/var/spool/postfix:/bin/false 26 | messagebus:x:107:114::/var/run/dbus:/bin/false 27 | debian-security-support:x:108:115:Debian security support check,,,:/var/lib/debian-security-support:/bin/false 28 | 
snmp:x:109:116::/var/lib/snmp:/usr/sbin/nologin 29 | postgres:x:105:117:PostgreSQL administrator,,,:/var/lib/postgresql:/bin/bash 30 | redis:x:111:118::/var/lib/redis:/bin/false -------------------------------------------------------------------------------- /bonobo/nodes/io/base.py: -------------------------------------------------------------------------------- 1 | from bonobo.config import Configurable, ContextProcessor, Option, Service 2 | 3 | 4 | def filesystem_path(path: str): 5 | if path.startswith("/"): 6 | raise ValueError( 7 | "File path should not be absolute. If you really need to provide absolute paths, then you must pass a " 8 | "filesystem instance that is bound to your filesystem root and provide a relative path from there." 9 | ) 10 | return str(path) 11 | 12 | 13 | class FileHandler(Configurable): 14 | """Abstract component factory for file-related components. 15 | 16 | Args: 17 | fs (str): service name to use for filesystem. 18 | path (str): which path to use within the provided filesystem. 19 | eol (str): which character to use to separate lines. 20 | mode (str): which mode to use when opening the file. 21 | encoding (str): which encoding to use when opening the file. 22 | """ 23 | 24 | path = Option( 25 | filesystem_path, required=True, positional=True, __doc__="Path to use within the provided filesystem." 
26 | ) # type: str 27 | eol = Option(str, default="\n", __doc__="Character to use as line separator.") # type: str 28 | mode = Option(str, __doc__="What mode to use for open() call.") # type: str 29 | encoding = Option(str, default="utf-8", __doc__="Encoding.") # type: str 30 | fs = Service("fs", __doc__="The filesystem instance to use.") # type: str 31 | 32 | @ContextProcessor 33 | def file(self, context, *, fs): 34 | with self.open(fs) as file: 35 | yield file 36 | 37 | def open(self, fs): 38 | return fs.open(self.path, self.mode, encoding=self.encoding) 39 | 40 | 41 | class Reader: 42 | pass 43 | 44 | 45 | class Writer: 46 | pass 47 | -------------------------------------------------------------------------------- /bonobo/__init__.py: -------------------------------------------------------------------------------- 1 | # Bonobo data-processing toolkit. 2 | # 3 | # Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of data 4 | # transformations using a simple directed graph of python callables. 5 | # 6 | # Licensed under Apache License 2.0, read the LICENSE file in the root of the source tree. 
7 | 8 | import sys 9 | from pathlib import Path 10 | 11 | from bonobo._api import * 12 | from bonobo._api import ( 13 | CsvReader, CsvWriter, FileReader, FileWriter, Filter, FixedWindow, Format, Graph, JsonReader, JsonWriter, 14 | LdjsonReader, LdjsonWriter, Limit, MapFields, OrderFields, PickleReader, PickleWriter, PrettyPrinter, RateLimited, 15 | Rename, SetFields, Tee, UnpackItems, __all__, __doc__, count, create_reader, create_strategy, create_writer, 16 | get_argument_parser, get_examples_path, identity, inspect, noop, open_examples_fs, open_fs, parse_args, run 17 | ) 18 | from bonobo._version import __version__ 19 | 20 | if sys.version_info < (3, 5): 21 | raise RuntimeError("Python 3.5+ is required to use Bonobo.") 22 | 23 | 24 | __all__ = ["__version__"] + __all__ 25 | with (Path(__file__).parent / "bonobo.svg").open() as f: 26 | __logo__ = f.read() 27 | __doc__ = __doc__ # lgtm [py/redundant-assignment] 28 | __version__ = __version__ # lgtm [py/redundant-assignment] 29 | 30 | 31 | def _repr_html_(): 32 | """This allows to easily display a version snippet in Jupyter.""" 33 | from bonobo.commands.version import get_versions 34 | 35 | return ( 36 | '
' 37 | '
{}
' 38 | '
{}
' 39 | "
" 40 | ).format(__logo__, "
".join(get_versions(all=True))) 41 | 42 | 43 | del sys, Path, f 44 | -------------------------------------------------------------------------------- /bonobo/commands/version.py: -------------------------------------------------------------------------------- 1 | from mondrian import humanizer 2 | 3 | from bonobo.commands import BaseCommand 4 | 5 | 6 | def get_versions(*, all=False, quiet=None): 7 | import bonobo 8 | from bonobo.util.pkgs import bonobo_packages 9 | 10 | yield _format_version(bonobo, quiet=quiet) 11 | 12 | if all: 13 | for name in sorted(bonobo_packages): 14 | if name != "bonobo": 15 | try: 16 | mod = __import__(name.replace("-", "_")) 17 | try: 18 | yield _format_version(mod, name=name, quiet=quiet) 19 | except Exception as exc: 20 | yield "{} ({})".format(name, exc) 21 | except ImportError as exc: 22 | yield "{} is not importable ({}).".format(name, exc) 23 | 24 | 25 | class VersionCommand(BaseCommand): 26 | @humanizer.humanize() 27 | def handle(self, *, all=False, quiet=False): 28 | for line in get_versions(all=all, quiet=quiet): 29 | print(line) 30 | 31 | def add_arguments(self, parser): 32 | parser.add_argument("--all", "-a", action="store_true") 33 | parser.add_argument("--quiet", "-q", action="count") 34 | 35 | 36 | def _format_version(mod, *, name=None, quiet=False): 37 | from bonobo.util.pkgs import bonobo_packages 38 | 39 | args = { 40 | "name": name or mod.__name__, 41 | "version": mod.__version__, 42 | "location": bonobo_packages[name or mod.__name__].location, 43 | } 44 | 45 | if not quiet: 46 | return "{name} v.{version} (in {location})".format(**args) 47 | if quiet < 2: 48 | return "{name} {version}".format(**args) 49 | if quiet < 3: 50 | return "{version}".format(**args) 51 | 52 | raise RuntimeError("Hard to be so quiet...") 53 | -------------------------------------------------------------------------------- /tests/config/test_processors.py: -------------------------------------------------------------------------------- 1 | from 
operator import attrgetter 2 | 3 | from bonobo.config import Configurable 4 | from bonobo.config.processors import ContextCurrifier, ContextProcessor, resolve_processors, use_context_processor 5 | 6 | 7 | class CP1(Configurable): 8 | @ContextProcessor 9 | def c(self): 10 | yield 11 | 12 | @ContextProcessor 13 | def a(self): 14 | yield "this is A" 15 | 16 | @ContextProcessor 17 | def b(self, a): 18 | yield a.upper()[:-1] + "b" 19 | 20 | def __call__(self, a, b): 21 | return a, b 22 | 23 | 24 | class CP2(CP1): 25 | @ContextProcessor 26 | def f(self): 27 | pass 28 | 29 | @ContextProcessor 30 | def e(self): 31 | pass 32 | 33 | @ContextProcessor 34 | def d(self): 35 | pass 36 | 37 | 38 | class CP3(CP2): 39 | @ContextProcessor 40 | def c(self): 41 | pass 42 | 43 | @ContextProcessor 44 | def b(self): 45 | pass 46 | 47 | 48 | def get_all_processors_names(cls): 49 | return list(map(attrgetter("__name__"), resolve_processors(cls))) 50 | 51 | 52 | def test_inheritance_and_ordering(): 53 | assert get_all_processors_names(CP1) == ["c", "a", "b"] 54 | assert get_all_processors_names(CP2) == ["c", "a", "b", "f", "e", "d"] 55 | assert get_all_processors_names(CP3) == ["c", "a", "b", "f", "e", "d", "c", "b"] 56 | 57 | 58 | def test_setup_teardown(): 59 | o = CP1() 60 | stack = ContextCurrifier(o) 61 | stack.setup() 62 | assert o(*stack.args) == ("this is A", "THIS IS b") 63 | stack.teardown() 64 | 65 | 66 | def test_processors_on_func(): 67 | def cp(context): 68 | yield context 69 | 70 | @use_context_processor(cp) 71 | def node(context): 72 | pass 73 | 74 | assert get_all_processors_names(node) == ["cp"] 75 | -------------------------------------------------------------------------------- /docs/tutorial/index.rst: -------------------------------------------------------------------------------- 1 | First steps 2 | =========== 3 | 4 | Bonobo is an ETL (Extract-Transform-Load) framework for python 3.5. 
The goal is to define data-transformations, with 5 | python code in charge of handling similarly shaped independent lines of data. 6 | 7 | Bonobo *is not* a statistical or data-science tool. If you're looking for a data-analysis tool in python, use Pandas. 8 | 9 | Bonobo is a lean manufacturing assembly line for data that lets you focus on the actual work instead of the plumbing 10 | (execution contexts, parallelism, error handling, console output, logging, ...). 11 | 12 | Bonobo uses simple python and should be quick and easy to learn. 13 | 14 | **Tutorials** 15 | 16 | .. toctree:: 17 | :maxdepth: 1 18 | 19 | 1-init 20 | 2-jobs 21 | 3-files 22 | 4-services 23 | 5-packaging 24 | 25 | **What's next?** 26 | 27 | Once you're familiar with all the base concepts, you can... 28 | 29 | * Read the :doc:`Guides </guide/index>` for a deep dive into each concept. 30 | * Explore the :doc:`Extensions </extension/index>` to widen the possibilities: 31 | 32 | * :doc:`/extension/django` 33 | * :doc:`/extension/docker` 34 | * :doc:`/extension/jupyter` 35 | * :doc:`/extension/sqlalchemy` 36 | 37 | * Open the :doc:`References </reference/index>` and start hacking like crazy. 38 | 39 | **You're not alone!** 40 | 41 | Good documentation is not easy to write. 42 | 43 | Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and 44 | apologize. 45 | 46 | If you're stuck, please come to the `Bonobo Slack Channel `_ and we'll figure it 47 | out. 48 | 49 | If you're not stuck but had trouble understanding something, please consider contributing to the docs (using GitHub 50 | pull requests). 51 | 52 | .. 
class ValidationError(RuntimeError):
    """Runtime error whose message is prefixed with the class name of the object that failed validation."""

    def __init__(self, inst, message):
        # Only the type name of the offending object is kept, not the object itself.
        owner = type(inst).__name__
        text = "Validation error in {class_name}: {message}".format(class_name=owner, message=message)
        super().__init__(text)
# Test fixture: a configurable declaring two Method attributes and two Option attributes.
# NOTE(review): declaration order is kept as is; it presumably decides how positional arguments are bound.
class Bobby(Configurable):
    handler = Method()
    handler2 = Method()
    foo = Option(positional=True)
    bar = Option(required=False)

    @ContextProcessor
    def think(self, context):
        # Yields a single value; presumably injected as the `think` argument of __call__ (confirm against
        # the ContextProcessor implementation).
        yield "different"

    def __call__(self, think, *args, **kwargs):
        # `think` is accepted but unused; both handlers get a distinguishing first argument ("1" / "2")
        # followed by the forwarded positional and keyword arguments.
        self.handler("1", *args, **kwargs)
        self.handler2("2", *args, **kwargs)
def generate_integers():
    """Yield the integers 0 through 9, in increasing order."""
    for value in range(10):
        yield value
# Maps each dataset name accepted by --target to the factory that adds its subgraph to a graph.
graph_factories = {"coffeeshops": get_coffeeshops_graph, "fablabs": get_fablabs_graph}

if __name__ == "__main__":
    parser = examples.get_argument_parser()
    parser.add_argument("--target", "-t", choices=graph_factories.keys(), nargs="+")
    parser.add_argument("--sync", action="store_true", default=False)

    with bonobo.parse_args(parser) as options:
        graph_options = examples.get_graph_options(options)
        # Without --target, every known dataset is built, in alphabetical order.
        graph_names = list(options["target"] if options["target"] else sorted(graph_factories.keys()))

        # Create a graph with all requested subgraphs
        graph = bonobo.Graph()
        for name in graph_names:
            graph = graph_factories[name](graph, **graph_options)

        bonobo.run(graph, services=get_services())

        if options["sync"]:
            # TODO: when parallel option for node will be implemented, need to be rewritten to use a graph.
            import boto3

            s3 = boto3.client("s3")

            local_dir = get_datasets_dir()
            for root, _dirs, files in os.walk(local_dir):
                for filename in files:
                    local_path = os.path.join(root, filename)
                    relative_path = os.path.relpath(local_path, local_dir)
                    # S3 keys always use "/" as separator: build the key from path components instead of
                    # os.path.join(), which would produce backslash-separated keys on Windows.
                    s3_path = "/".join([get_minor_version()] + relative_path.split(os.sep))

                    try:
                        s3.head_object(Bucket="bonobo-examples", Key=s3_path)
                    except Exception:
                        # Best effort: any failure of the existence check (not only "not found") is treated
                        # as "object missing" and triggers an upload attempt.
                        s3.upload_file(local_path, "bonobo-examples", s3_path, ExtraArgs={"ACL": "public-read"})
def raise_an_error(*args, **kwargs):
    """Fail unconditionally with a plain ``Exception``; every argument is accepted and ignored."""
    failure = Exception("Careful, man, there's a beverage here!")
    raise failure
def test_lifecycle_of_graph_with_unrecoverable_error():
    """Run a graph whose middle node always raises and check the context flags before and after the run."""
    failing_graph = Graph([1, 2, 3], raise_an_unrecoverrable_error, print)

    with GraphExecutionContext(failing_graph) as ctx:
        # Inside the block the context has been started and is not stopped yet.
        assert ctx.started
        assert ctx.alive
        assert not ctx.stopped

        ctx.write(BEGIN, EMPTY, END)
        ctx.loop()

    # Leaving the block stops the context; the exit status stays falsy.
    assert ctx.started
    assert not ctx.alive
    assert ctx.stopped
    assert not ctx.xstatus
def test_input_runlevels():
    """Walk an Input queue through its BEGIN/END run levels and check when reads and writes are allowed."""
    q = Input()

    # Before BEGIN, no one should be able to write to an Input queue.
    assert not q.alive
    with pytest.raises(InactiveWritableError):
        q.put("hello, unborn queue.")

    # First BEGIN: the queue becomes alive, at run level 1.
    q.put(BEGIN)
    assert q.alive and q._runlevel == 1
    q.put("foo")

    # Second BEGIN: the run level goes up to 2; the END below is put while at that level.
    q.put(BEGIN)
    assert q.alive and q._runlevel == 2
    q.put("bar")
    q.put(END)

    # Items come out in FIFO order.
    assert q.get() == "foo"
    assert q.get() == "bar"

    # A non-blocking get raises Empty (the END token is not returned to the reader), and the queue is
    # still reported alive afterwards.
    with pytest.raises(Empty):
        q.get(block=False)
    assert q.alive

    # Before killing, let's slide some data in.
    q.put("baz")

    # Now kill the queue: after this END, further writes are refused.
    q.put(END)
    with pytest.raises(InactiveWritableError):
        q.put("foo")

    # Remaining data can still be read; once it is drained, reads are refused as well.
    assert q.get() == "baz"
    with pytest.raises(InactiveReadableError):
        q.get()

{{ project }}

7 | {% endif %} 8 | 9 |

10 | {% else %} 11 |

{{ project }}

12 | {% endif %} 13 | 14 | {% if theme_description %} 15 |

{{ theme_description }}

16 | {% endif %} 17 | 18 | {% if theme_github_user and theme_github_repo %} 19 | {% if theme_github_button|lower == 'true' %} 20 |

21 | 23 |

24 | {% endif %} 25 | {% endif %} 26 | 27 | {% if theme_travis_button|lower != 'false' %} 28 | {% if theme_travis_button|lower == 'true' %} 29 | {% set path = theme_github_user + '/' + theme_github_repo %} 30 | {% else %} 31 | {% set path = theme_travis_button %} 32 | {% endif %} 33 |

34 | 35 | https://secure.travis-ci.org/{{ path }}.svg?branch={{ theme_badge_branch }} 39 | 40 |

41 | {% endif %} 42 | 43 | {% if theme_codecov_button|lower != 'false' %} 44 | {% if theme_codecov_button|lower == 'true' %} 45 | {% set path = theme_github_user + '/' + theme_github_repo %} 46 | {% else %} 47 | {% set path = theme_codecov_button %} 48 | {% endif %} 49 |

50 | 51 | https://codecov.io/github/{{ path }}/coverage.svg?branch={{ theme_badge_branch }} 55 | 56 |

def entrypoint(args=None):
    """
    Main callable for "bonobo" entrypoint.

    Will load commands from "bonobo.commands" entrypoints, using stevedore.

    :param args: argument list handed to argparse; ``None`` lets argparse read the process arguments.
    :return: 0 once the selected command handler has returned.
    """

    mondrian.setup(excepthook=True)
    logger = logging.getLogger()
    logger.setLevel(settings.LOGGING_LEVEL.get())

    parser = argparse.ArgumentParser()
    parser.add_argument("--debug", "-D", action="store_true")

    subparsers = parser.add_subparsers(dest="command")
    subparsers.required = True

    # Command name -> callable invoked with the parsed arguments as keyword arguments.
    commands = {}

    def register_extension(ext):
        # `commands` is only mutated, never rebound, so no `nonlocal` declaration is needed.
        try:
            subparser = subparsers.add_parser(ext.name)
            if isinstance(ext.plugin, type) and issubclass(ext.plugin, BaseCommand):
                # current way, class based.
                cmd = ext.plugin()
                cmd.add_arguments(subparser)
                cmd.__name__ = ext.name
                commands[ext.name] = cmd.handle
            else:
                # old school, function based.
                commands[ext.name] = ext.plugin(subparser)
        except Exception:
            # A broken extension must not prevent the other commands from being usable.
            logger.exception("Error while loading command %s.", ext.name)

    from stevedore import ExtensionManager

    mgr = ExtensionManager(namespace="bonobo.commands")
    mgr.map(register_extension)

    parsed_args = parser.parse_args(args).__dict__

    if parsed_args.pop("debug", False):
        settings.DEBUG.set(True)
        settings.LOGGING_LEVEL.set(logging.DEBUG)
        logger.setLevel(settings.LOGGING_LEVEL.get())

    logger.debug("Command: %s Arguments: %r", parsed_args["command"], parsed_args)

    # Get command handler, execute, rinse.
    command_name = parsed_args.pop("command")
    try:
        command = commands[command_name]
    except KeyError:
        # The sub-parser exists but registration failed above: report it instead of leaking a KeyError.
        parser.error("Command {!r} is registered but failed to load; see the error logged above.".format(command_name))
    command(**parsed_args)

    return 0
def get_graph_options(options):
    """
    Turn the ``limit`` and ``print`` command line options into optional graph nodes.

    Both keys are removed from *options*. Each returned value is a tuple holding zero or one node, so it can be
    star-expanded into a chain.

    :param options: dict
    :return: dict with the ``_limit`` and ``_print`` keys
    """
    limit = options.pop("limit", None)
    pretty = options.pop("print", False)

    limit_nodes = ()
    if limit:
        limit_nodes = (bonobo.Limit(limit),)

    print_nodes = ()
    if pretty:
        print_nodes = (bonobo.PrettyPrinter(),)

    return {"_limit": limit_nodes, "_print": print_nodes}
def cleanse_sms(category, sms):
    """
    Append a cleaned-up version of *sms* to the row.

    Spam is prefixed with a marker and cut to its first 50 characters (followed by an ellipsis when something was
    cut); ham is passed through unchanged.

    :return: ``(category, sms, sms_clean)``
    :raises ValueError: if *category* is neither ``"spam"`` nor ``"ham"``.
    """
    if category == "spam":
        marked = "**MARKED AS SPAM** " + sms[:50]
        suffix = "..." if len(sms) > 50 else ""
        return category, sms, marked + suffix

    if category == "ham":
        return category, sms, sms

    raise ValueError("Unknown category {!r}.".format(category))
@use_context
class PickleReader(FileReader, PickleHandler):
    """
    Reads a Python pickle object and yields its items as tuples.

    The unpickled object is iterated row by row: a dict is iterated as ``(key, value)`` pairs, any other iterable
    as is, and a non-iterable object is treated as a single row. Unless the context already has an output type,
    the output fields are taken from the ``fields`` option or, failing that, from the first row.
    """

    mode = Option(str, default="rb")

    def read(self, file, context, *, fs):
        """
        Unpickle *file* and yield each row as a tuple.

        :raises ValueError: if a row does not have as many items as there are output fields.
        """
        # SECURITY: unpickling can execute arbitrary code. Only read pickles coming from a trusted source.
        data = pickle.load(file)

        # if the data is not iterable, then wrap the object in a list so it may be iterated
        if isinstance(data, dict):
            iterator = iter(data.items())
        else:
            try:
                iterator = iter(data)
            except TypeError:
                iterator = iter([data])

        if not context.output_type:
            fields = self.fields
            if not fields:
                try:
                    fields = next(iterator)
                except StopIteration:
                    # Empty pickle and no explicit fields: nothing to yield. Returning here avoids leaking
                    # StopIteration out of the generator (turned into a RuntimeError since PEP 479).
                    return
            context.set_output_fields(fields)
        fields = context.get_output_fields()
        fields_length = len(fields)

        for row in iterator:
            if len(row) != fields_length:
                raise ValueError("Received an object with {} items, expected {}.".format(len(row), fields_length))

            # Rows coming from a dict are (key, value) tuples, which have no .values(): convert every row the
            # same way.
            yield tuple(row)

    __call__ = read
13 | :Environment: `DEBUG` 14 | :Setting: `bonobo.settings.DEBUG` 15 | :Default: `False` 16 | 17 | Profile 18 | ::::::: 19 | 20 | :Purpose: Sets profiling, which adds memory/cpu usage output. Not yet fully implemented. It is expected that setting 21 | this to true will have a non-negligible performance impact. 22 | :Environment: `PROFILE` 23 | :Setting: `bonobo.settings.PROFILE` 24 | :Default: `False` 25 | 26 | Quiet 27 | ::::: 28 | 29 | :Purpose: Sets the quiet mode, which asks for any output to be computer-parsable. Formatting will be removed, but it will 30 | allow the use of unix pipes, etc. Not yet fully implemented, but a few transformations already use it. Probably, it 31 | should be the default on non-interactive terminals. 32 | :Environment: `QUIET` 33 | :Setting: `bonobo.settings.QUIET` 34 | :Default: `False` 35 | 36 | Logging Level 37 | ::::::::::::: 38 | 39 | :Purpose: Sets the python minimum logging level. 40 | :Environment: `LOGGING_LEVEL` 41 | :Setting: `bonobo.settings.LOGGING_LEVEL` 42 | :Default: `DEBUG` if DEBUG is True, otherwise `INFO` 43 | :Values: `CRITICAL`, `FATAL`, `ERROR`, `WARNING`, `INFO`, `DEBUG`, `NOTSET` 44 | 45 | I/O Format 46 | :::::::::: 47 | 48 | :Purpose: Sets default input/output format for builtin transformations. It can be overridden on each node. The `kwargs` 49 | value means that each node will try to read its input from keyword arguments (and write similarly formatted 50 | output), while `arg0` means it will try to read its input from the first positional argument (and write 51 | similarly formatted output). 
class OpenDataSoftAPI(Configurable):
    """
    Extractor that pages through the records of an OpenDataSoft dataset and yields one dict per record.

    The request URL is built from the ``endpoint`` template (filled with ``scheme``, ``netloc`` and ``path``) plus
    a query string holding ``dataset``, ``timezone`` and the sorted ``kwargs`` items. Records are fetched ``rows``
    at a time until an empty page is returned or, when ``limit`` is set, until the offset reaches it.
    """

    dataset = Option(str, positional=True)
    endpoint = Option(str, required=False, default="{scheme}://{netloc}{path}")
    scheme = Option(str, required=False, default="https")
    netloc = Option(str, required=False, default="data.opendatasoft.com")
    path = Option(path_str, required=False, default="/api/records/1.0/search/")
    rows = Option(int, required=False, default=500)
    limit = Option(int, required=False)
    timezone = Option(str, required=False, default="Europe/Paris")
    kwargs = Option(dict, required=False, default=dict)

    @ContextProcessor
    def compute_path(self, context):
        # Yields the base URL, query string included; presumably injected as the `base_url` argument of the
        # next processor and of __call__ (confirm against the ContextProcessor implementation).
        params = (("dataset", self.dataset), ("timezone", self.timezone)) + tuple(sorted(self.kwargs.items()))
        yield self.endpoint.format(scheme=self.scheme, netloc=self.netloc, path=self.path) + "?" + urlencode(params)

    @ContextProcessor
    def start(self, context, base_url):
        # Offset of the next page, wrapped in a ValueHolder and starting at 0.
        yield ValueHolder(0)

    def __call__(self, base_url, start, *args, **kwargs):
        # NOTE(review): the comparison, subtraction and += below are applied to the ValueHolder itself; this
        # assumes ValueHolder forwards those operators to its wrapped value. Confirm in bonobo.util.objects.
        while (not self.limit) or (self.limit > start):
            # With a limit, never ask for more rows than what remains before reaching it.
            url = "{}&start={start}&rows={rows}".format(
                base_url, start=start.value, rows=self.rows if not self.limit else min(self.rows, self.limit - start)
            )
            # NOTE(review): no timeout is passed and the HTTP status is not checked before decoding the body.
            resp = requests.get(url)
            records = resp.json().get("records", [])

            # An empty page ends the pagination.
            if not len(records):
                break

            for row in records:
                # Flatten the record: its fields, plus the geometry and record id under fixed keys.
                yield {**row.get("fields", {}), "geometry": row.get("geometry", {}), "recordid": row.get("recordid")}

            start += self.rows
class _RequiredModule:
    """Attribute-style view over the globals dictionary of an executed python file."""

    def __init__(self, dct):
        self.__dict__ = dct


class _ModulesRegistry(dict):
    """Cache of python files executed by dotted name, resolved relative to the current working directory."""

    @property
    def pathname(self):
        """Directory under which dotted names are looked up (the current working directory)."""
        return os.getcwd()

    def require(self, name):
        """
        Return the module registered as *name*, executing ``<cwd>/<a>/<b>.py`` for ``a.b`` on first use.

        :param name: str, dotted module path
        :return: _RequiredModule
        """
        try:
            return self[name]
        except KeyError:
            pass

        *parents, leaf = name.split(".")
        filename = os.path.join(self.pathname, *parents, leaf + ".py")
        self[name] = _RequiredModule(runpy.run_path(filename, run_name=name))
        return self[name]


def _parse_option(option):
    """
    Turn a 'key=val' option string into a python (key, val) pair.

    A string without ``=`` is treated as a flag and maps to ``True``. The value is decoded as JSON when
    possible and kept as a raw string otherwise.

    :param option: str
    :return: tuple
    """
    key, separator, raw = option.partition("=")
    if not separator:
        return option, True

    try:
        return key, json.loads(raw)
    except json.JSONDecodeError:
        return key, raw


def _resolve_options(options=None):
    """
    Build a dictionary out of a collection of option strings (typically coming from the command line).

    :param options: tuple[str]
    :return: dict
    """
    if not options:
        return dict()
    return {key: value for key, value in map(_parse_option, options)}
@contextmanager
def sweeten_errors():
    """
    Context manager that displays any exception raised in its body as a framed, colored box on stdout.

    The exception is not re-raised once displayed; its traceback is sent to the root logger at DEBUG level.
    Each item of ``exc.args`` is printed on its own line. The first line is prefixed with the exception class
    name, and a blank line separates it from the following ones. Text between backticks is rendered in bold,
    and lines starting with `` $ `` are rendered as shell commands.

    The box starts at the terminal width (capped to 80 columns) and grows to fit the longest argument, up to
    120 columns.
    """
    try:
        yield
    except Exception as exc:
        SPACES = 2
        MAX_LINE_LENGTH = 120
        w = term.white
        prefix = w("║" + " " * (SPACES - 1))
        suffix = w(" " * (SPACES - 1) + "║")
        exc_name = type(exc).__name__

        # Exception arguments are not guaranteed to be strings (e.g. ``OSError(2, "...")``); convert them once
        # so that measuring and formatting cannot fail and hide the error being reported.
        args = [str(arg) for arg in exc.args]

        pre_re = re.compile("([^`]*)`([^`]*)`([^`]*)")

        def format_arg(arg):
            # Visible length is computed on the text stripped of backticks, before any color code is added.
            length = len(pre_re.sub("\\1\\2\\3", arg))

            arg = pre_re.sub(w("\\1") + term.bold("\\2") + w("\\3"), arg)
            arg = re.sub(r"^ \$ (.*)", term.lightblack(" $ ") + term.reset("\\1"), arg)

            return (arg, length)

        def f(*args):
            return "".join(args)

        term_width, _ = term.get_size()
        line_length = min(80, term_width)
        for arg in args:
            # Grow to fit the argument, but never beyond the maximum width.
            line_length = min(max(line_length, len(arg) + 2 * SPACES), MAX_LINE_LENGTH)

        print(f(w("╔" + "═" * (line_length - 2) + "╗")))
        for i, arg in enumerate(args):

            if i == 1:
                # Blank separator between the headline and the details.
                print(f(prefix, " " * (line_length - 2 * SPACES), suffix))

            arg_formatted, arg_length = format_arg(arg)
            if not i:
                # first line
                print(
                    f(
                        prefix,
                        term.red_bg(term.bold(" " + exc_name + " ")),
                        " ",
                        w(arg_formatted),
                        " " * (line_length - (arg_length + 3 + len(exc_name) + 2 * SPACES)),
                        suffix,
                    )
                )
            else:
                # other lines
                print(f(prefix, arg_formatted + " " * (line_length - arg_length - 2 * SPACES), suffix))

        print(f(w("╚" + "═" * (line_length - 2) + "╝")))

        logging.getLogger().debug("This error was caused by the following exception chain.", exc_info=exc_info())
transform_using_args, StoreInList(result_args)) 50 | graph.add_chain(transform_nt, StoreInList(result_nt), _input=extract_nt) 51 | graph.add_chain(extract_bt, transform_using_args, StoreInList(result_bt)) 52 | 53 | with GraphExecutionContext(graph) as context: 54 | context.run_until_complete() 55 | 56 | assert result_args == [ 57 | (2, "Guido", "guido.py"), 58 | (4, "Larry", "larry.pl"), 59 | (6, "Dennis", "dennis.c"), 60 | (8, "Yukihiro", "yukihiro.rb"), 61 | ] 62 | 63 | assert result_nt == [(1, "GUIDO", ".py"), (2, "LARRY", ".pl"), (3, "DENNIS", ".c"), (4, "YUKIHIRO", ".rb")] 64 | 65 | assert result_bt == [ 66 | (2, "Guido", "guido.py"), 67 | (4, "Larry", "larry.pl"), 68 | (6, "Dennis", "dennis.c"), 69 | (8, "Yukihiro", "yukihiro.rb"), 70 | ] 71 | -------------------------------------------------------------------------------- /bonobo/contrib/jupyter/static/extension.js: -------------------------------------------------------------------------------- 1 | define(function() { return /******/ (function(modules) { // webpackBootstrap 2 | /******/ // The module cache 3 | /******/ var installedModules = {}; 4 | 5 | /******/ // The require function 6 | /******/ function __webpack_require__(moduleId) { 7 | 8 | /******/ // Check if module is in cache 9 | /******/ if(installedModules[moduleId]) 10 | /******/ return installedModules[moduleId].exports; 11 | 12 | /******/ // Create a new module (and put it into the cache) 13 | /******/ var module = installedModules[moduleId] = { 14 | /******/ exports: {}, 15 | /******/ id: moduleId, 16 | /******/ loaded: false 17 | /******/ }; 18 | 19 | /******/ // Execute the module function 20 | /******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__); 21 | 22 | /******/ // Flag the module as loaded 23 | /******/ module.loaded = true; 24 | 25 | /******/ // Return the exports of the module 26 | /******/ return module.exports; 27 | /******/ } 28 | 29 | 30 | /******/ // expose the modules object 
class MyCustomType:
    """Opaque value object that only becomes a bag when explicitly asked to via :meth:`as_tuple`."""

    def __init__(self, *args):
        self.args = args

    def as_tuple(self):
        return MyBag(*self.args)


@pytest.mark.parametrize(
    ["factory", "expected", "expected_item0"],
    [
        [lambda: (1, 2, 3), tuple, int],
        [lambda: Envelope((1, 2, 3)), tuple, int],
        [lambda: MyTuple(1, 2, 3), MyTuple, int],
        [lambda: Envelope(MyTuple(1, 2, 3)), MyTuple, int],
        [lambda: MyBag(1, 2, 3), MyBag, int],
        [lambda: Envelope(MyBag(1, 2, 3)), MyBag, int],
        [lambda: MyCustomType(1, 2, 3), tuple, MyCustomType],
        [lambda: Envelope(MyCustomType(1, 2, 3)), tuple, MyCustomType],
    ],
)
def test_casts_after_output(factory: Callable, expected, expected_item0):
    """Both yielded values must reach the buffer with the expected exact row type and first item type."""

    def transform():
        for _ in range(2):
            yield factory()

    with BufferingNodeExecutionContext(transform) as context:
        context.write_sync(EMPTY)

    result = context.get_buffer()
    for index in (0, 1):
        row = result[index]
        assert expected == type(row)
        assert expected_item0 == type(row[0])


def test_cast_after_returning_custom_type():
    """A custom object is wrapped in a plain tuple on output; ``as_tuple`` then turns each one into a bag."""

    def transform():
        yield MyCustomType(1, 2, 3)
        yield MyCustomType(4, 5, 6)

    with BufferingNodeExecutionContext(transform) as context:
        context.write_sync(EMPTY)
        result = context.get_buffer()
        for index in (0, 1):
            assert tuple == type(result[index])
        for index in (0, 1):
            assert MyCustomType == type(result[index][0])

    with BufferingNodeExecutionContext(MyCustomType.as_tuple) as context:
        context.write_sync(*result)
        result = context.get_buffer()
        for index in (0, 1):
            assert MyBag == type(result[index])
5 | If you're reading this message in Jupyter Notebook or JupyterLab, it may mean that the widgets JavaScript is still loading. If this message persists, it likely means that the widgets JavaScript library is either not installed or not enabled. See the Jupyter Widgets Documentation for setup instructions. 6 | If you're reading this message in another notebook frontend (for example, a static rendering on GitHub or NBViewer), it may mean that your frontend doesn't currently support widgets. 7 | 8 | .. code-block:: shell-session 9 | 10 | $ jupyter nbextension enable --py widgetsnbextension 11 | $ jupyter nbextension install --py --symlink bonobo.contrib.jupyter 12 | $ jupyter nbextension enable --py bonobo.contrib.jupyter 13 | 14 | 15 | Todo 16 | ==== 17 | 18 | * Pretty printer 19 | 20 | 21 | Options for Bags 22 | ================ 23 | 24 | tuple only 25 | 26 | pros : simple 27 | cons : 28 | - how to name columns / store headers ? 29 | - how to return a dictionary 30 | 31 | 32 | 33 | yield keys('foo', 'bar', 'baz') 34 | 35 | 36 | yield 'a', 'b', 'c' 37 | 38 | 39 | CHANGELOG 40 | ========= 41 | 42 | * Bags changed to something way closer to namedtuples. 43 | * Better at managing memory 44 | * Less flexible for kwargs usage, but much more standard and portable from one to another version of python 45 | * More future proof for different execution strategies 46 | * May lead to changes in your current transformation 47 | 48 | * A given transformation now has an input and an output "type" which is either manually set by the user or 49 | detected from the first item sent through a queue. It is a restriction on how bonobo can be used, but 50 | will help provide better predictability. 51 | 52 | * No more "graph" instance detection. This was misleading for new users, and not really pythonic. 
The 53 | recommended way to start with bonobo is just to use one python file with a __main__ block, and if the 54 | project grows, include this file in a package, either a new or an existing one. The init cli changed to 55 | help you generate files or packages. That also means that we do not generate things with cookiecutter 56 | anymore. 57 | 58 | * Jupyter enhancements 59 | 60 | * Graphviz support 61 | 62 | * New nodes in stdlib 63 | 64 | * Registry, used for conversions but also for your own integrations. 65 | 66 | 67 | -------------------------------------------------------------------------------- /docs/guide/future/services.rst: -------------------------------------------------------------------------------- 1 | Services 2 | ======== 3 | 4 | .. warning:: 5 | 6 | This is a "future" document, that does not exist, it's only kept here not to lose the data until we organize better 7 | documentation versioning. 8 | 9 | Future and proposals 10 | :::::::::::::::::::: 11 | 12 | This is a first implementation and it will evolve. Base concepts will stay the same though. 13 | 14 | May or may not happen, depending on discussions. 15 | 16 | * Singleton or prototype based injection (to use spring terminology, see 17 | https://www.tutorialspoint.com/spring/spring_bean_scopes.htm), allowing smart factory usage and efficient sharing of 18 | resources. 19 | * Lazily resolved parameters, optionally overridden by command line or environment, so you can for example override the 20 | database DSN or target filesystem on command line (or with shell environment vars). 21 | * Pool based locks that ensure that only one (or n) transformations are using a given service at the same time. 22 | * Simple config implementation, using a python file for config (ex: bonobo run ... --services=services_prod.py). 23 | * Default configuration for services, using an optional callable (`def get_services(args): ...`). 
Maybe tie default 24 | configuration to graph, but not really a fan because this is unrelated to graph logic. 25 | * Default implementation for a service in a transformation or in the descriptor. Maybe not a good idea, because it 26 | tends to push forward multiple instances of the same thing, but maybe... 27 | 28 | A few ideas on how it can be implemented, from the user perspective. 29 | 30 | .. code-block:: python 31 | 32 | # using call 33 | http = Service('http.client')(requests) 34 | 35 | # using more explicit call 36 | http = Service('http.client').set_default_impl(requests) 37 | 38 | # using a decorator 39 | @Service('http.client') 40 | def http(self, services): 41 | import requests 42 | return requests 43 | 44 | # as a default in a subclass of Service 45 | class HttpService(Service): 46 | def get_default_impl(self, services): 47 | import requests 48 | return requests 49 | 50 | # ... then use it as another service 51 | http = HttpService('http.client') 52 | 53 | 54 | This is under development, let us know what you think (slack may be a good place for this). 55 | The basics already work, and you can try it. 
class JsonHandler(FileHandler):
    """File handler settings for a single JSON array: rows separated by commas, wrapped in brackets."""

    eol = ",\n"
    prefix = "["
    suffix = "]"


class LdjsonHandler(FileHandler):
    """File handler settings for line-delimited JSON: one row per line, no wrapping."""

    eol = "\n"
    prefix = ""
    suffix = ""


class JsonReader(JsonHandler, FileReader):
    """Read a file containing one JSON document and yield each of its top-level items."""

    @Method(positional=False)
    def loader(self, file):
        return json.loads(file)

    def read(self, file, *, fs):
        # The whole content is decoded at once, then iterated.
        content = file.read()
        yield from self.loader(content)

    __call__ = read


class LdjsonReader(LdjsonHandler, JsonReader):
    """
    Read a stream of line-delimited JSON objects (one object per line).

    Not to be mistaken with JSON-LD (where LD stands for linked data).

    """

    def read(self, file, *, fs):
        # Each line is an independent JSON document.
        for line in file:
            yield self.loader(line)

    __call__ = read


@use_context
class JsonWriter(JsonHandler, FileWriter):
    """Write incoming rows to a file as JSON values separated by ``eol``, between ``prefix`` and ``suffix``."""

    @ContextProcessor
    def envelope(self, context, file, *, fs):
        # Opening marker before the first row, closing marker once the node is done.
        file.write(self.prefix)
        yield
        file.write(self.suffix)

    def write(self, file, context, *args, fs):
        """
        Append the input row to *file* as JSON.

        When the context knows its input field names, the whole row is written as one object mapping field
        names to values; otherwise each positional value is written as its own JSON value. The number of
        values written so far is tracked in ``context.lineno`` so that every value but the first is preceded
        by ``eol``.

        :param file: file object to write to
        :param context: node execution context, holding the ``lineno`` counter
        :param args: values of the input row
        :param fs: filesystem service (unused here)
        :return: NOT_MODIFIED, so the input row is passed through unchanged
        """
        context.setdefault("lineno", 0)
        fields = context.get_input_fields()

        payloads = (OrderedDict(zip(fields, args)),) if fields else args

        for payload in payloads:
            separator = self.eol if context.lineno else ""
            self._write_line(file, separator + json.dumps(payload))
            context.lineno += 1

        return NOT_MODIFIED

    __call__ = write


@use_context
class LdjsonWriter(LdjsonHandler, JsonWriter):
    """
    Write a stream of Line-delimited JSON objects (one object per line).

    Not to be mistaken with JSON-LD (where LD stands for linked data).

    """
25 | """ 26 | credential_dir = os.path.join(HOME_DIR, ".cache", __package__, "credentials") 27 | if not os.path.exists(credential_dir): 28 | os.makedirs(credential_dir) 29 | credential_path = os.path.join(credential_dir, "googleapis.json") 30 | 31 | store = Storage(credential_path) 32 | credentials = store.get() 33 | 34 | # see https://developers.google.com/api-client-library/python/auth/web-app 35 | # kw: "incremental scopes" 36 | if not credentials or credentials.invalid or not credentials.has_scopes(scopes): 37 | flow = client.flow_from_clientsecrets(GOOGLE_SECRETS, scopes) 38 | flow.user_agent = "Bonobo ETL (https://www.bonobo-project.org/)" 39 | flags = argparser.parse_args(["--noauth_local_webserver"]) 40 | credentials = tools.run_flow(flow, store, flags) 41 | print("Storing credentials to " + credential_path) 42 | return credentials 43 | 44 | 45 | def get_google_spreadsheets_api_client(scopes=("https://www.googleapis.com/auth/spreadsheets",)): 46 | credentials = get_credentials(scopes=scopes) 47 | http = credentials.authorize(httplib2.Http()) 48 | discoveryUrl = "https://sheets.googleapis.com/$discovery/rest?version=v4" 49 | return discovery.build("sheets", "v4", http=http, discoveryServiceUrl=discoveryUrl, cache_discovery=False) 50 | 51 | 52 | def get_google_people_api_client(scopes=("https://www.googleapis.com/auth/contacts",)): 53 | credentials = get_credentials(scopes=scopes) 54 | http = credentials.authorize(httplib2.Http()) 55 | discoveryUrl = "https://people.googleapis.com/$discovery/rest?version=v1" 56 | return discovery.build("people", "v1", http=http, discoveryServiceUrl=discoveryUrl, cache_discovery=False) 57 | -------------------------------------------------------------------------------- /bonobo/contrib/jupyter/js/webpack.config.js: -------------------------------------------------------------------------------- 1 | var version = require('./package.json').version; 2 | 3 | // Custom webpack loaders are generally the same for all webpack bundles, 
hence 4 | // stored in a separate local variable. 5 | var loaders = [ 6 | {test: /\.json$/, loader: 'json-loader'}, 7 | ]; 8 | 9 | 10 | module.exports = [ 11 | { 12 | // Notebook extension 13 | // 14 | // This bundle only contains the part of the JavaScript that is run on 15 | // load of the notebook. This section generally only performs 16 | // some configuration for requirejs, and provides the legacy 17 | // "load_ipython_extension" function which is required for any notebook 18 | // extension. 19 | // 20 | entry: './src/extension.js', 21 | output: { 22 | filename: 'extension.js', 23 | path: '../static', 24 | libraryTarget: 'amd' 25 | } 26 | }, 27 | { 28 | // Bundle for the notebook containing the custom widget views and models 29 | // 30 | // This bundle contains the implementation for the custom widget views and 31 | // custom widget. 32 | // It must be an amd module 33 | // 34 | entry: './src/index.js', 35 | output: { 36 | filename: 'index.js', 37 | path: '../static', 38 | libraryTarget: 'amd' 39 | }, 40 | devtool: 'source-map', 41 | module: { 42 | loaders: loaders 43 | }, 44 | externals: ['jupyter-js-widgets'] 45 | }, 46 | { 47 | // Embeddable jupyter-widget-example bundle 48 | // 49 | // This bundle is generally almost identical to the notebook bundle 50 | // containing the custom widget views and models. 51 | // 52 | // The only difference is in the configuration of the webpack public path 53 | // for the static assets. 54 | // 55 | // It will be automatically distributed by unpkg to work with the static 56 | // widget embedder. 57 | // 58 | // The target bundle is always `dist/index.js`, which is the path required 59 | // by the custom widget embedder. 
class RunCommand(BaseGraphCommand):
    """
    ``bonobo run`` command: executes a graph using :func:`bonobo.run`.

    On top of the base graph command arguments, it accepts ``--quiet``/``-q`` and ``--verbose``/``-v``
    (mutually exclusive) and ``--install``/``-I``, which installs the ``requirements.txt`` found next to the
    target file (or inside the target directory) before running it.
    """

    install = False
    handler = staticmethod(bonobo.run)

    def add_arguments(self, parser):
        super().add_arguments(parser)

        verbosity_group = parser.add_mutually_exclusive_group()
        verbosity_group.add_argument("--quiet", "-q", action="store_true")
        verbosity_group.add_argument("--verbose", "-v", action="store_true")

        parser.add_argument("--install", "-I", action="store_true")

    def parse_options(self, *, quiet=False, verbose=False, install=False, **options):
        """Consume the options owned by this command and return the remaining ones."""
        from bonobo import settings

        settings.QUIET.set_if_true(quiet)
        settings.DEBUG.set_if_true(verbose)
        self.install = install
        return options

    def _run_path(self, file):
        # add install logic
        if self.install:
            if os.path.isdir(file):
                requirements = os.path.join(file, "requirements.txt")
            else:
                requirements = os.path.join(os.path.dirname(file), "requirements.txt")
            _install_requirements(requirements)

        return super()._run_path(file)

    def _run_module(self, mod):
        # install not implemented for a module, not sure it even make sense.
        if self.install:
            raise RuntimeError("--install behaviour when running a module is not defined.")

        return super()._run_module(mod)


def register_generic_run_arguments(parser, required=True):
    """
    Only there for backward compatibility with third party extensions.
    TODO: This should be deprecated (using the @deprecated decorator) in 0.7, and removed in 0.8 or 0.9.
    """
    dummy_command = BaseGraphCommand()
    dummy_command.required = required
    dummy_command.add_arguments(parser)
    return parser


def _install_requirements(requirements):
    """
    Install requirements given a path to requirements.txt file.

    :param requirements: path to the requirements file
    :raises subprocess.CalledProcessError: if pip exits with a non-zero status
    """
    import importlib
    import site
    import subprocess
    import sys

    # ``pip.main`` and ``pip.utils`` are not part of pip's API anymore (removed in pip 10). Running pip as a
    # subprocess of the current interpreter is the supported way to use it from a program.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", requirements])

    # Some shenanigans to be sure everything is importable after this, especially .egg-link files which
    # are referenced in *.pth files and apparently loaded by site.py at some magic bootstrap moment of the
    # python interpreter.
    importlib.invalidate_caches()
    pkg_resources = sys.modules.get("pkg_resources")
    if pkg_resources is not None:
        importlib.reload(pkg_resources)
    importlib.reload(site)
automodule:: bonobo 10 | :no-members: 11 | 12 | 13 | 14 | Graphs 15 | :::::: 16 | 17 | * :class:`bonobo.structs.graphs.Graph` 18 | 19 | 20 | Nodes 21 | ::::: 22 | 23 | * :class:`bonobo.nodes.CsvReader` 24 | * :class:`bonobo.nodes.CsvWriter` 25 | * :class:`bonobo.nodes.FileReader` 26 | * :class:`bonobo.nodes.FileWriter` 27 | * :class:`bonobo.nodes.Filter` 28 | * :class:`bonobo.nodes.FixedWindow` 29 | * :func:`bonobo.nodes.Format` 30 | * :class:`bonobo.nodes.JsonReader` 31 | * :class:`bonobo.nodes.JsonWriter` 32 | * :class:`bonobo.nodes.LdjsonReader` 33 | * :class:`bonobo.nodes.LdjsonWriter` 34 | * :class:`bonobo.nodes.Limit` 35 | * :func:`bonobo.nodes.MapFields` 36 | * :func:`bonobo.nodes.OrderFields` 37 | * :class:`bonobo.nodes.PickleReader` 38 | * :class:`bonobo.nodes.PickleWriter` 39 | * :class:`bonobo.nodes.PrettyPrinter` 40 | * :class:`bonobo.nodes.RateLimited` 41 | * :func:`bonobo.nodes.Rename` 42 | * :func:`bonobo.nodes.SetFields` 43 | * :func:`bonobo.nodes.Tee` 44 | * :func:`bonobo.nodes.UnpackItems` 45 | * :func:`bonobo.nodes.count` 46 | * :func:`bonobo.nodes.identity` 47 | * :func:`bonobo.nodes.noop` 48 | 49 | 50 | Other top-level APIs 51 | :::::::::::::::::::: 52 | 53 | * :func:`bonobo.create_reader` 54 | * :func:`bonobo.create_strategy` 55 | * :func:`bonobo.create_writer` 56 | * :func:`bonobo.get_argument_parser` 57 | * :func:`bonobo.get_examples_path` 58 | * :func:`bonobo.inspect` 59 | * :func:`bonobo.open_examples_fs` 60 | * :func:`bonobo.open_fs` 61 | * :func:`bonobo.parse_args` 62 | * :func:`bonobo.run` 63 | 64 | 65 | create_reader 66 | ------------- 67 | 68 | .. autofunction:: bonobo.create_reader 69 | 70 | 71 | create_strategy 72 | --------------- 73 | 74 | .. autofunction:: bonobo.create_strategy 75 | 76 | 77 | create_writer 78 | ------------- 79 | 80 | .. autofunction:: bonobo.create_writer 81 | 82 | 83 | get_argument_parser 84 | ------------------- 85 | 86 | .. 
autofunction:: bonobo.get_argument_parser 87 | 88 | 89 | get_examples_path 90 | ----------------- 91 | 92 | .. autofunction:: bonobo.get_examples_path 93 | 94 | 95 | inspect 96 | ------- 97 | 98 | .. autofunction:: bonobo.inspect 99 | 100 | 101 | open_examples_fs 102 | ---------------- 103 | 104 | .. autofunction:: bonobo.open_examples_fs 105 | 106 | 107 | open_fs 108 | ------- 109 | 110 | .. autofunction:: bonobo.open_fs 111 | 112 | 113 | parse_args 114 | ---------- 115 | 116 | .. autofunction:: bonobo.parse_args 117 | 118 | 119 | run 120 | --- 121 | 122 | .. autofunction:: bonobo.run 123 | 124 | -------------------------------------------------------------------------------- /docs/extension/django.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: bonobo.contrib.django 2 | 3 | Working with Django 4 | =================== 5 | 6 | |bonobo| provides a lightweight integration with django, to allow to include ETL pipelines in your django management 7 | commands. 8 | 9 | Quick start 10 | ::::::::::: 11 | 12 | To write a django management command that runs |bonobo| job(s), just extend :class:`ETLCommand` 13 | instead of :class:`django.core.management.base.BaseCommand`, and override the :meth:`ETLCommand.get_graph` method: 14 | 15 | .. code-block:: python 16 | 17 | import bonobo 18 | from bonobo.contrib.django import ETLCommand 19 | 20 | class Command(ETLCommand): 21 | def get_graph(self, **options): 22 | graph = bonobo.Graph() 23 | graph.add_chain(...) 24 | return graph 25 | 26 | Services 27 | -------- 28 | 29 | You can override :meth:`ETLCommand.get_services` to provide your service implementations. 30 | 31 | One common recipe to do so is to import it from somewhere else and override it as a :obj:`staticmethod`: 32 | 33 | .. 
code-block:: python 34 | 35 | import bonobo 36 | from bonobo.contrib.django import ETLCommand 37 | 38 | from myproject.services import get_services 39 | 40 | class Command(ETLCommand): 41 | get_services = staticmethod(get_services) 42 | 43 | def get_graph(...): 44 | ... 45 | 46 | 47 | Multiple graphs 48 | --------------- 49 | 50 | The :meth:`ETLCommand.get_graph` method can also be implemented as a generator. In this case, each element yielded must 51 | be a graph, and each graph will be executed in order: 52 | 53 | .. code-block:: python 54 | 55 | import bonobo 56 | from bonobo.contrib.django import ETLCommand 57 | 58 | class Command(ETLCommand): 59 | def get_graph(self, **options): 60 | yield bonobo.Graph(...) 61 | yield bonobo.Graph(...) 62 | yield bonobo.Graph(...) 63 | 64 | This is especially helpful in two major cases: 65 | 66 | * You must ensure that one job is finished before the next is run, and thus you can't add both graph's nodes in the 67 | same graph. 68 | * You want to change which graph is run depending on command line arguments. 69 | 70 | 71 | Command line arguments 72 | ---------------------- 73 | 74 | Like with regular django management commands, you can add arguments to the argument parser by overriding 75 | :meth:`ETLCommand.add_arguments`. 76 | 77 | The only difference with django is that the provided argument parser will already have arguments added to handle 78 | environment. 79 | 80 | 81 | Reference 82 | ::::::::: 83 | 84 | :mod:`bonobo.contrib.django` 85 | ---------------------------- 86 | 87 | .. 
automodule:: bonobo.contrib.django 88 | 89 | Source code 90 | ::::::::::: 91 | 92 | https://github.com/python-bonobo/bonobo/tree/master/bonobo/contrib/django 93 | 94 | -------------------------------------------------------------------------------- /bin/imgcat: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # tmux requires unrecognized OSC sequences to be wrapped with DCS tmux; 4 | # <sequence> ST, and for all ESCs in <sequence> to be replaced with ESC ESC. It 5 | # only accepts ESC backslash for ST. 6 | function print_osc() { 7 | if [[ $TERM == screen* ]] ; then 8 | printf "\033Ptmux;\033\033]" 9 | else 10 | printf "\033]" 11 | fi 12 | } 13 | 14 | # More of the tmux workaround described above. 15 | function print_st() { 16 | if [[ $TERM == screen* ]] ; then 17 | printf "\a\033\\" 18 | else 19 | printf "\a" 20 | fi 21 | } 22 | 23 | # print_image filename inline base64contents print_filename 24 | # filename: Filename to convey to client 25 | # inline: 0 or 1 26 | # base64contents: Base64-encoded contents 27 | # print_filename: If non-empty, print the filename 28 | # before outputting the image 29 | function print_image() { 30 | print_osc 31 | printf '1337;File=' 32 | if [[ -n "$1" ]]; then 33 | printf 'name='`printf "%s" "$1" | base64`";" 34 | fi 35 | 36 | VERSION=$(base64 --version 2>&1) 37 | if [[ "$VERSION" =~ fourmilab ]]; then 38 | BASE64ARG=-d 39 | elif [[ "$VERSION" =~ GNU ]]; then 40 | BASE64ARG=-di 41 | else 42 | BASE64ARG=-D 43 | fi 44 | 45 | printf "%s" "$3" | base64 $BASE64ARG | wc -c | awk '{printf "size=%d",$1}' 46 | printf ";inline=$2" 47 | printf ":" 48 | printf "%s" "$3" 49 | print_st 50 | printf '\n' 51 | if [[ -n "$4" ]]; then 52 | echo $1 53 | fi 54 | } 55 | 56 | function error() { 57 | echo "ERROR: $*" 1>&2 58 | } 59 | 60 | function show_help() { 61 | echo "Usage: imgcat [-p] filename ..."
1>& 2 62 | echo " or: cat filename | imgcat" 1>& 2 63 | } 64 | 65 | ## Main 66 | 67 | if [ -t 0 ]; then 68 | has_stdin=f 69 | else 70 | has_stdin=t 71 | fi 72 | 73 | # Show help if no arguments and no stdin. 74 | if [ $has_stdin = f -a $# -eq 0 ]; then 75 | show_help 76 | exit 77 | fi 78 | 79 | # Look for command line flags. 80 | while [ $# -gt 0 ]; do 81 | case "$1" in 82 | -h|--h|--help) 83 | show_help 84 | exit 85 | ;; 86 | -p|--p|--print) 87 | print_filename=1 88 | ;; 89 | -*) 90 | error "Unknown option flag: $1" 91 | show_help 92 | exit 1 93 | ;; 94 | *) 95 | if [ -r "$1" ] ; then 96 | has_stdin=f 97 | print_image "$1" 1 "$(base64 < "$1")" "$print_filename" 98 | else 99 | error "imgcat: $1: No such file or directory" 100 | exit 2 101 | fi 102 | ;; 103 | esac 104 | shift 105 | done 106 | 107 | # Read and print stdin 108 | if [ $has_stdin = t ]; then 109 | print_image "" 1 "$(cat | base64)" "" 110 | fi 111 | 112 | exit 0 113 | -------------------------------------------------------------------------------- /docs/_templates/alabaster/theme.conf: -------------------------------------------------------------------------------- 1 | [theme] 2 | inherit = basic 3 | stylesheet = alabaster.css 4 | pygments_style = alabaster.support.Alabaster 5 | 6 | [options] 7 | logo = 8 | logo_name = false 9 | logo_text_align = left 10 | description = 11 | description_font_style = normal 12 | github_user = 13 | github_repo = 14 | github_button = true 15 | github_banner = false 16 | github_type = watch 17 | github_count = true 18 | badge_branch = master 19 | travis_button = false 20 | codecov_button = false 21 | gratipay_user = 22 | gittip_user = 23 | analytics_id = 24 | touch_icon = 25 | canonical_url = 26 | extra_nav_links = 27 | sidebar_includehidden = true 28 | sidebar_collapse = true 29 | show_powered_by = true 30 | show_related = false 31 | 32 | gray_1 = #444 33 | gray_2 = #EEE 34 | gray_3 = #AAA 35 | 36 | pink_1 = #FCC 37 | pink_2 = #FAA 38 | pink_3 = #D52C2C 39 | 40 | base_bg = 
#fff 41 | base_text = #000 42 | hr_border = #B1B4B6 43 | body_bg = 44 | body_text = #3E4349 45 | body_text_align = left 46 | footer_text = #888 47 | link = #004B6B 48 | link_hover = #6D4100 49 | sidebar_header = 50 | sidebar_text = #555 51 | sidebar_link = 52 | sidebar_link_underscore = #999 53 | sidebar_search_button = #CCC 54 | sidebar_list = #000 55 | sidebar_hr = 56 | anchor = #DDD 57 | anchor_hover_fg = 58 | anchor_hover_bg = #EAEAEA 59 | table_border = #888 60 | shadow = 61 | 62 | # Admonition options 63 | ## basic level 64 | admonition_bg = 65 | admonition_border = #CCC 66 | note_bg = 67 | note_border = #CCC 68 | seealso_bg = 69 | seealso_border = #CCC 70 | 71 | ## critical level 72 | danger_bg = 73 | danger_border = 74 | danger_shadow = 75 | error_bg = 76 | error_border = 77 | error_shadow = 78 | 79 | ## normal level 80 | tip_bg = 81 | tip_border = #CCC 82 | hint_bg = 83 | hint_border = #CCC 84 | important_bg = 85 | important_border = #CCC 86 | 87 | ## warning level 88 | caution_bg = 89 | caution_border = 90 | attention_bg = 91 | attention_border = 92 | warn_bg = 93 | warn_border = 94 | 95 | topic_bg = 96 | code_highlight_bg = 97 | highlight_bg = #FAF3E8 98 | xref_border = #fff 99 | xref_bg = #FBFBFB 100 | admonition_xref_border = #fafafa 101 | admonition_xref_bg = 102 | footnote_bg = #FDFDFD 103 | footnote_border = 104 | pre_bg = 105 | narrow_sidebar_bg = #333 106 | narrow_sidebar_fg = #FFF 107 | narrow_sidebar_link = 108 | font_size = 17px 109 | caption_font_size = inherit 110 | viewcode_target_bg = #ffd 111 | code_bg = #ecf0f3 112 | code_text = #222 113 | code_hover = #EEE 114 | code_font_size = 0.9em 115 | code_font_family = 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace 116 | font_family = 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif 117 | head_font_family = 'Garamond', 'Georgia', serif 118 | caption_font_family = inherit 119 | code_highlight = #FFC 120 | page_width = 940px 121 | 
sidebar_width = 220px 122 | fixed_sidebar = false 123 | -------------------------------------------------------------------------------- /docs/guide/future/transformations.rst: -------------------------------------------------------------------------------- 1 | Transformations 2 | =============== 3 | 4 | .. warning:: 5 | 6 | This is a "future" document, that does not exist, it's only kept here not to lose the data until we organize better 7 | documentation versioning. 8 | 9 | 10 | Output 11 | ------ 12 | 13 | Let's see the rules (first to match wins). 14 | 15 | 1. A flag, eventually followed by something else, marks a special behaviour. If it supports it, the remaining part of 16 | the output line will be interpreted using the same rules, and some flags can be combined. 17 | 18 | **NOT_MODIFIED** 19 | 20 | **NOT_MODIFIED** tells bonobo to use the input row unmodified as the output. 21 | 22 | *CANNOT be combined* 23 | 24 | Example: 25 | 26 | .. code-block:: python 27 | 28 | from bonobo import NOT_MODIFIED 29 | 30 | def output_will_be_same_as_input(*args, **kwargs): 31 | yield NOT_MODIFIED 32 | 33 | 2. Once all flags are "consumed", the remaining part is interpreted. 34 | 35 | * If it is a :class:`bonobo.Bag` instance, then it's used directly. 36 | * If it is a :class:`dict` then a kwargs-only :class:`bonobo.Bag` will be created. 37 | * If it is a :class:`tuple` then an args-only :class:`bonobo.Bag` will be created, unless its last argument is a 38 | :class:`dict` in which case a args+kwargs :class:`bonobo.Bag` will be created. 39 | * If it's something else, it will be used to create a one-arg-only :class:`bonobo.Bag`. 40 | 41 | **APPEND** 42 | 43 | **APPEND** tells bonobo to append this output to the input (positional arguments will equal `input_args + output_args`, 44 | keyword arguments will equal `{**input_kwargs, **output_kwargs}`). 45 | 46 | *CAN be combined, but not with itself* 47 | 48 | .. 
code-block:: python 49 | 50 | from bonobo import APPEND 51 | 52 | def output_will_be_appended_to_input(*args, **kwargs): 53 | yield APPEND, 'foo', 'bar', {'eat_at': 'joe'} 54 | 55 | **LOOPBACK** 56 | 57 | **LOOPBACK** tells bonobo that this output must be looped back into our own input queue, allowing you to create the stream 58 | processing version of recursive algorithms. 59 | 60 | *CAN be combined, but not with itself* 61 | 62 | .. code-block:: python 63 | 64 | from bonobo import LOOPBACK 65 | 66 | def output_will_be_sent_to_self(*args, **kwargs): 67 | yield LOOPBACK, 'Hello, I am the future "you".' 68 | 69 | **CHANNEL(...)** 70 | 71 | **CHANNEL(...)** tells bonobo that this output does not use the default channel and is routed through another path. 72 | This is something you should probably not use unless your data flow design is complex, and if you're not certain 73 | about it, it probably means that it is not the feature you're looking for. 74 | 75 | *CAN be combined, but not with itself* 76 | 77 | .. code-block:: python 78 | 79 | from bonobo import CHANNEL 80 | 81 | def output_will_be_sent_to_errors_channel(*args, **kwargs): 82 | yield CHANNEL("errors"), 'That is not cool.'
83 | 84 | -------------------------------------------------------------------------------- /Projectfile: -------------------------------------------------------------------------------- 1 | # bonobo's description for medikit 2 | 3 | from medikit import require 4 | 5 | make = require('make') 6 | pytest = require('pytest') 7 | python = require('python') 8 | sphinx = require('sphinx') 9 | 10 | python.setup( 11 | name='bonobo', 12 | python_requires='>=3.5', 13 | description='Bonobo, a simple, modern and atomic extract-transform-load toolkit for python 3.5+.', 14 | license='Apache License, Version 2.0', 15 | url='https://www.bonobo-project.org/', 16 | download_url='https://github.com/python-bonobo/bonobo/tarball/{version}', 17 | author='Romain Dorgueil', 18 | author_email='romain@dorgueil.net', 19 | data_files=[ 20 | ( 21 | 'share/jupyter/nbextensions/bonobo-jupyter', [ 22 | 'bonobo/contrib/jupyter/static/extension.js', 23 | 'bonobo/contrib/jupyter/static/index.js', 24 | 'bonobo/contrib/jupyter/static/index.js.map', 25 | ] 26 | ), 27 | ], 28 | entry_points={ 29 | 'console_scripts': [ 30 | 'bonobo = bonobo.commands:entrypoint', 31 | ], 32 | 'bonobo.commands': [ 33 | 'convert = bonobo.commands.convert:ConvertCommand', 34 | 'download = bonobo.commands.download:DownloadCommand', 35 | 'examples = bonobo.commands.examples:ExamplesCommand', 36 | 'init = bonobo.commands.init:InitCommand', 37 | 'inspect = bonobo.commands.inspect:InspectCommand', 38 | 'run = bonobo.commands.run:RunCommand', 39 | 'version = bonobo.commands.version:VersionCommand', 40 | ], 41 | } 42 | ) 43 | 44 | python.add_requirements( 45 | 'cached-property ~=1.4', 46 | 'fs ~=2.0', 47 | 'graphviz >=0.8,<0.9', 48 | 'jinja2 ~=2.9', 49 | 'mondrian ~=0.8', 50 | 'packaging ~=19.0', 51 | 'psutil ~=5.4', 52 | 'python-slugify ~=1.2.0', 53 | 'requests ~=2.0', 54 | 'stevedore ~=1.27', 55 | 'whistle ~=1.0', 56 | dev=[ 57 | 'cookiecutter >=1.5,<1.6', 58 | 'pytest-timeout >=1,<2', 59 | 'sphinx-sitemap >=0.2,<0.3', 60 | ], 61 
| docker=[ 62 | 'bonobo-docker ~=0.6.0a1', 63 | ], 64 | jupyter=[ 65 | 'ipywidgets ~=6.0', 66 | 'jupyter ~=1.0', 67 | ], 68 | sqlalchemy=[ 69 | 'bonobo-sqlalchemy ~=0.6.0a1', 70 | ], 71 | ) 72 | 73 | 74 | @listen(make.on_generate) 75 | def on_make_generate(event): 76 | makefile = event.makefile 77 | 78 | # Sphinx: phony target running sphinx-autobuild on the docs sources, building into a fresh temporary directory. 79 | makefile['SPHINX_AUTOBUILD'] = '$(PYTHON_DIRNAME)/sphinx-autobuild' 80 | makefile.add_target( 81 | 'watch-$(SPHINX_SOURCEDIR)', 82 | '$(SPHINX_AUTOBUILD) $(SPHINX_SOURCEDIR) $(shell mktemp -d)', 83 | phony=True 84 | ) 85 | 86 | # Formatting 87 | makefile.add_target( 88 | 'format', 89 | ''' 90 | black -l 120 . 91 | isort -rc -o mondrian -o whistle -y . 92 | ''', 93 | phony=True, 94 | doc='Reformats the whole codebase using our standards (requires black and isort).' 95 | ) 96 | 97 | 98 | 99 | 100 | 101 | # vim: ft=python: 102 | -------------------------------------------------------------------------------- /bonobo/contrib/django/commands.py: -------------------------------------------------------------------------------- 1 | from logging import getLogger 2 | from types import GeneratorType 3 | 4 | from colorama import Back, Fore, Style 5 | from mondrian import term 6 | 7 | import bonobo 8 | from bonobo.plugins.console import ConsoleOutputPlugin 9 | from bonobo.util.term import CLEAR_EOL 10 | from django.core.management import BaseCommand 11 | from django.core.management.base import OutputWrapper 12 | 13 | from .utils import create_or_update 14 | 15 | 16 | class ETLCommand(BaseCommand): 17 | @property 18 | def logger(self): # Lazily create and cache a logger named after the concrete command's module. 19 | try: 20 | return self._logger 21 | except AttributeError: 22 | self._logger = getLogger(type(self).__module__) 23 | return self._logger 24 | 25 | create_or_update = staticmethod(create_or_update) 26 | 27 | def create_parser(self, prog_name, subcommand): 28 | return bonobo.get_argument_parser(super().create_parser(prog_name, subcommand)) 29 | 30 | def add_arguments(self, parser): 31 | """ 32 | Entry point for subclassed commands
to add custom arguments. 33 | """ 34 | pass 35 | 36 | def get_graph(self, *args, **options): # Default graph has a single node that raises NotImplementedError; subclasses override this. 37 | def not_implemented(): 38 | raise NotImplementedError("You must implement {}.get_graph() method.".format(self)) 39 | 40 | return bonobo.Graph(not_implemented) 41 | 42 | def get_services(self): # Default: no services. 43 | return {} 44 | 45 | def get_strategy(self): # Default: None, passed as-is to bonobo.run(). 46 | return None 47 | 48 | def info(self, *args, **kwargs): 49 | self.logger.info(*args, **kwargs) 50 | 51 | def run(self, *args, **options): # Run every graph returned by get_graph() in order; return the list of bonobo.run() results. 52 | results = [] 53 | with bonobo.parse_args(options) as options: 54 | services = self.get_services() 55 | strategy = self.get_strategy() 56 | graph_coll = self.get_graph(*args, **options) 57 | 58 | if not isinstance(graph_coll, GeneratorType): # A non-generator return value is treated as a single graph. 59 | graph_coll = (graph_coll,) 60 | 61 | for i, graph in enumerate(graph_coll): 62 | if not isinstance(graph, bonobo.Graph): 63 | raise ValueError("Expected a Graph instance, got {!r}.".format(graph)) 64 | print(term.lightwhite("{}. {}".format(i + 1, graph.name or repr(graph).strip("<>")))) 65 | result = bonobo.run(graph, services=services, strategy=strategy) 66 | results.append(result) 67 | for node in result.nodes: 68 | print(node.get_statistics_as_string(), node.get_flags_as_string()) 69 | print(term.lightblack(" ... return value: " + str(result))) 70 | 71 | return results 72 | 73 | def handle(self, *args, **options): # Swap self.stdout/self.stderr for console-plugin wrappers while run() executes. 74 | _stdout_backup, _stderr_backup = self.stdout, self.stderr 75 | 76 | self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout, ending=CLEAR_EOL + "\n") 77 | self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr, ending=CLEAR_EOL + "\n") 78 | self.stderr.style_func = lambda x: Fore.LIGHTRED_EX + Back.RED + "!"
+ Style.RESET_ALL + " " + x 79 | 80 | try: 81 | return self.run(*args, **options) 82 | finally: 83 | self.stdout, self.stderr = _stdout_backup, _stderr_backup 84 | -------------------------------------------------------------------------------- /bonobo/nodes/io/file.py: -------------------------------------------------------------------------------- 1 | from bonobo.config import ContextProcessor, Option, use_context 2 | from bonobo.constants import NOT_MODIFIED 3 | from bonobo.errors import UnrecoverableError 4 | from bonobo.nodes.io.base import FileHandler, Reader, Writer 5 | from bonobo.util import ensure_tuple 6 | 7 | 8 | class FileReader(Reader, FileHandler): 9 | """Component factory for file-like readers. 10 | 11 | On its own, it can be used to read a file and yield one row per line, trimming the "eol" character at the end if 12 | present. Extending it is usually the right way to create more specific file readers (like json, csv, etc.) 13 | """ 14 | 15 | mode = Option( 16 | str, 17 | default="r", 18 | __doc__=""" 19 | What mode to use for open() call. 20 | """, 21 | ) # type: str 22 | 23 | output_fields = Option( 24 | ensure_tuple, 25 | required=False, 26 | __doc__=""" 27 | Specify the field names of output lines. 28 | Mutually exclusive with "output_type". 29 | """, 30 | ) 31 | output_type = Option( 32 | required=False, 33 | __doc__=""" 34 | Specify the type of output lines. 35 | Mutually exclusive with "output_fields". 36 | """, 37 | ) 38 | 39 | @ContextProcessor 40 | def output(self, context, *args, **kwargs): 41 | """ 42 | Allow all readers to optionally use either the output_fields or the output_type option (specifying both raises UnrecoverableError).
43 | 44 | """ 45 | 46 | output_fields = self.output_fields 47 | output_type = self.output_type 48 | 49 | if output_fields and output_type: 50 | raise UnrecoverableError("Cannot specify both output_fields and output_type option.") 51 | 52 | if self.output_type: 53 | context.set_output_type(self.output_type) 54 | 55 | if self.output_fields: 56 | context.set_output_fields(self.output_fields) 57 | 58 | yield 59 | 60 | def read(self, file, *, fs): 61 | """ 62 | Yield each line of the given file, with trailing "eol" characters stripped. 63 | The "fs" argument is accepted but not used here. 64 | """ 65 | for line in file: 66 | yield line.rstrip(self.eol) 67 | 68 | __call__ = read 69 | 70 | 71 | @use_context 72 | class FileWriter(Writer, FileHandler): 73 | """Component factory for file or file-like writers. 74 | 75 | On its own, it can be used to write in a file one line per row that comes into this component. Extending it is 76 | usually the right way to create more specific file writers (like json, csv, etc.) 77 | """ 78 | 79 | mode = Option( 80 | str, 81 | default="w+", 82 | __doc__=""" 83 | What mode to use for open() call. 84 | """, 85 | ) # type: str 86 | 87 | def write(self, file, context, line, *, fs): 88 | """ 89 | Write a line to the opened file, prefixed with "eol" for every line but the first (counted in context.lineno). 90 | """ 91 | context.setdefault("lineno", 0) 92 | self._write_line(file, (self.eol if context.lineno else "") + line) 93 | context.lineno += 1 94 | return NOT_MODIFIED 95 | 96 | def _write_line(self, file, line): 97 | return file.write(line) 98 | 99 | __call__ = write 100 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ========== 2 | 🐵 bonobo 3 | ========== 4 | 5 | Data-processing for humans. 6 | 7 | .. image:: https://img.shields.io/pypi/v/bonobo.svg 8 | :target: https://pypi.python.org/pypi/bonobo 9 | :alt: PyPI 10 | 11 | ..
image:: https://img.shields.io/pypi/pyversions/bonobo.svg 12 | :target: https://pypi.python.org/pypi/bonobo 13 | :alt: Versions 14 | 15 | .. image:: https://readthedocs.org/projects/bonobo/badge/?version=master 16 | :target: http://docs.bonobo-project.org/ 17 | :alt: Documentation 18 | 19 | .. image:: https://travis-ci.org/python-bonobo/bonobo.svg?branch=master 20 | :target: https://travis-ci.org/python-bonobo/bonobo 21 | :alt: Continuous Integration (Linux) 22 | 23 | .. image:: https://ci.appveyor.com/api/projects/status/github/python-bonobo/bonobo?retina=true&branch=master&svg=true 24 | :target: https://ci.appveyor.com/project/hartym/bonobo?branch=master 25 | :alt: Continuous Integration (Windows) 26 | 27 | .. image:: https://codeclimate.com/github/python-bonobo/bonobo/badges/gpa.svg 28 | :target: https://codeclimate.com/github/python-bonobo/bonobo 29 | :alt: Code Climate 30 | 31 | .. image:: https://img.shields.io/coveralls/python-bonobo/bonobo/master.svg 32 | :target: https://coveralls.io/github/python-bonobo/bonobo?branch=master 33 | :alt: Coverage 34 | 35 | Bonobo is an extract-transform-load framework for python 3.5+ (see comparisons with other data tools). 36 | 37 | Bonobo uses plain old python objects (functions, generators and iterators), allows them to be linked together in a directed graph, and then executed using a parallelized strategy, without having to worry about the underlying complexity. 38 | 39 | Developers can focus on writing simple and atomic operations, that are easy to unit-test by-design, while the focus of the 40 | framework is to apply them concurrently to rows of data. 41 | 42 | One thing to note: write pure transformations and you'll be safe. 43 | 44 | Bonobo is a young rewrite of an old python2.7 tool that ran millions of transformations per day for years on production. 45 | Although it may not yet be complete or fully stable (please, allow us to reach 1.0), the basics are there. 
46 | 47 | ---- 48 | 49 | *Bonobo is under heavy development, we're doing our best to keep the core as stable as possible while still moving forward. Please allow us to reach 1.0 stability and our sincere apologies for anything we break in the process (feel free to complain on issues, allowing us to correct breakages we did not expect)* 50 | 51 | ---- 52 | 53 | Homepage: https://www.bonobo-project.org/ (`Roadmap `_) 54 | 55 | Documentation: http://docs.bonobo-project.org/ 56 | 57 | Contributing guide: http://docs.bonobo-project.org/en/latest/contribute/index.html 58 | 59 | Issues: https://github.com/python-bonobo/bonobo/issues 60 | 61 | Slack: https://bonobo-slack.herokuapp.com/ 62 | 63 | Release announcements: http://eepurl.com/csHFKL 64 | 65 | ---- 66 | 67 | Made with ♥ by `Romain Dorgueil `_ and `contributors `_. 68 | 69 | .. image:: https://img.shields.io/pypi/l/bonobo.svg 70 | :target: https://pypi.python.org/pypi/bonobo 71 | :alt: License 72 | 73 | 74 | -------------------------------------------------------------------------------- /bonobo/commands/init.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from jinja2 import Environment, FileSystemLoader 4 | from mondrian import humanizer 5 | 6 | from bonobo.commands import BaseCommand 7 | 8 | 9 | class InitCommand(BaseCommand): 10 | TEMPLATES = {"bare", "default"} 11 | TEMPLATES_PATH = os.path.join(os.path.dirname(__file__), "templates") 12 | 13 | def add_arguments(self, parser): 14 | parser.add_argument("filename") 15 | parser.add_argument("--force", "-f", default=False, action="store_true") 16 | 17 | target_group = parser.add_mutually_exclusive_group(required=False) 18 | target_group.add_argument("--template", "-t", choices=self.TEMPLATES, default="default") 19 | target_group.add_argument("--package", "-p", action="store_true", default=False) 20 | 21 | def create_file_from_template(self, *, template, filename): 22 | template_name = template 23 | name, 
ext = os.path.splitext(filename) 24 | if ext != ".py": 25 | raise ValueError('Filenames should end with ".py".') 26 | 27 | loader = FileSystemLoader(self.TEMPLATES_PATH) 28 | env = Environment(loader=loader) 29 | template = env.get_template(template_name + ".py-tpl") 30 | 31 | with open(filename, "w+") as f: 32 | f.write(template.render(name=name)) 33 | 34 | print(humanizer.Success("Generated {} using template {!r}.".format(filename, template_name))) 35 | 36 | def create_package(self, *, filename): # Initialize a medikit package at filename, then generate its __main__.py from the "default" template. 37 | _, ext = os.path.splitext(filename) 38 | if ext != "": 39 | raise ValueError("Package names should not have an extension.") 40 | 41 | try: 42 | import medikit.commands # Imported here so medikit is only needed when creating a package. 43 | except ImportError as exc: 44 | raise ImportError( 45 | "To initialize a package, you need to install medikit (pip install --upgrade medikit)." 46 | ) from exc 47 | 48 | package_name = os.path.basename(filename) 49 | medikit.commands.handle_init( 50 | os.path.join(os.getcwd(), filename, "Projectfile"), name=package_name, requirements=["bonobo"] 51 | ) 52 | 53 | self.logger.info('Generated "{}" package with medikit.'.format(package_name)) 54 | self.create_file_from_template(template="default", filename=os.path.join(filename, package_name, "__main__.py")) 55 | 56 | print( 57 | humanizer.Success( 58 | 'Package "{}" has been created.'.format(package_name), 59 | "", 60 | "Install it...", 61 | "", 62 | " $ `pip install --editable {}`".format(filename), 63 | "", 64 | "Then maybe run the example...", 65 | "", 66 | " $ `python -m {}`".format(package_name), 67 | "", 68 | "Enjoy!", 69 | ) 70 | ) 71 | 72 | @humanizer.humanize() 73 | def handle(self, *, template, filename, package=False, force=False): # Refuse to overwrite an existing path unless force is set, then create a package or a single file. 74 | if os.path.exists(filename) and not force: 75 | raise FileExistsError("Target filename already exists, use --force to override.") 76 | 77 | if package: 78 | self.create_package(filename=filename) 79 | else: 80 | self.create_file_from_template(template=template, filename=filename) 81 |
-------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 
28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at bonobo@rdc.li. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 
40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /docs/extension/sqlalchemy.rst: -------------------------------------------------------------------------------- 1 | .. currentmodule:: bonobo_sqlalchemy 2 | 3 | Working with SQLAlchemy 4 | ======================= 5 | 6 | .. include:: _beta.rst 7 | 8 | Read the introduction: https://www.bonobo-project.org/with/sqlalchemy 9 | 10 | Installation 11 | :::::::::::: 12 | 13 | To install the extension, use the `sqlalchemy` extra: 14 | 15 | .. code-block:: shell-session 16 | 17 | $ pip install bonobo[sqlalchemy] 18 | 19 | .. note:: You can install more than one extra at a time separating the names with commas. 20 | 21 | Overview and examples 22 | ::::::::::::::::::::: 23 | 24 | First, you'll need a database connection (:obj:`sqlalchemy.engine.Engine` instance), that must be provided as a service. 25 | 26 | .. code-block:: python 27 | 28 | import sqlalchemy 29 | 30 | def get_services(): 31 | return { 32 | 'sqlalchemy.engine': sqlalchemy.create_engine(...) 33 | } 34 | 35 | The `sqlalchemy.engine` name is the default name used by the provided transformations, but you can override it (for 36 | example if you need more than one connection) and specify the service name using `engine='myengine'` while building your 37 | transformations. 38 | 39 | Lets create some tables and add some data. (You may need to edit the SQL if your database server uses a different 40 | version of SQL.) 41 | 42 | .. 
code-block:: sql 43 | 44 | CREATE TABLE test_in ( 45 | id INTEGER PRIMARY KEY NOT NULL, 46 | text TEXT 47 | ); 48 | 49 | CREATE TABLE test_out ( 50 | id INTEGER PRIMARY KEY NOT NULL, 51 | text TEXT 52 | ); 53 | 54 | INSERT INTO test_in (id, text) VALUES (1, 'Cat'); 55 | INSERT INTO test_in (id, text) VALUES (2, 'Dog'); 56 | 57 | 58 | There are two transformation classes provided by this extension. 59 | 60 | One reader, one writer. 61 | 62 | Let's select some data: 63 | 64 | .. code-block:: python 65 | 66 | import bonobo 67 | import bonobo_sqlalchemy 68 | 69 | def get_graph(): 70 | graph = bonobo.Graph() 71 | graph.add_chain( 72 | bonobo_sqlalchemy.Select('SELECT * FROM test_in', limit=100), 73 | bonobo.PrettyPrinter(), 74 | ) 75 | return graph 76 | 77 | You should see: 78 | 79 | .. code-block:: shell-session 80 | 81 | $ python tutorial.py 82 | ┌ 83 | │ id[0] = 1 84 | │ text[1] = 'Cat' 85 | └ 86 | ┌ 87 | │ id[0] = 2 88 | │ text[1] = 'Dog' 89 | └ 90 | - Select in=1 out=2 [done] 91 | - PrettyPrinter in=2 out=2 [done] 92 | 93 | 94 | Now let's insert some data: 95 | 96 | .. code-block:: python 97 | 98 | import bonobo 99 | import bonobo_sqlalchemy 100 | 101 | 102 | def get_graph(**options): 103 | graph = bonobo.Graph() 104 | graph.add_chain( 105 | bonobo_sqlalchemy.Select('SELECT * FROM test_in', limit=100), 106 | bonobo_sqlalchemy.InsertOrUpdate('test_out') 107 | ) 108 | 109 | return graph 110 | 111 | If you check the `test_out` table, it should now have the data. 112 | 113 | Reference 114 | ::::::::: 115 | 116 | :mod:`bonobo_sqlalchemy` 117 | ------------------------ 118 | 119 | .. automodule:: bonobo_sqlalchemy 120 | 121 | Source code 122 | ::::::::::: 123 | 124 | https://github.com/python-bonobo/bonobo-sqlalchemy 125 | 126 | --------------------------------------------------------------------------------