├── pytest.ini
├── tests
    ├── .gitkeep
    ├── util
    │   ├── requireable
    │   │   └── dummy.py
    │   ├── test_statistics.py
    │   ├── test_compat.py
    │   ├── test_resolvers.py
    │   └── test_collections.py
    ├── structs
    │   ├── test_tokens.py
    │   └── test_inputs.py
    ├── nodes
    │   ├── io
    │   │   ├── test_io_base.py
    │   │   ├── test_pickle.py
    │   │   └── test_file.py
    │   └── test_casts.py
    ├── test_publicapi.py
    ├── test_basicusage.py
    ├── commands
    │   ├── test_convert.py
    │   ├── test_version.py
    │   ├── test_clibasics.py
    │   ├── test_init.py
    │   ├── test_download.py
    │   └── test_run.py
    ├── features
    │   ├── test_not_modified.py
    │   └── test_inherit.py
    ├── test_registry.py
    ├── execution
    │   ├── test_events.py
    │   └── contexts
    │   │   └── test_execution_contexts_graph.py
    ├── ext
    │   └── test_ods.py
    ├── plugins
    │   └── test_console.py
    ├── config
    │   ├── test_processors.py
    │   └── test_methods_partial.py
    ├── test_execution.py
    ├── test_settings.py
    └── examples
    │   └── test_example_change_some_fields.py
├── bonobo
    ├── contrib
    │   ├── __init__.py
    │   ├── jupyter
    │   │   ├── js
    │   │   │   ├── .gitignore
    │   │   │   ├── README.rst
    │   │   │   ├── src
    │   │   │   │   ├── embed.js
    │   │   │   │   ├── index.js
    │   │   │   │   ├── extension.js
    │   │   │   │   └── bonobo.js
    │   │   │   ├── package.json
    │   │   │   └── webpack.config.js
    │   │   ├── __init__.py
    │   │   ├── widget.py
    │   │   └── static
    │   │   │   └── extension.js
    │   ├── django
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   └── commands.py
    │   ├── opendatasoft
    │   │   └── __init__.py
    │   └── google
    │   │   └── __init__.py
    ├── structs
    │   ├── __init__.py
    │   └── tokens.py
    ├── examples
    │   ├── files
    │   │   ├── __init__.py
    │   │   ├── services.py
    │   │   ├── csv_handlers.py
    │   │   ├── text_handlers.py
    │   │   ├── json_handlers.py
    │   │   └── pickle_handlers.py
    │   ├── types
    │   │   ├── __init__.py
    │   │   ├── __main__.py
    │   │   └── strings.py
    │   ├── datasets
    │   │   ├── __init__.py
    │   │   ├── static
    │   │   │   ├── spam.tgz
    │   │   │   ├── Makefile
    │   │   │   └── passwd.txt
    │   │   ├── coffeeshops.py
    │   │   ├── fablabs.py
    │   │   └── __main__.py
    │   ├── .style.yapf
    │   ├── __main__.py
    │   ├── empty.py
    │   ├── clock.py
    │   ├── environ.py
    │   └── __init__.py
    ├── _version.py
    ├── util
    │   ├── term.py
    │   ├── pkgs.py
    │   ├── envelopes.py
    │   ├── __init__.py
    │   ├── compat.py
    │   ├── statistics.py
    │   ├── api.py
    │   ├── resolvers.py
    │   └── errors.py
    ├── __main__.py
    ├── plugins
    │   ├── sentry.py
    │   ├── __init__.py
    │   └── jupyter.py
    ├── execution
    │   ├── __init__.py
    │   ├── strategies
    │   │   ├── naive.py
    │   │   ├── base.py
    │   │   └── __init__.py
    │   ├── contexts
    │   │   ├── plugin.py
    │   │   └── __init__.py
    │   └── events.py
    ├── commands
    │   ├── templates
    │   │   ├── bare.py-tpl
    │   │   └── default.py-tpl
    │   ├── inspect.py
    │   ├── examples.py
    │   ├── download.py
    │   ├── version.py
    │   ├── __init__.py
    │   ├── run.py
    │   └── init.py
    ├── nodes
    │   ├── io
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── pickle.py
    │   │   ├── json.py
    │   │   └── file.py
    │   ├── aggregation.py
    │   ├── __init__.py
    │   ├── filter.py
    │   └── throttle.py
    ├── config
    │   ├── functools.py
    │   └── __init__.py
    ├── constants.py
    ├── __init__.py
    └── errors.py
├── docs
    ├── guide
    │   ├── packaging.rst
    │   ├── plugins.rst
    │   ├── index.rst
    │   ├── _next.rst
    │   ├── debugging.rst
    │   └── future
    │   │   ├── services.rst
    │   │   └── transformations.rst
    ├── _static
    │   ├── graphs.css
    │   ├── bonobo.png
    │   └── custom.css
    ├── genindex.rst
    ├── _templates
    │   ├── alabaster
    │   │   ├── static
    │   │   │   └── custom.css
    │   │   ├── _version.py
    │   │   ├── donate.html
    │   │   ├── navigation.html
    │   │   ├── relations.html
    │   │   ├── __init__.py
    │   │   ├── about.html
    │   │   └── theme.conf
    │   ├── layout.html
    │   ├── sidebarlogo.html
    │   ├── sidebarinfos.html
    │   ├── sidebarintro.html
    │   └── base.html
    ├── tutorial
    │   ├── _todo.rst
    │   ├── _wip_note.rst
    │   └── index.rst
    ├── extension
    │   ├── _beta.rst
    │   ├── docker.rst
    │   ├── _alpha.rst
    │   ├── index.rst
    │   ├── selenium.rst
    │   ├── jupyter.rst
    │   ├── django.rst
    │   └── sqlalchemy.rst
    ├── reference
    │   ├── api
    │   │   ├── bonobo
    │   │   │   ├── util.rst
    │   │   │   ├── nodes.rst
    │   │   │   ├── config.rst
    │   │   │   ├── execution.rst
    │   │   │   ├── constants.rst
    │   │   │   ├── structs
    │   │   │   │   └── graphs.rst
    │   │   │   └── execution
    │   │   │   │   ├── events.rst
    │   │   │   │   ├── contexts.rst
    │   │   │   │   └── strategies.rst
    │   │   └── bonobo.rst
    │   ├── index.rst
    │   ├── commands.rst
    │   ├── examples.rst
    │   └── settings.rst
    ├── index.rst
    ├── Makefile
    ├── make.bat
    ├── contribute
    │   └── release.rst
    └── history.rst
├── CONTRIBUTING.md
├── MANIFEST.in
├── setup.cfg
├── readthedocs.yml
├── bin
    ├── test_graph
    ├── run_all_examples.sh
    └── imgcat
├── .isort.cfg
├── .codacy.yml
├── CREDITS.rst
├── .landscape.yml
├── .editorconfig
├── .travis.yml
├── .style.yapf
├── classifiers.txt
├── requirements.txt
├── requirements-sqlalchemy.txt
├── requirements-docker.txt
├── .gitignore
├── wercker.yml
├── .coveragerc
├── requirements-dev.txt
├── readthedocs-conda.yml
├── requirements-jupyter.txt
├── .github
    └── ISSUE_TEMPLATE.md
├── benchmarks
    ├── parameters.py
    └── person.json
├── RELEASE-0.6.rst
├── Projectfile
├── README.rst
└── CODE_OF_CONDUCT.md


/pytest.ini:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/bonobo/contrib/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/bonobo/structs/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docs/guide/packaging.rst:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/bonobo/examples/files/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/bonobo/examples/types/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/bonobo/examples/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docs/_static/graphs.css:
--------------------------------------------------------------------------------
1 | .node {
2 | }
3 | 


--------------------------------------------------------------------------------
/bonobo/_version.py:
--------------------------------------------------------------------------------
1 | __version__ = '0.7.0rc2'
2 | 


--------------------------------------------------------------------------------
/tests/util/requireable/dummy.py:
--------------------------------------------------------------------------------
1 | foo = "bar"
2 | 


--------------------------------------------------------------------------------
/docs/genindex.rst:
--------------------------------------------------------------------------------
1 | Full Index
2 | ==========
3 | 
4 | 


--------------------------------------------------------------------------------
/bonobo/contrib/jupyter/js/.gitignore:
--------------------------------------------------------------------------------
1 | /node_modules
2 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | See http://docs.bonobo-project.org/en/latest/contribute/index.html
2 | 


--------------------------------------------------------------------------------
/bonobo/util/term.py:
--------------------------------------------------------------------------------
1 | CLEAR_EOL = "\033[0K"
2 | MOVE_CURSOR_UP = "\033[{}A".format
3 | 


--------------------------------------------------------------------------------
/docs/_templates/alabaster/static/custom.css:
--------------------------------------------------------------------------------
1 | /* This file intentionally left blank. */
2 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.txt
2 | include bonobo/bonobo.svg
3 | recursive-include bonobo *.py-tpl
4 | 


--------------------------------------------------------------------------------
/docs/_static/bonobo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/variable/bonobo/develop/docs/_static/bonobo.png


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.rst
3 | 
4 | [bdist_wheel]
5 | universal = 1
6 | 
7 | 


--------------------------------------------------------------------------------
/bonobo/__main__.py:
--------------------------------------------------------------------------------
1 | from bonobo.commands import entrypoint
2 | 
3 | if __name__ == "__main__":  # only when this file is executed directly, not when it is imported
4 |     entrypoint()
5 | 


--------------------------------------------------------------------------------
/bonobo/plugins/sentry.py:
--------------------------------------------------------------------------------
1 | from bonobo.plugins import Plugin
2 | 
3 | 
4 | class SentryPlugin(Plugin):  # empty subclass: adds nothing on top of Plugin in this file
5 |     pass
6 | 


--------------------------------------------------------------------------------
/docs/tutorial/_todo.rst:
--------------------------------------------------------------------------------
1 | .. warning::
2 | 
3 |     This section is missing. Sorry, but stay tuned! It'll be added soon.


--------------------------------------------------------------------------------
/bonobo/examples/.style.yapf:
--------------------------------------------------------------------------------
1 | [style]
2 | based_on_style = pep8
3 | column_limit = 74
4 | dedent_closing_brackets = true
5 | 


--------------------------------------------------------------------------------
/docs/_templates/alabaster/_version.py:
--------------------------------------------------------------------------------
1 | __version_info__ = (0, 7, 10)
2 | __version__ = ".".join(map(str, __version_info__))
3 | 


--------------------------------------------------------------------------------
/readthedocs.yml:
--------------------------------------------------------------------------------
1 | conda:
2 |   file: readthedocs-conda.yml
3 | python:
4 |   extra_requirements: [ dev, docker, sqlalchemy ]
5 | 
6 | 


--------------------------------------------------------------------------------
/bonobo/examples/datasets/static/spam.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/variable/bonobo/develop/bonobo/examples/datasets/static/spam.tgz


--------------------------------------------------------------------------------
/bin/test_graph:
--------------------------------------------------------------------------------
1 | bonobo inspect --graph bonobo/examples/tutorials/tut02e03_writeasmap.py | dot -o test_output.png -T png && bin/imgcat test_output.png
2 | 


--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {%- extends "base.html" %}
2 | 
3 | {%- block content %}
4 | {{ relbar() }}
5 | {{ super() }}
6 | {%- endblock %}
7 | 
8 | 


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | line_length=120
3 | indent='    '
4 | multi_line_output=5
5 | known_first_party=bonobo
6 | known_third_party=mondrian,whistle
7 | 


--------------------------------------------------------------------------------
/tests/structs/test_tokens.py:
--------------------------------------------------------------------------------
1 | from bonobo.structs.tokens import Token
2 | 
3 | 
4 | def test_token_repr():
5 |     t = Token("Acme")
6 |     assert repr(t) == "<Acme>"
7 | 


--------------------------------------------------------------------------------
/bonobo/examples/__main__.py:
--------------------------------------------------------------------------------
1 | if __name__ == "__main__":  # only when this file is executed directly, not when it is imported
2 |     from bonobo.commands import entrypoint
3 |     import sys
4 | 
5 |     entrypoint(["examples"] + sys.argv[1:])  # "examples" is prepended to the command-line arguments
6 | 


--------------------------------------------------------------------------------
/bonobo/examples/files/services.py:
--------------------------------------------------------------------------------
1 | from bonobo import examples, open_fs
2 | 
3 | 
4 | def get_services():
5 |     return {**examples.get_services(), "fs.output": open_fs()}
6 | 


--------------------------------------------------------------------------------
/docs/extension/_beta.rst:
--------------------------------------------------------------------------------
1 | .. note::
2 | 
3 |     This extension is currently **BETA**.
4 | 
5 |     Things will change, and although we use it in some real-world software, it may or may not satisfy your needs.
6 | 


--------------------------------------------------------------------------------
/bonobo/examples/datasets/static/Makefile:
--------------------------------------------------------------------------------
1 | 
2 | theaters.json:
3 | 	curl 'https://data.toulouse-metropole.fr/explore/dataset/theatres-et-salles-de-spectacles/download?format=json&timezone=Europe/Berlin&use_labels_for_header=true' > $@
4 | 
5 | 


--------------------------------------------------------------------------------
/docs/reference/api/bonobo/util.rst:
--------------------------------------------------------------------------------
 1 | :mod:`Util <bonobo.util>`
 2 | =========================
 3 | 
 4 | .. currentmodule:: bonobo.util
 5 | 
 6 | :Module: :mod:`bonobo.util`
 7 | 
 8 | 
 9 | .. automodule:: bonobo.util
10 | 
11 | 
12 | 
13 |    


--------------------------------------------------------------------------------
/docs/reference/api/bonobo/nodes.rst:
--------------------------------------------------------------------------------
 1 | :mod:`Nodes <bonobo.nodes>`
 2 | ===========================
 3 | 
 4 | .. currentmodule:: bonobo.nodes
 5 | 
 6 | :Module: :mod:`bonobo.nodes`
 7 | 
 8 | 
 9 | .. automodule:: bonobo.nodes
10 | 
11 | 
12 | 
13 |    


--------------------------------------------------------------------------------
/.codacy.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | exclude_paths:
 3 |   - benchmarks/**
 4 |   - bin/**
 5 |   - bonobo/contrib/jupyter/**.js
 6 |   - bonobo/examples/**
 7 |   - bonobo/ext/**
 8 |   - bonobo/util/testing.py
 9 |   - docs/**
10 |   - setup.py
11 |   - tests/**
12 | 


--------------------------------------------------------------------------------
/docs/reference/api/bonobo/config.rst:
--------------------------------------------------------------------------------
 1 | :mod:`Config <bonobo.config>`
 2 | =============================
 3 | 
 4 | .. currentmodule:: bonobo.config
 5 | 
 6 | :Module: :mod:`bonobo.config`
 7 | 
 8 | 
 9 | .. automodule:: bonobo.config
10 | 
11 | 
12 | 
13 |    


--------------------------------------------------------------------------------
/bonobo/examples/types/__main__.py:
--------------------------------------------------------------------------------
1 | import bonobo
2 | from bonobo.examples.types.strings import get_graph
3 | 
4 | if __name__ == "__main__":  # only when this file is executed directly, not when it is imported
5 |     parser = bonobo.get_argument_parser()
6 |     with bonobo.parse_args(parser):  # the graph runs inside the context opened by parse_args
7 |         bonobo.run(get_graph())
8 | 


--------------------------------------------------------------------------------
/docs/guide/plugins.rst:
--------------------------------------------------------------------------------
 1 | Plugins
 2 | =======
 3 | 
 4 | 
 5 | Graph level plugins
 6 | :::::::::::::::::::
 7 | 
 8 | 
 9 | Node level plugins
10 | ::::::::::::::::::
11 | 
12 | enhancers
13 | 
14 | 
15 | node
16 |     -
17 | 
18 | 
19 | .. include:: _next.rst
20 | 


--------------------------------------------------------------------------------
/bonobo/execution/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Execution logic, surrounding contexts for nodes and graphs and events.
 3 | 
 4 | This module is considered **internal**.
 5 | 
 6 | """
 7 | 
 8 | import logging
 9 | 
10 | logger = logging.getLogger(__name__)
11 | 
12 | __all__ = []
13 | 


--------------------------------------------------------------------------------
/CREDITS.rst:
--------------------------------------------------------------------------------
 1 | Credits
 2 | =======
 3 | 
 4 | Logo
 5 | ::::
 6 | 
 7 | Created by Sarah GHIGLIANO and available on The Noun Project.
 8 | 
 9 | License: https://creativecommons.org/licenses/by/3.0/us/ 
10 | Source: https://thenounproject.com/Ghigliano/collection/animals/?i=320941
11 | 


--------------------------------------------------------------------------------
/docs/reference/api/bonobo/execution.rst:
--------------------------------------------------------------------------------
 1 | :mod:`Execution <bonobo.execution>`
 2 | ===================================
 3 | 
 4 | .. currentmodule:: bonobo.execution
 5 | 
 6 | :Module: :mod:`bonobo.execution`
 7 | 
 8 | 
 9 | .. automodule:: bonobo.execution
10 | 
11 | 
12 | 
13 |    


--------------------------------------------------------------------------------
/bonobo/util/pkgs.py:
--------------------------------------------------------------------------------
1 | import pkg_resources
2 | from packaging.utils import canonicalize_name
3 | 
4 | bonobo_packages = {}  # canonicalized project name -> working-set entry, for names starting with "bonobo"
5 | for p in pkg_resources.working_set:
6 |     name = canonicalize_name(p.project_name)  # the prefix test and the dict key both use the canonicalized name
7 |     if name.startswith("bonobo"):
8 |         bonobo_packages[name] = p
9 | 


--------------------------------------------------------------------------------
/.landscape.yml:
--------------------------------------------------------------------------------
 1 | doc-warnings: true
 2 | test-warnings: true
 3 | strictness: veryhigh
 4 | max-line-length: 120
 5 | autodetect: true
 6 | python-targets:
 7 |   - 3
 8 | ignore-paths:
 9 |   - docs
10 |   - examples
11 |   - tests
12 | pep257:
13 |   disable:
14 |     - D205
15 |     - D210
16 | 


--------------------------------------------------------------------------------
/bonobo/commands/templates/bare.py-tpl:
--------------------------------------------------------------------------------
 1 | import bonobo
 2 | 
 3 | 
 4 | def create_graph():
 5 |     return (
 6 |         bonobo.Graph()
 7 |         >> ...
 8 |     )
 9 | 
10 | 
11 | if __name__ == '__main__':
12 |     with bonobo.parse_args() as options:
13 |         bonobo.run(create_graph())
14 | 


--------------------------------------------------------------------------------
/docs/extension/docker.rst:
--------------------------------------------------------------------------------
 1 | Working with Docker
 2 | ===================
 3 | 
 4 | .. include:: _beta.rst
 5 | 
 6 | Read the introduction: https://www.bonobo-project.org/with/docker
 7 | 
 8 | Source code
 9 | :::::::::::
10 | 
11 | https://github.com/python-bonobo/bonobo-docker
12 | 
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/docs/reference/api/bonobo/constants.rst:
--------------------------------------------------------------------------------
 1 | :mod:`Constants <bonobo.constants>`
 2 | ===================================
 3 | 
 4 | .. currentmodule:: bonobo.constants
 5 | 
 6 | :Module: :mod:`bonobo.constants`
 7 | 
 8 | 
 9 | .. automodule:: bonobo.constants
10 |    :no-members:
11 | 
12 | 
13 | 
14 |    


--------------------------------------------------------------------------------
/docs/reference/api/bonobo/structs/graphs.rst:
--------------------------------------------------------------------------------
 1 | :mod:`Graphs <bonobo.structs.graphs>`
 2 | =====================================
 3 | 
 4 | .. currentmodule:: bonobo.structs.graphs
 5 | 
 6 | :Module: :mod:`bonobo.structs.graphs`
 7 | 
 8 | 
 9 | .. automodule:: bonobo.structs.graphs
10 | 
11 | 
12 | 
13 |    


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | end_of_line = lf
 5 | insert_final_newline = true
 6 | charset = utf-8
 7 | 
 8 | [*.py]
 9 | indent = '    '
10 | indent_size = 4
11 | indent_style = space
12 | line_length = 120
13 | multi_line_output = 5
14 | 
15 | [Makefile]
16 | indent_style = tab
17 | 
18 | 


--------------------------------------------------------------------------------
/bonobo/contrib/jupyter/__init__.py:
--------------------------------------------------------------------------------
1 | from bonobo.plugins.jupyter import JupyterOutputPlugin
2 | 
3 | 
4 | def _jupyter_nbextension_paths():  # NOTE(review): presumably the hook Jupyter calls to locate the extension -- confirm
5 |     return [{"section": "notebook", "src": "static", "dest": "bonobo-jupyter", "require": "bonobo-jupyter/extension"}]
6 | 
7 | 
8 | __all__ = ["JupyterOutputPlugin"]
9 | 


--------------------------------------------------------------------------------
/bin/run_all_examples.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | #
 3 | # Run every example under bonobo/examples (*.py files whose name does not start with "_") through
 4 | # "bonobo run", printing a header and the elapsed time for each one and discarding its stdout.
 5 | 
 6 | # Repository root: the parent of the directory holding this script. Quoted so a checkout path with
 7 | # spaces works; "&&" so nothing runs from the wrong directory when cd fails.
 8 | __PATH__=$(cd "$(dirname "$0")/.." && pwd)
 9 | EXAMPLES=$(cd "$__PATH__" && find bonobo/examples -name '*.py' -not -name '_*')
10 | 
11 | # $EXAMPLES is deliberately unquoted: it is split on whitespace into one path per iteration.
12 | for example in $EXAMPLES; do
13 |   echo "===== $example ====="
14 |   (cd "$__PATH__" && time bonobo run "$example" > /dev/null)
15 | done
16 | 


--------------------------------------------------------------------------------
/bonobo/structs/tokens.py:
--------------------------------------------------------------------------------
 1 | class Token:
 2 |     def __init__(self, name):
 3 |         self.__name__ = name
 4 | 
 5 |     def __repr__(self):
 6 |         return "<{}>".format(self.__name__)
 7 | 
 8 | 
 9 | class Flag(Token):
10 |     must_be_first = False
11 |     must_be_last = False
12 |     allows_data = True
13 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | Bonobo
 2 | ======
 3 | 
 4 | .. toctree::
 5 |    :maxdepth: 2
 6 | 
 7 |    install
 8 |    tutorial/index
 9 |    guide/index
10 |    extension/index
11 |    reference/index
12 |    faq
13 |    contribute/index
14 | 
15 | 
16 | .. toctree::
17 |    :hidden:
18 | 
19 |    genindex
20 |    modindex
21 | 


--------------------------------------------------------------------------------
/docs/guide/index.rst:
--------------------------------------------------------------------------------
 1 | Guides
 2 | ======
 3 | 
 4 | This section will guide you through your journey with Bonobo ETL.
 5 | 
 6 | .. toctree::
 7 |     :maxdepth: 2
 8 | 
 9 |     introduction
10 |     transformations
11 |     graphs
12 |     services
13 |     environment
14 |     purity
15 |     debugging
16 |     plugins
17 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | dist: xenial
 2 | language: python
 3 | python:
 4 |   - 3.5
 5 |   - 3.5-dev
 6 |   - 3.6
 7 |   - 3.6-dev
 8 |   - 3.7
 9 |   - 3.7-dev
10 |   # - 3.8-dev
11 |   # - nightly
12 | install:
13 |   - make install-dev
14 |   - pip install coveralls
15 | script:
16 |   - make clean test
17 | after_success:
18 |   - coveralls
19 | 


--------------------------------------------------------------------------------
/bonobo/contrib/django/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This module contains all tools for Bonobo and Django to interact nicely.
 3 | 
 4 | * :class:`ETLCommand`
 5 | * :func:`create_or_update`
 6 | 
 7 | """
 8 | 
 9 | from .commands import ETLCommand
10 | from .utils import create_or_update
11 | 
12 | __all__ = ["ETLCommand", "create_or_update"]
13 | 


--------------------------------------------------------------------------------
/docs/reference/api/bonobo/execution/events.rst:
--------------------------------------------------------------------------------
 1 | :mod:`Execution Events <bonobo.execution.events>`
 2 | =================================================
 3 | 
 4 | .. currentmodule:: bonobo.execution.events
 5 | 
 6 | :Module: :mod:`bonobo.execution.events`
 7 | 
 8 | 
 9 | .. automodule:: bonobo.execution.events
10 | 
11 | 
12 | 
13 |    


--------------------------------------------------------------------------------
/docs/extension/_alpha.rst:
--------------------------------------------------------------------------------
 1 | .. warning::
 2 | 
 3 |     This extension is currently **ALPHA**.
 4 | 
 5 |     Things will change, break, not work as expected, and the documentation is lacking some serious work.
 6 | 
 7 |     This section is here to give a brief overview but is neither complete nor definitive.
 8 | 
 9 |     You've been warned.
10 | 


--------------------------------------------------------------------------------
/docs/reference/api/bonobo/execution/contexts.rst:
--------------------------------------------------------------------------------
 1 | :mod:`Execution Contexts <bonobo.execution.contexts>`
 2 | =====================================================
 3 | 
 4 | .. currentmodule:: bonobo.execution.contexts
 5 | 
 6 | :Module: :mod:`bonobo.execution.contexts`
 7 | 
 8 | 
 9 | .. automodule:: bonobo.execution.contexts
10 | 
11 | 
12 | 
13 |    


--------------------------------------------------------------------------------
/.style.yapf:
--------------------------------------------------------------------------------
 1 | [style]
 2 | based_on_style = pep8
 3 | column_limit = 120
 4 | allow_multiline_lambdas = false
 5 | allow_multiline_dictionary_keys = false
 6 | coalesce_brackets = true
 7 | dedent_closing_brackets = true
 8 | join_multiple_lines = true
 9 | spaces_before_comment = 2
10 | split_before_first_argument = true
11 | split_complex_comprehension = true
12 | 


--------------------------------------------------------------------------------
/bonobo/contrib/jupyter/js/README.rst:
--------------------------------------------------------------------------------
 1 | Bonobo within Jupyter
 2 | =====================
 3 | 
 4 | Install
 5 | -------
 6 | 
 7 | .. code-block:: shell-session
 8 | 
 9 |     yarn install
10 | 
11 | 
12 | Watch mode (for development)
13 | ----------------------------
14 | 
15 | .. code-block:: shell-session
16 | 
17 |     yarn run webpack --watch
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/docs/reference/api/bonobo/execution/strategies.rst:
--------------------------------------------------------------------------------
 1 | :mod:`Execution Strategies <bonobo.execution.strategies>`
 2 | =========================================================
 3 | 
 4 | .. currentmodule:: bonobo.execution.strategies
 5 | 
 6 | :Module: :mod:`bonobo.execution.strategies`
 7 | 
 8 | 
 9 | .. automodule:: bonobo.execution.strategies
10 | 
11 | 
12 | 
13 |    


--------------------------------------------------------------------------------
/tests/nodes/io/test_io_base.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from bonobo.nodes.io.base import filesystem_path
 4 | 
 5 | 
 6 | def test_filesystem_path_absolute():  # an absolute path is expected to be rejected with ValueError
 7 |     with pytest.raises(ValueError):
 8 |         filesystem_path("/this/is/absolute")
 9 | 
10 | 
11 | def test_filesystem_path_relative():  # a relative path is expected to come back unchanged
12 |     assert filesystem_path("this/is/relative") == "this/is/relative"
13 | 


--------------------------------------------------------------------------------
/tests/util/test_statistics.py:
--------------------------------------------------------------------------------
 1 | from bonobo.util.statistics import WithStatistics
 2 | 
 3 | 
 4 | class MyThingWithStats(WithStatistics):  # test double that always reports the same two statistics
 5 |     def get_statistics(self, *args, **kwargs):
 6 |         return (("foo", 42), ("bar", 69))
 7 | 
 8 | 
 9 | def test_with_statistics():
10 |     o = MyThingWithStats()
11 |     assert o.get_statistics_as_string() == "foo=42 bar=69"
12 | 


--------------------------------------------------------------------------------
/docs/extension/index.rst:
--------------------------------------------------------------------------------
 1 | Extensions
 2 | ==========
 3 | 
 4 | Extensions contain everything needed to work with a few popular third-party tools.
 5 | 
 6 | Most of them are available as optional extra dependencies, and the maturity stage of each may vary.
 7 | 
 8 | .. toctree::
 9 |     :maxdepth: 2
10 | 
11 |     django
12 |     docker
13 |     jupyter
14 |     selenium
15 |     sqlalchemy
16 | 


--------------------------------------------------------------------------------
/bonobo/execution/strategies/naive.py:
--------------------------------------------------------------------------------
 1 | from bonobo.execution.strategies.base import Strategy
 2 | 
 3 | 
 4 | class NaiveStrategy(Strategy):
 5 |     # TODO: how to run plugins in "naive" mode ?
 6 | 
 7 |     def execute(self, graph, **kwargs):
 8 |         with self.create_graph_execution_context(graph, **kwargs) as context:
 9 |             context.run_until_complete()
10 |         return context
11 | 


--------------------------------------------------------------------------------
/docs/_templates/alabaster/donate.html:
--------------------------------------------------------------------------------
 1 | {% if theme_gratipay_user or theme_gittip_user %}
 2 | <h3>Donate</h3>
 3 | <p>
 4 | Consider supporting the authors on <a href="https://www.gratipay.com/">Gratipay</a>:
 5 | <script data-gratipay-username="{{ theme_gratipay_user or theme_gittip_user }}"
 6 |         data-gratipay-widget="button"
 7 |         src="//gttp.co/v1.js"></script>
 8 | </p>
 9 | {% endif %}
10 | 


--------------------------------------------------------------------------------
/docs/_templates/alabaster/navigation.html:
--------------------------------------------------------------------------------
 1 | <h3>{{ _('Navigation') }}</h3>
 2 | {{ toctree(includehidden=theme_sidebar_includehidden, collapse=theme_sidebar_collapse) }}
 3 | {% if theme_extra_nav_links %}
 4 | <hr />
 5 | <ul>
 6 |     {% for text, uri in theme_extra_nav_links.items() %}
 7 |     <li class="toctree-l1"><a href="{{ uri }}">{{ text }}</a></li>
 8 |     {% endfor %}
 9 | </ul>
10 | {% endif %}
11 | 


--------------------------------------------------------------------------------
/bonobo/execution/contexts/plugin.py:
--------------------------------------------------------------------------------
 1 | from bonobo.execution.contexts.base import BaseContext
 2 | 
 3 | 
 4 | class PluginExecutionContext(BaseContext):  # delegates (un)registration to self.wrapped
 5 |     @property
 6 |     def dispatcher(self):
 7 |         return self.parent.dispatcher  # the dispatcher is borrowed from the parent context
 8 | 
 9 |     def register(self):
10 |         return self.wrapped.register(self.dispatcher)  # returns whatever the wrapped object returns
11 | 
12 |     def unregister(self):
13 |         return self.wrapped.unregister(self.dispatcher)  # mirror of register()
14 | 


--------------------------------------------------------------------------------
/classifiers.txt:
--------------------------------------------------------------------------------
 1 | Development Status :: 3 - Alpha
 2 | Intended Audience :: Developers
 3 | Intended Audience :: Information Technology
 4 | License :: OSI Approved :: Apache Software License
 5 | Programming Language :: Python
 6 | Programming Language :: Python :: 3
 7 | Programming Language :: Python :: 3.5
 8 | Programming Language :: Python :: 3.6
 9 | Programming Language :: Python :: 3.7
10 | Programming Language :: Python :: 3 :: Only
11 | 


--------------------------------------------------------------------------------
/tests/test_publicapi.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | 
 3 | 
 4 | def test_wildcard_import():
 5 |     bonobo = __import__("bonobo")
 6 |     assert bonobo.__version__
 7 | 
 8 |     for name in dir(bonobo):
 9 |         # ignore attributes starting by underscores
10 |         if name.startswith("_"):
11 |             continue
12 |         attr = getattr(bonobo, name)
13 |         if inspect.ismodule(attr):
14 |             continue
15 | 
16 |         assert name in bonobo.__all__
17 | 


--------------------------------------------------------------------------------
/bonobo/contrib/jupyter/widget.py:
--------------------------------------------------------------------------------
 1 | import ipywidgets as widgets
 2 | from traitlets import List, Unicode
 3 | 
 4 | 
 5 | @widgets.register("bonobo-widget.bonobo")
 6 | class BonoboWidget(widgets.DOMWidget):  # every trait below is tagged sync=True
 7 |     _view_name = Unicode("BonoboView").tag(sync=True)
 8 |     _model_name = Unicode("BonoboModel").tag(sync=True)
 9 |     _view_module = Unicode("bonobo-jupyter").tag(sync=True)
10 |     _model_module = Unicode("bonobo-jupyter").tag(sync=True)
11 |     value = List().tag(sync=True)  # NOTE(review): presumably the payload shown by BonoboView — confirm
12 | 


--------------------------------------------------------------------------------
/docs/_templates/sidebarlogo.html:
--------------------------------------------------------------------------------
 1 | <a href="{{ pathto(master_doc) }}" style="border: none">
 2 |     <h1 style="text-align: center; margin: 0;">
 3 |         <img class="logo" src="{{ pathto('_static/bonobo.png', 1) }}" title="Bonobo" style="width: 40px; height: 40px; vertical-align: bottom"/>
 4 |         <span class="brand">
 5 |             Bonobo
 6 |         </span>
 7 |     </h1>
 8 | </a>
 9 | 
10 | <p style="text-align: center" class="first">
11 |     Data processing for humans.
12 | </p>
13 | 


--------------------------------------------------------------------------------
/docs/guide/_next.rst:
--------------------------------------------------------------------------------
 1 | Where to jump next?
 2 | :::::::::::::::::::
 3 | 
 4 | We suggest that you go through the :doc:`tutorial </tutorial/index>` first.
 5 | 
 6 | Then, you can read the guides, either using the order suggested or by picking the chapter that interests you the most at
 7 | any given moment:
 8 | 
 9 | * :doc:`introduction`
10 | * :doc:`transformations`
11 | * :doc:`graphs`
12 | * :doc:`services`
13 | * :doc:`environment`
14 | * :doc:`purity`
15 | * :doc:`debugging`
16 | * :doc:`plugins`
17 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | -e .
 2 | appdirs==1.4.3
 3 | cached-property==1.5.1
 4 | certifi==2019.6.16
 5 | chardet==3.0.4
 6 | colorama==0.3.9
 7 | fs==2.4.8
 8 | graphviz==0.8.4
 9 | idna==2.8
10 | jinja2==2.10.1
11 | markupsafe==1.1.1
12 | mondrian==0.8.0
13 | packaging==19.0
14 | pbr==5.4.1
15 | psutil==5.6.3
16 | pyparsing==2.4.0
17 | python-slugify==1.2.6
18 | pytz==2019.1
19 | requests==2.22.0
20 | six==1.12.0
21 | stevedore==1.30.1
22 | typing==3.7.4
23 | unidecode==1.1.1
24 | urllib3==1.25.3
25 | whistle==1.0.1
26 | 


--------------------------------------------------------------------------------
/docs/reference/index.rst:
--------------------------------------------------------------------------------
 1 | Reference
 2 | =========
 3 | 
 4 | Reference documentation for all stable APIs and modules. If something is not listed here, please be careful about using it, as it
 5 | means that the API is not yet 1.0-proof.
 6 | 
 7 | .. toctree::
 8 |     :maxdepth: 2
 9 | 
10 |     api/bonobo
11 |     api/bonobo/config
12 |     api/bonobo/constants
13 |     api/bonobo/execution
14 |     api/bonobo/nodes
15 |     api/bonobo/structs/graphs
16 |     api/bonobo/util
17 |     commands
18 |     settings
19 |     examples
20 | 


--------------------------------------------------------------------------------
/tests/test_basicusage.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import patch
 2 | 
 3 | import pytest
 4 | 
 5 | import bonobo
 6 | from bonobo.execution.contexts.graph import GraphExecutionContext
 7 | 
 8 | 
 9 | @pytest.mark.timeout(2)
10 | def test_run_graph_noop():
11 |     graph = bonobo.Graph(bonobo.noop)
12 |     assert len(graph) == 1
13 | 
14 |     with patch("bonobo._api._is_interactive_console", side_effect=lambda: False):
15 |         result = bonobo.run(graph)
16 | 
17 |     assert isinstance(result, GraphExecutionContext)
18 | 


--------------------------------------------------------------------------------
/tests/util/test_compat.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from bonobo.util.compat import deprecated, deprecated_alias
 4 | 
 5 | 
 6 | def test_deprecated():
 7 |     @deprecated
 8 |     def foo():
 9 |         pass
10 |     # NOTE(review): foo is wrapped a second time here, on top of the decorator — confirm this is intended
11 |     foo = deprecated(foo)
12 |     with pytest.warns(DeprecationWarning):
13 |         foo()
14 | 
15 | 
16 | def test_deprecated_alias():
17 |     def foo():
18 |         pass
19 | 
20 |     foo = deprecated_alias("bar", foo)
21 | 
22 |     with pytest.warns(DeprecationWarning):
23 |         foo()
24 | 


--------------------------------------------------------------------------------
/tests/commands/test_convert.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import pytest
 4 | 
 5 | from bonobo.util.environ import change_working_directory
 6 | from bonobo.util.testing import all_runners
 7 | 
 8 | 
 9 | @all_runners
10 | def test_convert(runner, tmpdir):
11 |     csv_content = "id;name\n1;Romain"
12 |     tmpdir.join("in.csv").write(csv_content)
13 | 
14 |     with change_working_directory(tmpdir):
15 |         runner("convert", "in.csv", "out.csv")
16 | 
17 |     assert tmpdir.join("out.csv").read().strip() == csv_content
18 | 


--------------------------------------------------------------------------------
/bonobo/contrib/jupyter/js/src/embed.js:
--------------------------------------------------------------------------------
 1 | // Entry point for the unpkg bundle containing custom model definitions.
 2 | //
 3 | // It differs from the notebook bundle in that it does not need to define a
 4 | // dynamic baseURL for the static assets and may load some css that would
 5 | // already be loaded by the notebook otherwise.
 6 | 
 7 | // Export widget models and views, and the npm package version number.
 8 | module.exports = require('./bonobo.js');
 9 | module.exports['version'] = require('../package.json').version;
10 | 


--------------------------------------------------------------------------------
/tests/features/test_not_modified.py:
--------------------------------------------------------------------------------
 1 | from bonobo.constants import NOT_MODIFIED
 2 | from bonobo.util.testing import BufferingNodeExecutionContext
 3 | 
 4 | 
 5 | def useless(*args, **kwargs):  # ignores its arguments and always returns NOT_MODIFIED
 6 |     return NOT_MODIFIED
 7 | 
 8 | 
 9 | def test_not_modified():
10 |     input_messages = [("foo", "bar"), ("foo", "baz")]
11 | 
12 |     with BufferingNodeExecutionContext(useless) as context:
13 |         context.write_sync(*input_messages)
14 | 
15 |     result = context.get_buffer()
16 |     print(result)
17 |     assert result == input_messages
18 | 


--------------------------------------------------------------------------------
/bonobo/nodes/io/__init__.py:
--------------------------------------------------------------------------------
 1 | """ Readers and writers for common file formats. """
 2 | 
 3 | from .csv import CsvReader, CsvWriter
 4 | from .file import FileReader, FileWriter
 5 | from .json import JsonReader, JsonWriter, LdjsonReader, LdjsonWriter
 6 | from .pickle import PickleReader, PickleWriter
 7 | 
 8 | __all__ = [
 9 |     "CsvReader",
10 |     "CsvWriter",
11 |     "FileReader",
12 |     "FileWriter",
13 |     "JsonReader",
14 |     "JsonWriter",
15 |     "LdjsonReader",
16 |     "LdjsonWriter",
17 |     "PickleReader",
18 |     "PickleWriter",
19 | ]
20 | 


--------------------------------------------------------------------------------
/bonobo/examples/empty.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import time
 3 | 
 4 | import bonobo
 5 | 
 6 | 
 7 | def extract():
 8 |     """Placeholder, change, rename, remove... """
 9 |     for x in range(60):
10 |         if x:
11 |             time.sleep(1)
12 |         yield datetime.datetime.now()
13 | 
14 | 
15 | def get_graph():
16 |     graph = bonobo.Graph()
17 |     graph.add_chain()  # NOTE(review): no nodes are passed; presumably a template to fill in — confirm
18 | 
19 |     return graph
20 | 
21 | 
22 | if __name__ == "__main__":  # script entry point
23 |     parser = bonobo.get_argument_parser()
24 |     with bonobo.parse_args(parser):  # the graph runs inside the parsed-arguments context
25 |         bonobo.run(get_graph())
26 | 


--------------------------------------------------------------------------------
/requirements-sqlalchemy.txt:
--------------------------------------------------------------------------------
 1 | -e .[sqlalchemy]
 2 | -r requirements.txt
 3 | appdirs==1.4.3
 4 | bonobo-sqlalchemy==0.6.0
 5 | certifi==2019.6.16
 6 | chardet==3.0.4
 7 | colorama==0.3.9
 8 | fs==2.4.8
 9 | graphviz==0.8.4
10 | idna==2.8
11 | jinja2==2.10.1
12 | markupsafe==1.1.1
13 | mondrian==0.8.0
14 | packaging==19.0
15 | pbr==5.4.1
16 | psutil==5.6.3
17 | pyparsing==2.4.0
18 | python-slugify==1.2.6
19 | pytz==2019.1
20 | requests==2.22.0
21 | six==1.12.0
22 | sqlalchemy==1.3.5
23 | stevedore==1.30.1
24 | typing==3.7.4
25 | unidecode==1.1.1
26 | urllib3==1.25.3
27 | whistle==1.0.1
28 | 


--------------------------------------------------------------------------------
/tests/test_registry.py:
--------------------------------------------------------------------------------
 1 | from bonobo import create_reader, create_writer
 2 | from bonobo.nodes import CsvReader, CsvWriter, JsonReader, JsonWriter
 3 | 
 4 | 
 5 | def test_create_reader():
 6 |     t = create_reader("foo.csv")
 7 |     assert isinstance(t, CsvReader)
 8 | 
 9 |     t = create_reader("foo.txt", format="json")
10 |     assert isinstance(t, JsonReader)
11 | 
12 | 
13 | def test_create_writer():
14 |     t = create_writer("foo.csv")
15 |     assert isinstance(t, CsvWriter)
16 | 
17 |     t = create_writer("foo.txt", format="json")
18 |     assert isinstance(t, JsonWriter)
19 | 


--------------------------------------------------------------------------------
/bonobo/examples/clock.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | import time
 3 | 
 4 | import bonobo
 5 | 
 6 | 
 7 | def extract():
 8 |     """Placeholder, change, rename, remove... """
 9 |     for x in range(60):
10 |         if x:
11 |             time.sleep(1)
12 |         yield datetime.datetime.now()
13 | 
14 | 
15 | def get_graph():
16 |     graph = bonobo.Graph()
17 |     graph.add_chain(extract, print)
18 | 
19 |     return graph
20 | 
21 | 
22 | if __name__ == "__main__":  # script entry point
23 |     parser = bonobo.get_argument_parser()
24 |     with bonobo.parse_args(parser):  # the graph runs inside the parsed-arguments context
25 |         bonobo.run(get_graph())
26 | 


--------------------------------------------------------------------------------
/bonobo/examples/files/csv_handlers.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import bonobo
 4 | from bonobo import examples
 5 | from bonobo.examples.files.services import get_services
 6 | 
 7 | 
 8 | def get_graph(*, _limit=None, _print=False):
 9 |     return bonobo.Graph(
10 |         bonobo.CsvReader("coffeeshops.csv"),
11 |         *((bonobo.Limit(_limit),) if _limit else ()),
12 |         *((bonobo.PrettyPrinter(),) if _print else ()),
13 |         bonobo.CsvWriter("coffeeshops.csv", fs="fs.output")
14 |     )
15 | 
16 | 
17 | if __name__ == "__main__":  # script entry point: exit with the value returned by examples.run
18 |     sys.exit(examples.run(get_graph, get_services))
19 | 


--------------------------------------------------------------------------------
/bonobo/commands/inspect.py:
--------------------------------------------------------------------------------
 1 | import bonobo
 2 | from bonobo.commands import BaseGraphCommand
 3 | 
 4 | 
 5 | class InspectCommand(BaseGraphCommand):
 6 |     handler = staticmethod(bonobo.inspect)
 7 | 
 8 |     def add_arguments(self, parser):
 9 |         super(InspectCommand, self).add_arguments(parser)
10 |         parser.add_argument("--graph", "-g", dest="format", action="store_const", const="graph")
11 | 
12 |     def parse_options(self, **options):
13 |         if not options.get("format"):
14 |             raise RuntimeError("You must provide a format (try --graph).")
15 |         return options
16 | 


--------------------------------------------------------------------------------
/bonobo/execution/contexts/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Execution Contexts are objects that wrap the stateless data-structures (graphs and nodes) during a job execution to
 3 | keep an eye on their context/state (from the simplest things like i/o statistics to lifecycle and custom userland
 4 | state).
 5 | 
 6 | """
 7 | 
 8 | from bonobo.execution.contexts.graph import GraphExecutionContext
 9 | from bonobo.execution.contexts.node import NodeExecutionContext
10 | from bonobo.execution.contexts.plugin import PluginExecutionContext
11 | 
12 | __all__ = ["GraphExecutionContext", "NodeExecutionContext", "PluginExecutionContext"]
13 | 


--------------------------------------------------------------------------------
/bonobo/nodes/aggregation.py:
--------------------------------------------------------------------------------
 1 | from bonobo.config import Configurable, ContextProcessor, Method, Option, use_raw_input
 2 | from bonobo.util import ValueHolder
 3 | 
 4 | 
 5 | class Reduce(Configurable):  # accumulates a value by applying `function` to each incoming bag
 6 |     function = Method()  # called as function(current_value, bag); its result becomes the new value
 7 |     initializer = Option(required=False)  # starting value, or a callable returning it
 8 | 
 9 |     @ContextProcessor
10 |     def buffer(self, context):
11 |         values = yield ValueHolder(self.initializer() if callable(self.initializer) else self.initializer)
12 |         context.send(values.get())  # NOTE(review): presumably runs once the input is exhausted — confirm
13 | 
14 |     @use_raw_input
15 |     def __call__(self, values, bag):
16 |         values.set(self.function(values.get(), bag))  # store the updated accumulator in the holder
17 | 


--------------------------------------------------------------------------------
/tests/commands/test_version.py:
--------------------------------------------------------------------------------
 1 | from bonobo import __version__
 2 | from bonobo.util.testing import all_runners
 3 | 
 4 | 
 5 | @all_runners
 6 | def test_version(runner):
 7 |     out, err = runner("version")
 8 |     out = out.strip()
 9 |     assert out.startswith("bonobo ")
10 |     assert __version__ in out
11 | 
12 |     out, err = runner("version", "-q")
13 |     out = out.strip()
14 |     assert out.startswith("bonobo ")
15 |     assert __version__ in out
16 | 
17 |     out, err = runner("version", "-qq")
18 |     out = out.strip()
19 |     assert not out.startswith("bonobo ")
20 |     assert __version__ in out
21 | 


--------------------------------------------------------------------------------
/tests/commands/test_clibasics.py:
--------------------------------------------------------------------------------
 1 | import pkg_resources
 2 | 
 3 | from bonobo.util.testing import all_runners
 4 | 
 5 | 
 6 | def test_entrypoint():
 7 |     commands = {}
 8 | 
 9 |     for command in pkg_resources.iter_entry_points("bonobo.commands"):
10 |         commands[command.name] = command
11 | 
12 |     assert not {"convert", "init", "inspect", "run", "version"}.difference(set(commands))
13 | 
14 | 
15 | @all_runners
16 | def test_no_command(runner):
17 |     _, err, exc = runner(catch_errors=True)
18 |     assert type(exc) == SystemExit
19 |     assert "error: the following arguments are required: command" in err
20 | 


--------------------------------------------------------------------------------
/bonobo/nodes/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The :mod:`bonobo.nodes` module contains all builtin transformations that you can use out of the box in your ETL jobs.
 3 | 
 4 | Please note that all objects from this package are also available directly through the root :mod:`bonobo` package.
 5 | 
 6 | """
 7 | 
 8 | from bonobo.nodes.basics import *
 9 | from bonobo.nodes.basics import __all__ as _all_basics
10 | from bonobo.nodes.filter import Filter
11 | from bonobo.nodes.io import *
12 | from bonobo.nodes.io import __all__ as _all_io
13 | from bonobo.nodes.throttle import RateLimited
14 | 
15 | __all__ = _all_basics + _all_io + ["Filter", "RateLimited"]
16 | 


--------------------------------------------------------------------------------
/requirements-docker.txt:
--------------------------------------------------------------------------------
 1 | -e .[docker]
 2 | -r requirements.txt
 3 | appdirs==1.4.3
 4 | bonobo-docker==0.6.0
 5 | certifi==2019.6.16
 6 | chardet==3.0.4
 7 | colorama==0.3.9
 8 | docker-pycreds==0.4.0
 9 | docker==2.7.0
10 | fs==2.4.8
11 | graphviz==0.8.4
12 | idna==2.8
13 | jinja2==2.10.1
14 | markupsafe==1.1.1
15 | mondrian==0.8.0
16 | packaging==19.0
17 | pbr==5.4.1
18 | psutil==5.6.3
19 | pyparsing==2.4.0
20 | python-slugify==1.2.6
21 | pytz==2019.1
22 | requests==2.22.0
23 | semantic-version==2.6.0
24 | six==1.12.0
25 | stevedore==1.30.1
26 | typing==3.7.4
27 | unidecode==1.1.1
28 | urllib3==1.25.3
29 | websocket-client==0.56.0
30 | whistle==1.0.1
31 | 


--------------------------------------------------------------------------------
/tests/execution/test_events.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import Mock
 2 | 
 3 | from bonobo.execution import events
 4 | 
 5 | 
 6 | def test_names():
 7 |     # This test looks useless, but as it's becoming the pliugin API, I want to make sure that nothing changes here, or
 8 |     # notice it otherwise.
 9 |     for name in "start", "started", "tick", "stop", "stopped", "kill":
10 |         event_name = getattr(events, name.upper())
11 |         assert event_name == ".".join(("execution", name))
12 | 
13 | 
14 | def test_event_object():
15 |     # Same logic as above.
16 |     c = Mock()
17 |     e = events.ExecutionEvent(c)
18 |     assert e.context is c
19 | 


--------------------------------------------------------------------------------
/bonobo/contrib/jupyter/js/src/index.js:
--------------------------------------------------------------------------------
 1 | // Entry point for the notebook bundle containing custom model definitions.
 2 | //
 3 | // Setup notebook base URL
 4 | //
 5 | // Some static assets may be required by the custom widget javascript. The base
 6 | // url for the notebook is not known at build time and is therefore computed
 7 | // dynamically.
 8 | __webpack_public_path__ = document.querySelector('body').getAttribute('data-base-url') + 'nbextensions/bonobo/';
 9 | 
10 | // Export widget models and views, and the npm package version number.
11 | module.exports = require('./bonobo.js');
12 | module.exports['version'] = require('../package.json').version;
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *$py.class
 2 | *,cover
 3 | *.egg
 4 | *.egg-info/
 5 | *.iml
 6 | *.log
 7 | *.manifest
 8 | *.mo
 9 | *.pot
10 | *.py[cod]
11 | *.so
12 | *.spec
13 | .*.sw?
14 | .DS_Store
15 | .Python
16 | .cache
17 | .coverage
18 | .coverage.*
19 | .eggs/
20 | .env
21 | .hypothesis/
22 | .installed.cfg
23 | .ipynb_checkpoints
24 | .python-version
25 | /.idea
26 | /.medikit
27 | /.pytest_cache
28 | /.release
29 | /bonobo/contrib/jupyter/js/node_modules/
30 | /bonobo/examples/work_in_progress/
31 | /build/
32 | /coverage.xml
33 | /dist/
34 | /docs/_build/
35 | /eggs/
36 | /examples/private
37 | /sdist/
38 | /tags
39 | pip-delete-this-directory.txt
40 | pip-log.txt
41 | 


--------------------------------------------------------------------------------
/bonobo/commands/examples.py:
--------------------------------------------------------------------------------
 1 | from bonobo.commands import BaseCommand
 2 | 
 3 | all_examples = (  # dotted module names, relative to the bonobo.examples package
 4 |     "clock",
 5 |     "datasets",
 6 |     "environ",
 7 |     "files.csv_handlers",
 8 |     "files.json_handlers",
 9 |     "files.pickle_handlers",
10 |     "files.text_handlers",
11 |     "types",
12 | )
13 | 
14 | 
15 | class ExamplesCommand(BaseCommand):
16 |     def handle(self):
17 |         print("You can run the following examples:")
18 |         print()
19 |         for example in all_examples:
20 |             print("  $ python -m bonobo.examples.{}".format(example))
21 |         print()
22 | 
23 |     def add_arguments(self, parser):
24 |         pass
25 | 


--------------------------------------------------------------------------------
/wercker.yml:
--------------------------------------------------------------------------------
 1 | box: python:3.5
 2 | build:
 3 |   steps:
 4 |     - script:
 5 |       name: install virtualenv
 6 |       code: |
 7 |         pip install virtualenv
 8 | 
 9 |     - virtualenv:
10 |       name: setup virtual environment
11 |       install_wheel: true
12 |       python_location: /usr/local/bin/python3.5
13 | 
14 |     - pip-install
15 | 
16 |     - script:
17 |       name: echo python information
18 |       code: |
19 |         echo "python version $(python --version) running"
20 |         echo "pip version $(pip --version) running"
21 | 
22 | deploy:
23 |   steps:
24 |     - script:
25 |       name: deploy
26 |       code: python -m bonobo
27 | 
28 | 


--------------------------------------------------------------------------------
/docs/tutorial/_wip_note.rst:
--------------------------------------------------------------------------------
 1 | .. warning::
 2 | 
 3 |     This section is being rewritten for |bonobo| 0.6, and it's now in a "work in progress" state.
 4 | 
 5 |     You can read :doc:`the tutorial for the previous version (0.5) <0.5/index>`. Please note that things changed a bit
 6 |     since then and you'll have quirks here and there.
 7 | 
 8 |     You can also read the `migration guide from 0.5 to 0.6 <https://news.bonobo-project.org/migration-guide-for-bonobo-0-6-alpha-c1d36b0a9d35>`_
 9 |     that will give you a good overview of the changes.
10 | 
11 |     Hopefully, this document will be updated soon, and please accept our apologies about this doc status until then.
12 | 
13 | 


--------------------------------------------------------------------------------
/bonobo/examples/environ.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This transformation extracts the environment and prints it, sorted alphabetically, one item per line.
 3 | 
 4 | Used in the bonobo tests around environment management.
 5 | 
 6 | """
 7 | import os
 8 | 
 9 | import bonobo
10 | 
11 | 
12 | def extract_environ():
13 |     """Yield every ``(name, value)`` pair of ``os.environ``, in sorted order."""
14 |     yield from sorted(os.environ.items())
15 | 
16 | 
17 | def get_graph():
18 |     graph = bonobo.Graph()
19 |     graph.add_chain(extract_environ, print)
20 | 
21 |     return graph
22 | 
23 | 
24 | if __name__ == "__main__":  # script entry point
25 |     parser = bonobo.get_argument_parser()
26 |     with bonobo.parse_args(parser):  # the graph runs inside the parsed-arguments context
27 |         bonobo.run(get_graph())
28 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = Bonobo
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/tests/util/test_resolvers.py:
--------------------------------------------------------------------------------
 1 | import bonobo
 2 | from bonobo.util.resolvers import _parse_option, _resolve_options, _resolve_transformations
 3 | 
 4 | 
 5 | def test_parse_option():
 6 |     assert _parse_option("foo=bar") == ("foo", "bar")
 7 |     assert _parse_option('foo="bar"') == ("foo", "bar")
 8 |     assert _parse_option('sep=";"') == ("sep", ";")
 9 |     assert _parse_option("foo") == ("foo", True)
10 | 
11 | 
12 | def test_resolve_options():
13 |     assert _resolve_options(("foo=bar", 'bar="baz"')) == {"foo": "bar", "bar": "baz"}
14 |     assert _resolve_options() == {}
15 | 
16 | 
17 | def test_resolve_transformations():  # a name resolves to the bonobo attribute of the same name
18 |     assert _resolve_transformations(("PrettyPrinter",)) == (bonobo.PrettyPrinter,)
19 | 


--------------------------------------------------------------------------------
/bonobo/contrib/jupyter/js/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "bonobo-jupyter",
 3 |   "version": "0.0.1",
 4 |   "description": "Jupyter integration for Bonobo",
 5 |   "author": "",
 6 |   "main": "src/index.js",
 7 |   "repository": {
 8 |     "type": "git",
 9 |     "url": ""
10 |   },
11 |   "keywords": [
12 |     "jupyter",
13 |     "widgets",
14 |     "ipython",
15 |     "ipywidgets"
16 |   ],
17 |   "scripts": {
18 |     "prepublish": "webpack",
19 |     "test": "echo \"Error: no test specified\" && exit 1"
20 |   },
21 |   "devDependencies": {
22 |     "json-loader": "^0.5.4",
23 |     "webpack": "^1.12.14"
24 |   },
25 |   "dependencies": {
26 |     "jupyter-js-widgets": "^2.0.9",
27 |     "underscore": "^1.8.3"
28 |   }
29 | }
30 | 


--------------------------------------------------------------------------------
/bonobo/examples/files/text_handlers.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import bonobo
 4 | from bonobo import examples
 5 | from bonobo.examples.files.services import get_services
 6 | 
 7 | 
 8 | def skip_comments(line):
 9 |     line = line.strip()
10 |     if not line.startswith("#"):
11 |         yield line
12 | 
13 | 
14 | def get_graph(*, _limit=(), _print=()):
15 |     return bonobo.Graph(
16 |         bonobo.FileReader("passwd.txt", fs="fs.static"),
17 |         skip_comments,
18 |         *_limit,
19 |         lambda s: s.split(":")[0],
20 |         *_print,
21 |         bonobo.FileWriter("usernames.txt", fs="fs.output"),
22 |     )
23 | 
24 | 
25 | if __name__ == "__main__":  # script entry point: exit with the value returned by examples.run
26 |     sys.exit(examples.run(get_graph, get_services))
27 | 


--------------------------------------------------------------------------------
/bonobo/contrib/jupyter/js/src/extension.js:
--------------------------------------------------------------------------------
 1 | // This file contains the javascript that is run when the notebook is loaded.
 2 | // It contains some requirejs configuration and the `load_ipython_extension`
 3 | // which is required for any notebook extension.
 4 | 
 5 | // Configure requirejs
 6 | if (window.require) {
 7 |     window.require.config({
 8 |         map: {
 9 |             "*" : {
10 |                 "bonobo-jupyter": "nbextensions/bonobo-jupyter/index",
11 |                 "jupyter-js-widgets": "nbextensions/jupyter-js-widgets/extension"
12 |             }
13 |         }
14 |     });
15 | }
16 | 
17 | // Export the required load_ipython_extension
18 | module.exports = {
19 |     load_ipython_extension: function() {}
20 | };
21 | 


--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
 1 | [run]
 2 | branch = True
 3 | omit =
 4 |     bonobo/examples/**
 5 |     bonobo/ext/**
 6 | 
 7 | [report]
 8 | # Regexes for lines to exclude from consideration
 9 | exclude_lines =
10 |     # Have to re-enable the standard pragma
11 |     pragma: no cover
12 | 
13 |     # Don't complain about missing debug-only code:
14 |     def __repr__
15 |     if self\.debug
16 | 
17 |     # Don't complain if tests don't hit defensive assertion code:
18 |     raise AbstractError
19 |     raise AssertionError
20 |     raise NotImplementedError
21 | 
22 |     # Don't complain if non-runnable code isn't run:
23 |     if 0:
24 |     if __name__ == .__main__.:
25 | 
26 | ignore_errors = True
27 | 
28 | [html]
29 | directory = docs/_build/html/coverage
30 | 


--------------------------------------------------------------------------------
/docs/_templates/alabaster/relations.html:
--------------------------------------------------------------------------------
 1 | <div class="relations">
 2 | <h3>Related Topics</h3>
 3 | <ul>
 4 |   <li><a href="{{ pathto(master_doc) }}">Documentation overview</a><ul>
 5 |   {%- for parent in parents %}
 6 |   <li><a href="{{ parent.link|e }}">{{ parent.title }}</a><ul>
 7 |   {%- endfor %}
 8 |     {%- if prev %}
 9 |       <li>Previous: <a href="{{ prev.link|e }}" title="{{ _('previous chapter')
10 |         }}">{{ prev.title }}</a></li>
11 |     {%- endif %}
12 |     {%- if next %}
13 |       <li>Next: <a href="{{ next.link|e }}" title="{{ _('next chapter')
14 |         }}">{{ next.title }}</a></li>
15 |     {%- endif %}
16 |   {%- for parent in parents %}
17 |   </ul></li>
18 |   {%- endfor %}
19 |   </ul></li>
20 | </ul>
21 | </div>
22 | 


--------------------------------------------------------------------------------
/bonobo/contrib/django/utils.py:
--------------------------------------------------------------------------------
 1 | def create_or_update(model, *, defaults=None, save=True, **kwargs):
 2 |     """
 3 |     Create or update a django model instance.
 4 | 
 5 |     :param model:
 6 |     :param defaults:
 7 |     :param kwargs:
 8 |     :return: object, created, updated
 9 | 
10 |     """
11 |     obj, created = model._default_manager.get_or_create(defaults=defaults, **kwargs)
12 | 
13 |     updated = False
14 |     if not created:
15 |         if defaults:
16 |             for k, v in defaults.items():
17 |                 if getattr(obj, k) != v:
18 |                     setattr(obj, k, v)
19 |                     updated = True
20 | 
21 |         if updated and save:
22 |             obj.save()
23 | 
24 |     return obj, created, updated
25 | 


--------------------------------------------------------------------------------
/bonobo/nodes/filter.py:
--------------------------------------------------------------------------------
 1 | from bonobo.config import Configurable, Method
 2 | from bonobo.constants import NOT_MODIFIED
 3 | 
 4 | 
 5 | class Filter(Configurable):
 6 |     """Filter out hashes from the stream depending on the :attr:`filter` callable return value, when called with the
 7 |     current hash as parameter.
 8 | 
 9 |     Can be used as a decorator on a filter callable.
10 | 
11 |     .. attribute:: filter
12 | 
13 |         A callable used to filter lines.
14 | 
15 |         If the callable returns a true-ish value, the input will be passed unmodified to the next items.
16 | 
17 |         Otherwise, it'll be burnt.
18 | 
19 |     """
20 | 
21 |     filter = Method()
22 | 
23 |     def __call__(self, *args, **kwargs):
24 |         if self.filter(*args, **kwargs):
25 |             return NOT_MODIFIED
26 | 


--------------------------------------------------------------------------------
/tests/ext/test_ods.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import patch
 2 | 
 3 | from bonobo.contrib.opendatasoft import OpenDataSoftAPI
 4 | from bonobo.util.objects import ValueHolder
 5 | 
 6 | 
 7 | class ResponseMock:
 8 |     def __init__(self, json_value):
 9 |         self.json_value = json_value
10 |         self.count = 0
11 | 
12 |     def json(self):
13 |         if self.count:
14 |             return {}
15 |         else:
16 |             self.count += 1
17 |             return {"records": self.json_value}
18 | 
19 | 
20 | def test_read_from_opendatasoft_api():
21 |     extract = OpenDataSoftAPI(dataset="test-a-set")
22 |     with patch("requests.get", return_value=ResponseMock([{"fields": {"foo": "bar"}}, {"fields": {"foo": "zab"}}])):
23 |         for line in extract("http://example.com/", ValueHolder(0)):
24 |             assert "foo" in line
25 | 


--------------------------------------------------------------------------------
/tests/features/test_inherit.py:
--------------------------------------------------------------------------------
 1 | from bonobo.util.envelopes import AppendingEnvelope
 2 | from bonobo.util.testing import BufferingNodeExecutionContext
 3 | 
 4 | messages = [("Hello",), ("Goodbye",)]
 5 | 
 6 | 
 7 | def append(*args):
 8 |     return AppendingEnvelope("!")
 9 | 
10 | 
11 | def test_inherit():
12 |     with BufferingNodeExecutionContext(append) as context:
13 |         context.write_sync(*messages)
14 | 
15 |     assert context.get_buffer() == list(map(lambda x: x + ("!",), messages))
16 | 
17 | 
18 | def test_inherit_bag_tuple():
19 |     with BufferingNodeExecutionContext(append) as context:
20 |         context.set_input_fields(["message"])
21 |         context.write_sync(*messages)
22 | 
23 |     assert context.get_output_fields() == ("message", "0")
24 |     assert context.get_buffer() == list(map(lambda x: x + ("!",), messages))
25 | 


--------------------------------------------------------------------------------
/bonobo/examples/files/json_handlers.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import bonobo
 4 | from bonobo import examples
 5 | from bonobo.examples.files.services import get_services
 6 | 
 7 | 
 8 | def get_graph(*, _limit=None, _print=False):
 9 |     graph = bonobo.Graph()
10 | 
11 |     trunk = graph.add_chain(
12 |         bonobo.JsonReader("theaters.json", fs="fs.static"), *((bonobo.Limit(_limit),) if _limit else ())
13 |     )
14 | 
15 |     if _print:
16 |         graph.add_chain(bonobo.PrettyPrinter(), _input=trunk.output)
17 | 
18 |     graph.add_chain(bonobo.JsonWriter("theaters.output.json", fs="fs.output"), _input=trunk.output)
19 |     graph.add_chain(bonobo.LdjsonWriter("theaters.output.ldjson", fs="fs.output"), _input=trunk.output)
20 | 
21 |     return graph
22 | 
23 | 
24 | if __name__ == "__main__":
25 |     sys.exit(examples.run(get_graph, get_services))
26 | 


--------------------------------------------------------------------------------
/bonobo/config/functools.py:
--------------------------------------------------------------------------------
 1 | import functools
 2 | import itertools
 3 | 
 4 | from bonobo.config.services import use
 5 | from bonobo.util import get_name
 6 | 
 7 | 
 8 | def transformation_factory(f):
 9 |     @functools.wraps(f)
10 |     def _transformation_factory(*args, **kwargs):
11 |         retval = f(*args, **kwargs)
12 |         retval.__name__ = f.__name__ + "({})".format(
13 |             ", ".join(itertools.chain(map(repr, args), ("{}={!r}".format(k, v) for k, v in kwargs.items())))
14 |         )
15 |         return retval
16 | 
17 |     _transformation_factory._partial = True
18 | 
19 |     return _transformation_factory
20 | 
21 | 
22 | class partial(functools.partial):
23 |     @property
24 |     def __name__(self):
25 |         return get_name(self.func)
26 | 
27 |     def using(self, *service_names):
28 |         return use(*service_names)(self)
29 | 


--------------------------------------------------------------------------------
/docs/_templates/alabaster/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from alabaster import _version as version
 4 | 
 5 | 
 6 | def get_path():
 7 |     """
 8 |     Shortcut for users whose theme is next to their conf.py.
 9 |     """
10 |     # Theme directory is defined as our parent directory
11 |     return os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
12 | 
13 | 
14 | def update_context(app, pagename, templatename, context, doctree):
15 |     context["alabaster_version"] = version.__version__
16 | 
17 | 
18 | def setup(app):
19 |     # add_html_theme is new in Sphinx 1.6+
20 |     if hasattr(app, "add_html_theme"):
21 |         theme_path = os.path.abspath(os.path.dirname(__file__))
22 |         app.add_html_theme("alabaster", theme_path)
23 |     app.connect("html-page-context", update_context)
24 |     return {"version": version.__version__, "parallel_read_safe": True}
25 | 


--------------------------------------------------------------------------------
/bonobo/execution/strategies/base.py:
--------------------------------------------------------------------------------
 1 | from bonobo.execution.contexts.graph import GraphExecutionContext
 2 | 
 3 | 
 4 | class Strategy:
 5 |     """
 6 |     Base class for execution strategies.
 7 | 
 8 |     """
 9 | 
10 |     GraphExecutionContextType = GraphExecutionContext
11 | 
12 |     def __init__(self, GraphExecutionContextType=None):
13 |         self.GraphExecutionContextType = GraphExecutionContextType or self.GraphExecutionContextType
14 | 
15 |     def create_graph_execution_context(self, graph, *args, GraphExecutionContextType=None, **kwargs):
16 |         if not len(graph):
17 |             raise ValueError("You provided an empty graph, which does not really make sense. Please add some nodes.")
18 |         return (GraphExecutionContextType or self.GraphExecutionContextType)(graph, *args, **kwargs)
19 | 
20 |     def execute(self, graph, *args, **kwargs):
21 |         raise NotImplementedError
22 | 


--------------------------------------------------------------------------------
/bonobo/execution/events.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. data:: START
 3 | 
 4 |     Event dispatched before execution starts.
 5 | 
 6 | .. data:: STARTED
 7 | 
 8 |     Event dispatched after execution starts.
 9 | 
10 | .. data:: TICK
11 | 
12 |     Event dispatched while execution runs, on a regular basis (on each "tick").
13 | 
14 | .. data:: STOP
15 | 
16 |     Event dispatched before execution stops.
17 | 
18 | .. data:: STOPPED
19 | 
20 |     Event dispatched after execution stops.
21 | 
22 | .. data:: KILL
23 | 
24 |     Event dispatched when execution is killed.
25 | 
26 | """
27 | 
28 | from whistle import Event
29 | 
30 | START = "execution.start"
31 | STARTED = "execution.started"
32 | TICK = "execution.tick"
33 | STOP = "execution.stop"
34 | STOPPED = "execution.stopped"
35 | KILL = "execution.kill"
36 | 
37 | 
38 | class ExecutionEvent(Event):
39 |     def __init__(self, context):
40 |         self.context = context
41 | 


--------------------------------------------------------------------------------
/docs/_templates/sidebarinfos.html:
--------------------------------------------------------------------------------
 1 | <h3>Stay Informed</h3>
 2 | 
 3 | <p><a href="http://eepurl.com/csHFKL" target="_blank">Join announcements list</a>.</p>
 4 | 
 5 | <p><a href="https://twitter.com/bonobo_etl" class="twitter-follow-button" data-show-count="false">Follow @bonobo_etl</a> <script>!function(d,s,id){var js,fjs=d.getElementsByTagName(s)[0],p=/^http:/.test(d.location)?'http':'https';if(!d.getElementById(id)){js=d.createElement(s);js.id=id;js.src=p+'://platform.twitter.com/widgets.js';fjs.parentNode.insertBefore(js,fjs);}}(document, 'script', 'twitter-wjs');</script></p>
 6 | 
 7 | <p>
 8 |     <iframe src="http://ghbtns.com/github-btn.html?user=python-bonobo&repo=bonobo&type=watch&count=true&size=small"
 9 |             allowtransparency="true" frameborder="0" scrolling="0" width="200px" height="35px"></iframe>
10 |     <script async defer src="https://bonobo-slack.herokuapp.com/slackin.js"></script>
11 | </p>
12 | 


--------------------------------------------------------------------------------
/bonobo/examples/types/strings.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Example of how to use simple Python strings to communicate between transformations.
 3 | 
 4 | .. graphviz::
 5 | 
 6 |     digraph {
 7 |         rankdir = LR;
 8 |         stylesheet = "../_static/graphs.css";
 9 | 
10 |         BEGIN [shape="point"];
11 |         BEGIN -> "extract()" -> "transform(s: str)" -> "load(s: str)";
12 |     }
13 | 
14 | """
15 | from random import randint
16 | 
17 | import bonobo
18 | 
19 | 
20 | def extract():
21 |     yield "foo"
22 |     yield "bar"
23 |     yield "baz"
24 | 
25 | 
26 | def transform(s):
27 |     return "{} ({})".format(s.title(), randint(10, 99))
28 | 
29 | 
30 | def load(s):
31 |     print(s)
32 | 
33 | 
34 | def get_graph():
35 |     return bonobo.Graph(extract, transform, load)
36 | 
37 | 
38 | if __name__ == "__main__":
39 |     parser = bonobo.get_argument_parser()
40 |     with bonobo.parse_args(parser):
41 |         bonobo.run(get_graph())
42 | 


--------------------------------------------------------------------------------
/tests/commands/test_init.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import pytest
 4 | 
 5 | from bonobo.commands.init import InitCommand
 6 | from bonobo.util.testing import all_runners
 7 | 
 8 | 
 9 | @all_runners
10 | def test_init_file(runner, tmpdir):
11 |     target = tmpdir.join("foo.py")
12 |     target_filename = str(target)
13 |     runner("init", target_filename)
14 |     assert os.path.exists(target_filename)
15 | 
16 |     out, err = runner("run", target_filename)
17 |     assert out.replace("\n", " ").strip() == "Hello World"
18 |     assert not err
19 | 
20 | 
21 | @all_runners
22 | @pytest.mark.parametrize("template", InitCommand.TEMPLATES)
23 | def test_init_file_templates(runner, template, tmpdir):
24 |     target = tmpdir.join("foo.py")
25 |     target_filename = str(target)
26 |     runner("init", target_filename)
27 |     assert os.path.exists(target_filename)
28 |     out, err = runner("run", target_filename)
29 |     assert not err
30 | 


--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
 1 | -e .[dev]
 2 | -r requirements.txt
 3 | alabaster==0.7.12
 4 | arrow==0.14.2
 5 | atomicwrites==1.3.0
 6 | attrs==19.1.0
 7 | babel==2.7.0
 8 | binaryornot==0.4.4
 9 | certifi==2019.6.16
10 | chardet==3.0.4
11 | click==7.0
12 | cookiecutter==1.5.1
13 | coverage==4.5.3
14 | docutils==0.14
15 | future==0.17.1
16 | idna==2.8
17 | imagesize==1.1.0
18 | importlib-metadata==0.18
19 | jinja2-time==0.2.0
20 | jinja2==2.10.1
21 | markupsafe==1.1.1
22 | more-itertools==7.1.0
23 | packaging==19.0
24 | pathlib2==2.3.4
25 | pluggy==0.12.0
26 | poyo==0.4.2
27 | py==1.8.0
28 | pygments==2.4.2
29 | pyparsing==2.4.0
30 | pytest-cov==2.7.1
31 | pytest-timeout==1.3.3
32 | pytest==4.6.4
33 | python-dateutil==2.8.0
34 | pytz==2019.1
35 | requests==2.22.0
36 | six==1.12.0
37 | snowballstemmer==1.9.0
38 | sphinx-sitemap==0.2
39 | sphinx==1.8.5
40 | sphinxcontrib-websupport==1.1.2
41 | urllib3==1.25.3
42 | wcwidth==0.1.7
43 | whichcraft==0.6.0
44 | zipp==0.5.2
45 | 


--------------------------------------------------------------------------------
/bonobo/util/envelopes.py:
--------------------------------------------------------------------------------
 1 | from bonobo.structs.tokens import Flag
 2 | 
 3 | F_INHERIT = Flag("Inherit")
 4 | 
 5 | F_NOT_MODIFIED = Flag("NotModified")
 6 | F_NOT_MODIFIED.must_be_first = True
 7 | F_NOT_MODIFIED.must_be_last = True
 8 | F_NOT_MODIFIED.allows_data = False
 9 | 
10 | 
11 | class Envelope:
12 |     def __init__(self, content, *, flags=None, **options):
13 |         self._content = content
14 |         self._flags = set(flags or ())
15 |         self._options = options
16 | 
17 |     def unfold(self):
18 |         return self._content, self._flags, self._options
19 | 
20 | 
21 | class AppendingEnvelope(Envelope):
22 |     def __init__(self, content, **options):
23 |         super().__init__(content, flags={F_INHERIT}, **options)
24 | 
25 | 
26 | class UnchangedEnvelope(Envelope):
27 |     def __init__(self, **options):
28 |         super().__init__(None, flags={F_NOT_MODIFIED}, **options)
29 | 
30 | 
31 | def isenvelope(mixed):
32 |     return isinstance(mixed, Envelope)
33 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 | set SPHINXPROJ=Bonobo
13 | 
14 | if "%1" == "" goto help
15 | 
16 | %SPHINXBUILD% >NUL 2>NUL
17 | if errorlevel 9009 (
18 | 	echo.
19 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
20 | 	echo.installed, then set the SPHINXBUILD environment variable to point
21 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
22 | 	echo.may add the Sphinx directory to PATH.
23 | 	echo.
24 | 	echo.If you don't have Sphinx installed, grab it from
25 | 	echo.http://sphinx-doc.org/
26 | 	exit /b 1
27 | )
28 | 
29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
30 | goto end
31 | 
32 | :help
33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
34 | 
35 | :end
36 | popd
37 | 


--------------------------------------------------------------------------------
/readthedocs-conda.yml:
--------------------------------------------------------------------------------
 1 | name: py35
 2 | dependencies:
 3 | - pip=9.0.1
 4 | - python=3.5
 5 | - setuptools=36.5.0
 6 | - wheel=0.29.0
 7 | - pip:
 8 |   - appdirs==1.4.3
 9 |   - certifi==2017.11.5
10 |   - chardet==3.0.4
11 |   - colorama==0.3.9
12 |   - fs==2.0.17
13 |   - graphviz==0.8.2
14 |   - idna==2.6
15 |   - jinja2==2.10
16 |   - markupsafe==1.0
17 |   - mondrian==0.6.1
18 |   - packaging==16.8
19 |   - pbr==3.1.1
20 |   - psutil==5.4.3
21 |   - pyparsing==2.2.0
22 |   - python-slugify==1.2.4
23 |   - pytz==2017.3
24 |   - requests==2.18.4
25 |   - six==1.11.0
26 |   - stevedore==1.28.0
27 |   - unidecode==1.0.22
28 |   - urllib3==1.22
29 |   - whistle==1.0.0
30 |   # for contribs
31 |   - django>=2,<3
32 |   # for extensions
33 |   - bonobo-docker>=0.6,<0.7
34 |   - bonobo-sqlalchemy>=0.6,<0.7
35 |   # for docs
36 |   - alabaster==0.7.10
37 |   - sphinx-sitemap==0.2
38 |   - sphinx==1.6.5
39 |   - sphinxcontrib-websupport==1.0.1
40 |   # for examples
41 |   - pycountry ==17.9.23
42 | 
43 | 


--------------------------------------------------------------------------------
/docs/_templates/sidebarintro.html:
--------------------------------------------------------------------------------
 1 | <h3>About Bonobo</h3>
 2 | <p>
 3 |     Bonobo is a data-processing toolkit for Python 3.5+, your Swiss Army knife for everyday data.
 4 | </p>
 5 | 
 6 | <h3>Other Formats</h3>
 7 | <p>
 8 |     Download the docs...
 9 | </p>
10 | <ul>
11 |     <li><a href="http://readthedocs.org/projects/bonobo/downloads/pdf/master/" title="Bonobo ETL documentation as PDF">... as PDF</a></li>
12 |     <li><a href="http://readthedocs.org/projects/bonobo/downloads/htmlzip/master/" title="Bonobo ETL documentation as zipped HTML">... as zipped HTML</a></li>
13 |     <li><a href="http://readthedocs.org/projects/bonobo/downloads/epub/master/" title="Bonobo ETL documentation as EPUB">... as EPUB</a></li>
14 | </ul>
15 | 
16 | <h3>Useful Links</h3>
17 | <ul>
18 |     <li><a href="https://www.bonobo-project.org/">Bonobo's homepage</a></li>
19 |     <li><a href="http://pypi.python.org/pypi/bonobo">Package on PyPI</a></li>
20 |     <li><a href="http://github.com/python-bonobo/bonobo">Source code on GitHub</a></li>
21 | </ul>
22 | 


--------------------------------------------------------------------------------
/requirements-jupyter.txt:
--------------------------------------------------------------------------------
 1 | -e .[jupyter]
 2 | -r requirements.txt
 3 | appnope==0.1.0
 4 | attrs==19.1.0
 5 | backcall==0.1.0
 6 | bleach==3.1.0
 7 | decorator==4.4.0
 8 | defusedxml==0.6.0
 9 | entrypoints==0.3
10 | ipykernel==5.1.1
11 | ipython-genutils==0.2.0
12 | ipython==7.6.1
13 | ipywidgets==6.0.1
14 | jedi==0.14.1
15 | jinja2==2.10.1
16 | jsonschema==3.0.1
17 | jupyter-client==5.3.1
18 | jupyter-console==6.0.0
19 | jupyter-core==4.5.0
20 | jupyter==1.0.0
21 | markupsafe==1.1.1
22 | mistune==0.8.4
23 | nbconvert==5.5.0
24 | nbformat==4.4.0
25 | notebook==6.0.0
26 | pandocfilters==1.4.2
27 | parso==0.5.1
28 | pexpect==4.7.0
29 | pickleshare==0.7.5
30 | prometheus-client==0.7.1
31 | prompt-toolkit==2.0.9
32 | ptyprocess==0.6.0
33 | pygments==2.4.2
34 | pyrsistent==0.15.3
35 | python-dateutil==2.8.0
36 | pyzmq==18.0.2
37 | qtconsole==4.5.1
38 | send2trash==1.5.0
39 | six==1.12.0
40 | terminado==0.8.2
41 | testpath==0.4.2
42 | tornado==6.0.3
43 | traitlets==4.3.2
44 | wcwidth==0.1.7
45 | webencodings==0.5.1
46 | widgetsnbextension==2.0.1
47 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | Thanks for submitting an issue!
 2 | 
 3 | * If this is a **feature request**, please make sure you explain the context, the goal, and why it is something that would go into bonobo core. Drafting some bits of spec is a good idea too, even if it's very draft-y.
 4 |   
 5 | * If this is a **bug report**, make sure you describe the expected and actual behaviour, and ideally include some minimal code or steps to reproduce the problem.
 6 |   
 7 | * If this is a **question**, please post it on Stack Overflow, and simply paste the question URL in an issue or in the slack channel. Also, when you get an answer, please consider contributing back the bits of documentation you would have loved to find in the first place.
 8 | 
 9 | ## Versions
10 | 
11 | * Bonobo version:
12 | 
13 |     `bonobo version --all`
14 | 
15 | * Python version:
16 | 
17 |     `python -c "import platform,sys; print(platform.python_implementation() + ' ' + sys.version)"`
18 | 
19 | * Platform:
20 | 
21 |     `uname -a`
22 |     `/etc/lsb-release`
23 |     ...
24 |   
25 | 


--------------------------------------------------------------------------------
/bonobo/util/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The Util API, located under the :mod:`bonobo.util` namespace, contains helper functions and decorators to work with
 3 | and inspect transformations, graphs, and nodes.
 4 | 
 5 | """
 6 | from bonobo.util.collections import cast, ensure_tuple, sortedlist, tuplize
 7 | from bonobo.util.compat import deprecated, deprecated_alias
 8 | from bonobo.util.inspect import (
 9 |     inspect_node, isconfigurable, isconfigurabletype, iscontextprocessor, isdict, ismethod, isoption, istuple, istype
10 | )
11 | from bonobo.util.objects import ValueHolder, get_attribute_or_create, get_name
12 | 
13 | # Bonobo's util API
14 | __all__ = [
15 |     "ValueHolder",
16 |     "cast",
17 |     "deprecated",
18 |     "deprecated_alias",
19 |     "ensure_tuple",
20 |     "get_attribute_or_create",
21 |     "get_name",
22 |     "inspect_node",
23 |     "isconfigurable",
24 |     "isconfigurabletype",
25 |     "iscontextprocessor",
26 |     "isdict",
27 |     "ismethod",
28 |     "isoption",
29 |     "istype",
30 |     "sortedlist",
31 |     "tuplize",
32 | ]
33 | 


--------------------------------------------------------------------------------
/docs/extension/selenium.rst:
--------------------------------------------------------------------------------
 1 | Working with Selenium
 2 | =====================
 3 | 
 4 | .. include:: _alpha.rst
 5 | 
 6 | Writing web crawlers with Bonobo and Selenium is easy.
 7 | 
 8 | First, install **bonobo-selenium**:
 9 | 
10 | .. code-block:: shell-session
11 | 
12 |     $ pip install bonobo-selenium
13 | 
14 | The idea is to have one callable crawl one thing and delegate drill downs to callables further away in the chain.
15 | 
16 | An example chain could be:
17 | 
18 | .. graphviz::
19 | 
20 |     digraph {
21 |         rankdir = LR;
22 |         login -> paginate -> list -> details -> "ExcelWriter(...)";
23 |     }
24 | 
25 | Where each step would do the following:
26 | 
27 | * `login()` is in charge of opening an authenticated session in the browser.
28 | * `paginate()` opens each page of a fictitious list and passes it on to the next step.
29 | * `list()` takes every list item and yields it.
30 | * `details()` extracts the data you're interested in.
31 | * ... and the writer saves it somewhere.
32 | 
33 | Installation
34 | ::::::::::::
35 | 
36 | Overview
37 | ::::::::
38 | 
39 | Details
40 | :::::::
41 | 


--------------------------------------------------------------------------------
/bonobo/constants.py:
--------------------------------------------------------------------------------
 1 | """
 2 | .. data:: BEGIN
 3 | 
 4 |     **BEGIN** token marks the entrypoint of graphs, and all extractors will be connected to this node.
 5 | 
 6 |     Without this, it would be impossible for an execution to actually start anything, as it's the marker that tells
 7 |     |bonobo| which node to actually call when the execution starts.
 8 | 
 9 | .. data:: NOT_MODIFIED
10 | 
11 |     **NOT_MODIFIED** is a special value you can return or yield from a transformation to tell bonobo to reuse
12 |     the input data as output.
13 | 
14 |     As a convention, all loaders should return this, so loaders can be chained.
15 | 
16 | .. data:: EMPTY
17 | 
18 |     Shortcut for "empty tuple". It's often much clearer to write (especially in a test) `write(EMPTY)` than
19 |     `write(())`, although strictly equivalent.
20 | 
21 | 
22 | """
23 | from bonobo.structs.tokens import Token
24 | from bonobo.util.envelopes import UnchangedEnvelope
25 | 
26 | BEGIN = Token("Begin")
27 | END = Token("End")
28 | 
29 | NOT_MODIFIED = UnchangedEnvelope()
30 | 
31 | EMPTY = tuple()
32 | 
33 | TICK_PERIOD = 0.2
34 | 


--------------------------------------------------------------------------------
/docs/contribute/release.rst:
--------------------------------------------------------------------------------
 1 | Releases
 2 | ========
 3 | 
 4 | WORK IN PROGRESS, THIS DOC IS UNFINISHED AND JUST RAW NOTES TO HELP ME RELEASING STUFF.
 5 | 
 6 | How to make a patch release?
 7 | ::::::::::::::::::::::::::::
 8 | 
 9 | For now, reference at http://rdc.li/r
10 | 
11 | Additional checklist:
12 | 
13 | * make format
14 | 
15 | How to make a minor or major release?
16 | :::::::::::::::::::::::::::::::::::::
17 | 
18 | Releases above patch level are more complex, because we did not find a way not to hardcode the version number in a bunch
19 | of files, and because a few dependent services (source control, continuous integration, code coverage, documentation
20 | builder ...) also depend on version numbers.
21 | 
22 | Checklist:
23 | 
24 | * Files
25 | * Github
26 | 
27 | 
28 | Recipes
29 | :::::::
30 | 
31 | Get current minor::
32 | 
33 |     git semver | python -c 'import sys; print(".".join(sys.stdin.read().strip().split(".")[0:2]))'
34 | 
35 | Open vim with the list of all files containing the current minor::
36 | 
37 |     ack `git semver | python -c 'import sys; print("\\\\.".join(sys.stdin.read().strip().split(".")[0:2]))'` | vim -
38 | 
39 | 


--------------------------------------------------------------------------------
/bonobo/config/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The Config API, located under the :mod:`bonobo.config` namespace, contains all the tools you need to create
 3 | configurable transformations, either class-based or function-based.
 4 | 
 5 | """
 6 | 
 7 | from bonobo.config.configurables import Configurable
 8 | from bonobo.config.functools import partial, transformation_factory
 9 | from bonobo.config.options import Method, Option
10 | from bonobo.config.processors import ContextProcessor, use_context, use_context_processor, use_no_input, use_raw_input
11 | from bonobo.config.services import Container, Exclusive, Service, create_container, use
12 | from bonobo.util import deprecated_alias
13 | 
14 | requires = deprecated_alias("requires", use)
15 | 
16 | # Bonobo's Config API
17 | __all__ = [
18 |     "Configurable",
19 |     "Container",
20 |     "ContextProcessor",
21 |     "Exclusive",
22 |     "Method",
23 |     "Option",
24 |     "Service",
25 |     "create_container",
26 |     "partial",
27 |     "requires",
28 |     "transformation_factory",
29 |     "use",
30 |     "use_context",
31 |     "use_context_processor",
32 |     "use_no_input",
33 |     "use_raw_input",
34 | ]
35 | 


--------------------------------------------------------------------------------
/bonobo/plugins/__init__.py:
--------------------------------------------------------------------------------
 1 | class Plugin:
 2 |     """
 3 |     A plugin is an extension to the core behavior of bonobo. If you're writing transformations, you should not need
 4 |     to use this interface.
 5 | 
 6 |     For examples, you can read bonobo.plugins.console.ConsoleOutputPlugin, or bonobo.plugins.jupyter.JupyterOutputPlugin
 7 |     that respectively permits an interactive output on an ANSI console and a rich output in a jupyter notebook. Note
 8 |     that you most probably won't instantiate them by yourself at runtime, as it's the default behaviour of bonobo to use
 9 |     them if you're in a compatible context (aka an interactive terminal for the console plugin, or a jupyter notebook for
10 |     the notebook plugin.)
11 | 
12 |     Warning: THE PLUGIN API IS PRE-ALPHA AND WILL EVOLVE BEFORE 1.0, DO NOT RELY ON IT BEING STABLE!
13 | 
14 |     """
15 | 
16 |     def register(self, dispatcher):
17 |         """
18 |         :param dispatcher: whistle.EventDispatcher
19 |         """
20 |         pass
21 | 
22 |     def unregister(self, dispatcher):
23 |         """
24 |         :param dispatcher: whistle.EventDispatcher
25 |         """
26 |         pass
27 | 


--------------------------------------------------------------------------------
/tests/util/test_collections.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from bonobo.util import ensure_tuple, sortedlist
 4 | from bonobo.util.collections import cast, tuple_or_const, tuplize
 5 | 
 6 | 
 7 | def test_sortedlist():
 8 |     l = sortedlist()
 9 |     l.insort(2)
10 |     l.insort(1)
11 |     l.insort(3)
12 |     l.insort(2)
13 |     assert l == [1, 2, 2, 3]
14 | 
15 | 
16 | def test_tuple_or_const():
17 |     assert tuple_or_const(()) == ()
18 |     assert tuple_or_const((1,)) == (1,)
19 |     assert tuple_or_const((1, 2)) == (1, 2)
20 |     assert tuple_or_const([1, 2]) == (1, 2)
21 |     assert tuple_or_const("aaa") == ("aaa",)
22 | 
23 | 
24 | def test_ensure_tuple():
25 |     assert ensure_tuple("a") == ("a",)
26 |     assert ensure_tuple(("a",)) == ("a",)
27 |     assert ensure_tuple(()) is ()
28 | 
29 | 
30 | @pytest.mark.parametrize("tuplize", [tuplize, cast(tuple)])
31 | def test_tuplize(tuplize):
32 |     tuplized_lambda = tuplize(lambda: [1, 2, 3])
33 |     assert tuplized_lambda() == (1, 2, 3)
34 | 
35 |     @tuplize
36 |     def some_generator():
37 |         yield "c"
38 |         yield "b"
39 |         yield "a"
40 | 
41 |     assert some_generator() == ("c", "b", "a")
42 | 


--------------------------------------------------------------------------------
/docs/_static/custom.css:
--------------------------------------------------------------------------------
 1 | svg {
 2 |     border: 2px solid green
 3 | }
 4 | 
 5 | div.related {
 6 |     width: 940px;
 7 |     margin: 30px auto 0 auto;
 8 | }
 9 | 
10 | @media screen and (max-width: 875px) {
11 |     div.related {
12 |         visibility: hidden;
13 |         display: none;
14 |     }
15 | }
16 | 
17 | .brand {
18 |     font-family: 'Ubuntu', 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif;
19 |     font-size: 0.9em;
20 | }
21 | 
22 | div.sphinxsidebar h3 {
23 |     margin: 30px 0 10px 0;
24 | }
25 | 
26 | div.admonition p.admonition-title {
27 |     font-family: 'Ubuntu', 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif;
28 | }
29 | 
30 | div.sphinxsidebarwrapper {
31 |     padding: 0;
32 | }
33 | 
34 | div.note {
35 |     border: 0;
36 | }
37 | 
38 | .last {
39 |     margin-bottom: 0 !important;
40 | }
41 | 
42 | div.admonition {
43 |     padding: 16px;
44 | }
45 | 
46 | pre {
47 |     padding: 16px;
48 |     border: 1px solid #ddd;
49 |     background-color: #fafafa;
50 | }
51 | 
52 | .section > dl {
53 |     border: 1px solid #ddd;
54 |     background-color: #fafafa;
55 |     margin: 16px 0;
56 |     padding: 16px;
57 | }


--------------------------------------------------------------------------------
/docs/history.rst:
--------------------------------------------------------------------------------
 1 | History
 2 | =======
 3 | 
 4 | |bonobo| is a full rewrite of **rdc.etl**, aimed at modern python versions (3.5+).
 5 | 
 6 | **rdc.etl** is a now deprecated python 2.7+ ETL library for which development started in 2012, and was opensourced in
 7 | 2013 (see `first commit <https://github.com/rdcli/rdc.etl/commit/fdbc11c0ee7f6b97322693bd0051d63677b06a93>`_).
 8 | 
 9 | Although the first commit in |bonobo| happened late 2016, it's based on a lot of code, learnings and experience that
10 | happened because of **rdc.etl**.
11 | 
12 | It would have been counterproductive to migrate the same codebase:
13 | 
14 |   * a lot of mistakes were impossible to fix in a backward compatible way (for example, transformations were stateful,
15 |     making them more complicated to write and impossible to reuse, a lot of effort was used to make the components have
16 |     multi-inputs and multi-outputs, although in 99% of cases it's useless, etc.).
17 |   * we also wanted to develop something that took advantage of modern python versions, hence the choice of 3.5+.
18 | 
19 | **rdc.etl** still runs data transformation jobs, in both python 2.7 and 3, and we reuse whatever is possible to
20 | continue building |bonobo|.
21 | 
22 | 


--------------------------------------------------------------------------------
/benchmarks/parameters.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Compare passing a dict to passing a dict as kwargs to a stupid transformation
 3 | 
 4 | Last results (1 mill calls):
 5 | 
 6 | j1 1.5026444319955772
 7 | k1 1.8377482700016117
 8 | j2 1.1962292949901894
 9 | k2 1.5545833489886718
10 | j3 1.0014333260041894
11 | k3 1.353256585993222
12 | 
13 | """
14 | import json
15 | import timeit
16 | 
17 | 
def j1(d):
    """Benchmark subject: take the dict positionally and return a copy with one key added before and one after."""
    return {"prepend": "foo", **d, "append": "bar"}
20 | 
21 | 
def k1(**d):
    """Benchmark subject: same result as ``j1``, but the dict arrives unpacked as keyword arguments."""
    return {"prepend": "foo", **d, "append": "bar"}
24 | 
25 | 
def j2(d):
    """Benchmark subject: take the dict positionally and return a shallow copy of it."""
    return {**d}
28 | 
29 | 
def k2(**d):
    """Benchmark subject: same result as ``j2``, but the dict arrives unpacked as keyword arguments."""
    return {**d}
32 | 
33 | 
def j3(d):
    """Benchmark baseline: take the dict positionally and do nothing with it."""
    return None
36 | 
37 | 
def k3(**d):
    """Benchmark baseline: receive the dict as keyword arguments and do nothing with it."""
    return None
40 | 
41 | 
if __name__ == "__main__":
    import os

    # Load the fixture that sits next to this script, so the benchmark can be started from any
    # working directory (a bare "person.json" only resolves when run from inside this folder).
    fixture_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "person.json")
    with open(fixture_path, encoding="utf-8") as f:
        json_data = json.load(f)

    # For each variant, time the positional-dict function (jN) and then the kwargs function (kN).
    # The data is embedded in the timed statement through its repr.
    for i in 1, 2, 3:
        print(
            "j{}".format(i), timeit.timeit("j{}({!r})".format(i, json_data), setup="from __main__ import j{}".format(i))
        )
        print(
            "k{}".format(i),
            timeit.timeit("k{}(**{!r})".format(i, json_data), setup="from __main__ import k{}".format(i)),
        )
56 | 


--------------------------------------------------------------------------------
/bonobo/util/compat.py:
--------------------------------------------------------------------------------
 1 | import functools
 2 | import warnings
 3 | 
 4 | 
 5 | def deprecated_alias(alias, func):
 6 |     @functools.wraps(func)
 7 |     def new_func(*args, **kwargs):
 8 |         warnings.simplefilter("always", DeprecationWarning)  # turn off filter
 9 |         warnings.warn(
10 |             "Call to deprecated function alias {}, use {} instead.".format(alias, func.__name__),
11 |             category=DeprecationWarning,
12 |             stacklevel=2,
13 |         )
14 |         warnings.simplefilter("default", DeprecationWarning)  # reset filter
15 |         return func(*args, **kwargs)
16 | 
17 |     return new_func
18 | 
19 | 
def deprecated(func):
    """Decorator marking a function as deprecated.

    Every call to the decorated function emits a :class:`DeprecationWarning` naming it, then
    delegates to the original function and returns its result.

    :param func: the callable to mark as deprecated.
    :return: a wrapper carrying *func*'s metadata (via :func:`functools.wraps`).
    """

    @functools.wraps(func)
    def new_func(*args, **kwargs):
        # Force the warning to be shown, but only for the duration of this block: catch_warnings
        # restores the caller's filter configuration on exit, instead of leaving extra
        # "always"/"default" entries in the process-wide filter list after each call.
        with warnings.catch_warnings():
            warnings.simplefilter("always", DeprecationWarning)
            warnings.warn(
                "Call to deprecated function {}.".format(func.__name__),
                category=DeprecationWarning,
                stacklevel=2,  # attribute the warning to the caller of the deprecated function
            )
        return func(*args, **kwargs)

    return new_func
35 | 


--------------------------------------------------------------------------------
/bonobo/util/statistics.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | 
 4 | class WithStatistics:
 5 |     def __init__(self, *names):
 6 |         self.statistics_names = names
 7 |         self.statistics = {name: 0 for name in names}
 8 | 
 9 |     def get_statistics(self, *args, **kwargs):
10 |         return ((name, self.statistics[name]) for name in self.statistics_names)
11 | 
12 |     def get_statistics_as_string(self, *args, **kwargs):
13 |         stats = tuple("{0}={1}".format(name, cnt) for name, cnt in self.get_statistics(*args, **kwargs) if cnt > 0)
14 |         return (kwargs.get("prefix", "") + " ".join(stats)) if len(stats) else ""
15 | 
16 |     def increment(self, name, *, amount=1):
17 |         self.statistics[name] += amount
18 | 
19 | 
class Timer:
    """
    Context manager used to time execution of stuff.

    The elapsed time, in seconds, is available through :attr:`duration` once the ``with`` block
    has been left; ``str(timer)`` renders it truncated to the millisecond, followed by ``s``.
    """

    def __enter__(self):
        # Use a monotonic, high resolution clock: unlike time.time(), it cannot jump backwards or
        # forwards when the system clock is adjusted while the block is running.
        self.__start = time.perf_counter()
        return self

    def __exit__(self, type=None, value=None, traceback=None):  # lgtm [py/special-method-wrong-signature]
        # No error handling here: nothing is returned, so an exception raised in the block propagates.
        self.__finish = time.perf_counter()

    @property
    def duration(self):
        """Elapsed seconds between entering and leaving the ``with`` block."""
        return self.__finish - self.__start

    def __str__(self):
        # Truncate (rather than round) to three decimal places.
        return str(int(self.duration * 1000) / 1000.0) + "s"
39 | 


--------------------------------------------------------------------------------
/bonobo/plugins/jupyter.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from bonobo.contrib.jupyter.widget import BonoboWidget
 4 | from bonobo.execution import events
 5 | from bonobo.plugins import Plugin
 6 | 
 7 | try:
 8 |     import IPython.core.display
 9 | except ImportError as e:
10 |     logging.exception(
11 |         "You must install Jupyter to use the bonobo Jupyter extension. Easiest way is to install the "
12 |         'optional "jupyter" dependencies with «pip install bonobo[jupyter]», but you can also install a '
13 |         "specific version by yourself."
14 |     )
15 | 
16 | 
class JupyterOutputPlugin(Plugin):
    """Plugin that displays a :class:`BonoboWidget` and refreshes its value on execution events."""

    def register(self, dispatcher):
        """Attach :meth:`setup` to START, and :meth:`tick` to both TICK and STOPPED."""
        listen = dispatcher.add_listener
        listen(events.START, self.setup)
        listen(events.TICK, self.tick)
        listen(events.STOPPED, self.tick)

    def unregister(self, dispatcher):
        """Detach the listeners, in the reverse order of :meth:`register`."""
        forget = dispatcher.remove_listener
        forget(events.STOPPED, self.tick)
        forget(events.TICK, self.tick)
        forget(events.START, self.setup)

    def setup(self, event):
        """Create the widget and hand it to IPython for display."""
        widget = BonoboWidget()
        self.widget = widget
        IPython.core.display.display(widget)

    def tick(self, event):
        """Push one ``as_dict()`` entry per node, in topological order, into the widget value."""
        context = event.context
        self.widget.value = [context[index].as_dict() for index in context.graph.topologically_sorted_indexes]
34 | 


--------------------------------------------------------------------------------
/bonobo/commands/download.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import re
 3 | 
 4 | import requests
 5 | 
 6 | import bonobo
 7 | from bonobo.commands import BaseCommand
 8 | 
 9 | EXAMPLES_BASE_URL = "https://raw.githubusercontent.com/python-bonobo/bonobo/master/bonobo/examples/"
10 | """The URL to our git repository, in raw mode."""
11 | 
12 | 
class DownloadCommand(BaseCommand):
    """Command that downloads one of the example files from the project's git repository."""

    def handle(self, *, path, **options):
        """Download ``examples/<something>`` and save it to the local examples path.

        :param path: relative path of the thing to download; must start with ``examples/``.
        :raises ValueError: if *path* is not under ``examples/``.
        :raises IOError: if the remote file cannot be fetched (see ``_open_url``).
        """
        prefix = "examples/"
        # Require the full "examples/" prefix. Checking only for "examples" would accept paths
        # such as "examplesfoo/bar", which would then be used without the prefix being stripped.
        if not path.startswith(prefix):
            raise ValueError("Download command currently supports examples only")
        examples_path = path[len(prefix) :]
        output_path = bonobo.get_examples_path(examples_path)
        # The URL is opened first, so no local file is created when the download cannot start.
        with _open_url(EXAMPLES_BASE_URL + examples_path) as response, open(output_path, "wb") as fout:
            for chunk in response.iter_content(io.DEFAULT_BUFFER_SIZE):
                fout.write(chunk)
        self.logger.info("Download saved to {}".format(output_path))

    def add_arguments(self, parser):
        """Declare the single positional ``path`` argument."""
        parser.add_argument("path", help="The relative path of the thing to download.")
26 | 
27 | 
def _open_url(url):
    """Open a HTTP connection to the URL and return the streamed response.

    :param url: the URL to fetch.
    :return: the ``requests`` response, opened with ``stream=True``; the caller is expected to
        consume it and close it (for instance by using it as a context manager).
    :raises IOError: if the server answers with anything other than HTTP 200.
    """
    response = requests.get(url, stream=True)
    if response.status_code != 200:
        status_code = response.status_code
        # With stream=True the connection stays checked out until the body is consumed or the
        # response is closed, so release it explicitly before giving up.
        response.close()
        raise IOError("Unable to download {}, HTTP {}".format(url, status_code))
    return response
34 | 


--------------------------------------------------------------------------------
/benchmarks/person.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "@context": "http://schema.org",
 3 |   "@type": "MusicEvent",
 4 |   "location": {
 5 |     "@type": "MusicVenue",
 6 |     "name": "Chicago Symphony Center",
 7 |     "address": "220 S. Michigan Ave, Chicago, Illinois, USA"
 8 |   },
 9 |   "name": "Shostakovich Leningrad",
10 |   "offers": {
11 |     "@type": "Offer",
12 |     "url": "/examples/ticket/12341234",
13 |     "price": "40",
14 |     "priceCurrency": "USD",
15 |     "availability": "http://schema.org/InStock"
16 |   },
17 |   "performer": [
18 |     {
19 |       "@type": "MusicGroup",
20 |       "name": "Chicago Symphony Orchestra",
21 |       "sameAs": [
22 |         "http://cso.org/",
23 |         "http://en.wikipedia.org/wiki/Chicago_Symphony_Orchestra"
24 |       ]
25 |     },
26 |     {
27 |       "@type": "Person",
28 |       "image": "/examples/jvanzweden_s.jpg",
29 |       "name": "Jaap van Zweden",
30 |       "sameAs": "http://www.jaapvanzweden.com/"
31 |     }
32 |   ],
33 |   "startDate": "2014-05-23T20:00",
34 |   "workPerformed": [
35 |     {
36 |       "@type": "CreativeWork",
37 |       "name": "Britten Four Sea Interludes and Passacaglia from Peter Grimes",
38 |       "sameAs": "http://en.wikipedia.org/wiki/Peter_Grimes"
39 |     },
40 |     {
41 |       "@type": "CreativeWork",
42 |       "name": "Shostakovich Symphony No. 7 (Leningrad)",
43 |       "sameAs": "http://en.wikipedia.org/wiki/Symphony_No._7_(Shostakovich)"
44 |     }
45 |   ]
46 | }
47 | 


--------------------------------------------------------------------------------
/tests/commands/test_download.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | from unittest.mock import patch
 3 | 
 4 | import pytest
 5 | 
 6 | from bonobo.commands.download import EXAMPLES_BASE_URL
 7 | from bonobo.util.testing import all_runners
 8 | 
 9 | 
@all_runners
def test_download_works_for_examples(runner):
    """Downloading an example streams the remote bytes into the local output file."""
    payload = b"hello world"

    class FakeResponse:
        # Minimal stand-in for the streamed HTTP response consumed by the download command.
        status_code = 200

        def iter_content(self, *args, **kwargs):
            return [payload]

        def __enter__(self):
            return self

        def __exit__(self, *args, **kwargs):
            pass

    # In-memory sink whose close() is neutralised, so its content can still be read afterwards.
    sink = io.BytesIO()
    sink.close = lambda: None

    with patch("bonobo.commands.download._open_url") as mock_open_url:
        with patch("bonobo.commands.download.open") as mock_open:
            mock_open_url.return_value = FakeResponse()
            mock_open.return_value = sink
            runner("download", "examples/datasets/coffeeshops.txt")

    # The "examples/" prefix must have been stripped before building the remote URL.
    mock_open_url.assert_called_once_with(EXAMPLES_BASE_URL + "datasets/coffeeshops.txt")

    assert sink.getvalue() == payload
40 | 
41 | 
@all_runners
def test_download_fails_non_example(runner):
    """Paths outside of the examples are rejected with a ValueError."""
    unsupported_path = "something/entirely/different.txt"
    with pytest.raises(ValueError):
        runner("download", unsupported_path)
46 | 


--------------------------------------------------------------------------------
/tests/plugins/test_console.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import MagicMock
 2 | 
 3 | from whistle import EventDispatcher
 4 | 
 5 | import bonobo
 6 | from bonobo.execution import events
 7 | from bonobo.execution.contexts.graph import GraphExecutionContext
 8 | from bonobo.plugins.console import ConsoleOutputPlugin
 9 | 
10 | 
def test_register_unregister():
    """register() attaches the three listeners, unregister() detaches all of them."""
    plugin = ConsoleOutputPlugin()
    dispatcher = EventDispatcher()

    # Listener expected on each event, checked in the same order before and after.
    expected = (
        (events.START, plugin.setup),
        (events.TICK, plugin.tick),
        (events.STOPPED, plugin.teardown),
    )

    plugin.register(dispatcher)
    for event_id, listener in expected:
        assert listener in dispatcher.get_listeners(event_id)

    plugin.unregister(dispatcher)
    for event_id, listener in expected:
        assert listener not in dispatcher.get_listeners(event_id)
23 | 
24 | 
def test_one_pass():
    """A full START / TICK / STOPPED cycle against a mocked graph context must not raise."""
    dispatcher = EventDispatcher()
    plugin = ConsoleOutputPlugin()
    plugin.register(dispatcher)

    # The mock only exposes what a real graph execution context exposes.
    context = MagicMock(spec=GraphExecutionContext(bonobo.Graph()))

    for event_id in (events.START, events.TICK, events.STOPPED):
        dispatcher.dispatch(event_id, events.ExecutionEvent(context))

    plugin.unregister(dispatcher)
38 | 


--------------------------------------------------------------------------------
/tests/nodes/io/test_pickle.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | 
 3 | import pytest
 4 | 
 5 | from bonobo import PickleReader, PickleWriter
 6 | from bonobo.constants import EMPTY
 7 | from bonobo.execution.contexts.node import NodeExecutionContext
 8 | from bonobo.util.testing import BufferingNodeExecutionContext, FilesystemTester
 9 | 
10 | pickle_tester = FilesystemTester("pkl", mode="wb")
11 | pickle_tester.input_data = pickle.dumps([["a", "b", "c"], ["a foo", "b foo", "c foo"], ["a bar", "b bar", "c bar"]])
12 | 
13 | 
def test_write_pickled_dict_to_file(tmpdir):
    """After two writes, the file unpickles to the first dict; no file handle is left on the context."""
    fs, filename, services = pickle_tester.get_services_for_writer(tmpdir)

    writer = PickleWriter(filename)
    with NodeExecutionContext(writer, services=services) as context:
        context.write_sync({"foo": "bar"}, {"foo": "baz", "ignore": "this"})

    with fs.open(filename, "rb") as fp:
        stored = pickle.loads(fp.read())
    assert stored == {"foo": "bar"}

    # Once the context is closed, it must not expose a "file" attribute.
    with pytest.raises(AttributeError):
        context.file
25 | 
26 | 
def test_read_pickled_list_from_file(tmpdir):
    """The first pickled row becomes the output fields, the remaining rows are emitted as tuples."""
    fs, filename, services = pickle_tester.get_services_for_reader(tmpdir)

    reader = PickleReader(filename)
    with BufferingNodeExecutionContext(reader, services=services) as context:
        context.write_sync(EMPTY)

    rows = context.get_buffer()
    assert context.get_output_fields() == ("a", "b", "c")
    assert rows == [("a foo", "b foo", "c foo"), ("a bar", "b bar", "c bar")]
36 | 


--------------------------------------------------------------------------------
/bonobo/util/api.py:
--------------------------------------------------------------------------------
 1 | from bonobo.util import get_name
 2 | 
 3 | 
 4 | class ApiHelper:
 5 |     # TODO __all__ kwarg only
 6 |     def __init__(self, __all__):
 7 |         self.__all__ = __all__
 8 | 
 9 |     def register(self, x, graph=False):
10 |         """Register a function as being part of an API, then returns the original function."""
11 | 
12 |         if graph:
13 |             # This function must comply to the "graph" API interface, meaning it can bahave like bonobo.run.
14 |             from inspect import signature
15 | 
16 |             parameters = list(signature(x).parameters)
17 |             required_parameters = {"plugins", "services", "strategy"}
18 |             assert (
19 |                 len(parameters) > 0 and parameters[0] == "graph"
20 |             ), 'First parameter of a graph api function must be "graph".'
21 |             assert (
22 |                 required_parameters.intersection(parameters) == required_parameters
23 |             ), "Graph api functions must define the following parameters: " + ", ".join(sorted(required_parameters))
24 | 
25 |         self.__all__.append(get_name(x))
26 |         return x
27 | 
28 |     def register_graph(self, x):
29 |         return self.register(x, graph=True)
30 | 
31 |     def register_group(self, *args, check=None):
32 |         check = set(check) if check else None
33 |         for attr in args:
34 |             self.register(attr)
35 |             if check:
36 |                 check.remove(get_name(attr))
37 |         assert not (check and len(check))
38 | 


--------------------------------------------------------------------------------
/docs/reference/commands.rst:
--------------------------------------------------------------------------------
 1 | Command-line
 2 | ============
 3 | 
 4 | 
 5 | Bonobo Convert
 6 | ::::::::::::::
 7 | 
 8 | Build a simple bonobo graph with one reader and one writer, then execute it, allowing to use bonobo in "no code" mode
 9 | for simple file format conversions.
10 | 
11 | Syntax: `bonobo convert [-r reader] input_filename [-w writer] output_filename`
12 | 
13 | .. todo::
14 | 
15 |     add a way to override default options of reader/writers, add a way to add "filters", for example this could be used
16 |     to read from csv and write to csv too (or other format) but adding a geocoder filter that would add some fields.
17 | 
18 | 
19 | Bonobo Inspect
20 | ::::::::::::::
21 | 
22 | Inspects a bonobo graph source file. For now, only supports graphviz output.
23 | 
24 | Syntax: `bonobo inspect [--graph|-g] filename`
25 | 
26 | Requires graphviz if you want to generate an actual graph picture, although the command itself depends on nothing.
27 | 
28 | 
29 | Bonobo Run
30 | ::::::::::
31 | 
32 | Run a transformation graph.
33 | 
34 | Syntax: `bonobo run [-c cmd | -m mod | file | -] [arg]`
35 | 
36 | .. todo:: implement -m, check if -c is of any use and if yes, implement it too. Implement args, too.
37 | 
38 | 
39 | Bonobo RunC
40 | :::::::::::
41 | 
42 | Run a transformation graph in a docker container.
43 | 
44 | Syntax: `bonobo runc [-c cmd | -m mod | file | -] [arg]`
45 | 
46 | .. todo:: implement -m, check if -c is of any use and if yes, implement it too. Implement args, too.
47 | 
48 | Requires `bonobo-docker`, install with `docker` extra: `pip install bonobo[docker]`.
49 | 
50 | 


--------------------------------------------------------------------------------
/bonobo/nodes/throttle.py:
--------------------------------------------------------------------------------
 1 | import threading
 2 | import time
 3 | 
 4 | from bonobo.config import Configurable, ContextProcessor, Method, Option
 5 | 
 6 | 
 7 | class RateLimitBucket(threading.Thread):
 8 |     daemon = True
 9 | 
10 |     @property
11 |     def stopped(self):
12 |         return self._stop_event.is_set()
13 | 
14 |     def __init__(self, initial=1, period=1, amount=1):
15 |         super(RateLimitBucket, self).__init__()
16 |         self.semaphore = threading.BoundedSemaphore(initial)
17 |         self.amount = amount
18 |         self.period = period
19 | 
20 |         self._stop_event = threading.Event()
21 | 
22 |     def stop(self):
23 |         self._stop_event.set()
24 | 
25 |     def run(self):
26 |         while not self.stopped:
27 |             time.sleep(self.period)
28 |             for _ in range(self.amount):
29 |                 self.semaphore.release()
30 | 
31 |     def wait(self):
32 |         return self.semaphore.acquire()
33 | 
34 | 
class RateLimited(Configurable):
    """Configurable that waits for a token from a :class:`RateLimitBucket` before each handler call."""

    # The wrapped callable, invoked once a token has been obtained.
    handler = Method()

    # Constructor arguments of the RateLimitBucket, forwarded by name in bucket() below.
    initial = Option(int, positional=True, default=1)
    period = Option(int, positional=True, default=1)
    amount = Option(int, positional=True, default=1)

    @ContextProcessor
    def bucket(self, context):
        # Pass the options by keyword: RateLimitBucket's signature is (initial, period, amount),
        # so a positional (initial, amount, period) call would swap the period and the amount.
        bucket = RateLimitBucket(initial=self.initial, period=self.period, amount=self.amount)
        bucket.start()
        yield bucket
        # Code after the yield: stop the refill thread and wait for it to finish.
        bucket.stop()
        bucket.join()

    def __call__(self, bucket, *args, **kwargs):
        # Blocks until the bucket has a token available.
        bucket.wait()
        return self.handler(*args, **kwargs)
53 | 


--------------------------------------------------------------------------------
/bonobo/contrib/jupyter/js/src/bonobo.js:
--------------------------------------------------------------------------------
 1 | var widgets = require('jupyter-js-widgets');
 2 | var _ = require('underscore');
 3 | 
 4 | // Custom Model. Custom widgets models must at least provide default values
 5 | // for model attributes, including `_model_name`, `_view_name`, `_model_module`
 6 | // and `_view_module` when different from the base class.
 7 | //
 8 | // When serializing entire widget state for embedding, only values different from the
 9 | // defaults will be specified.
10 | 
// Attribute defaults specific to the bonobo widget; they are layered over the base
// DOMWidgetModel defaults when the model class is defined below.
const bonoboModelDefaults = {
    _model_name: 'BonoboModel',
    _view_name: 'BonoboView',
    _model_module: 'bonobo',
    _view_module: 'bonobo',
    value: []
};

const BonoboModel = widgets.DOMWidgetModel.extend({
    defaults: _.extend({}, widgets.DOMWidgetModel.prototype.defaults, bonoboModelDefaults)
});
20 | 
21 | 
22 | // Custom View. Renders the widget model.
// Custom View. Renders the widget model as an HTML table, one row per entry of `value`.
const BonoboView = widgets.DOMWidgetView.extend({
    // Draw once, then redraw every time the model's `value` attribute changes.
    render: function () {
        this.value_changed();
        this.model.on('change:value', this.value_changed, this);
    },

    // Rebuild the whole table from the current `value`.
    value_changed: function () {
        const rows = this.model.get('value').map((node) => {
            // Escape every cell: the values are plain text, and anything containing markup
            // characters (for example a name such as "<lambda>") would otherwise be parsed
            // as HTML instead of being displayed.
            const cells = [node.status, node.name, node.stats, node.flags].map(
                (cell) => `<td>${_.escape(cell)}</td>`
            );
            return `<tr>${cells.join('')}</tr>`;
        });

        this.$el.html(
            '<div class="rendered_html"><table style="margin: 0; border: 1px solid black;">' +
            rows.join('\n') +
            '</table></div>'
        );
    },
});
37 | 
38 | 
// Public entry points of the widget module.
module.exports = {BonoboModel, BonoboView};
43 | 


--------------------------------------------------------------------------------
/bonobo/commands/templates/default.py-tpl:
--------------------------------------------------------------------------------
 1 | import bonobo
 2 | 
 3 | 
 4 | def extract():
 5 |     """Placeholder, change, rename, remove... """
 6 |     yield 'hello'
 7 |     yield 'world'
 8 | 
 9 | 
def transform(*args):
    """Placeholder, change, rename, remove... """
    # Title-case every value of the row, and emit the result as one tuple.
    yield tuple(str.title(value) for value in args)
15 | 
16 | 
def load(*args):
    """Placeholder, change, rename, remove... """
    # Print all the values of the row on one line of standard output.
    print(*args)
20 | 
21 | 
def get_graph(**options):
    """
    Build the graph to execute: a single chain going from extract, through transform, to load.

    :return: bonobo.Graph

    """
    graph = bonobo.Graph()
    graph >> extract >> transform >> load
    return graph
37 | 
38 | 
def get_services(**options):
    """
    Build the services dictionary, a plain mapping of names to implementations that bonobo uses
    for runtime injection.

    It is applied on top of the defaults provided by bonobo (fs, http, ...): override them here, or
    leave them to the framework. Your own services can be added too, under any name you like.

    :return: dict
    """
    return dict()
50 | 
51 | 
# When run as a script: parse the command line, then build and execute the graph.
if __name__ == '__main__':
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        graph = get_graph(**options)
        services = get_services(**options)
        bonobo.run(graph, services=services)
60 | 


--------------------------------------------------------------------------------
/docs/extension/jupyter.rst:
--------------------------------------------------------------------------------
 1 | Working with Jupyter
 2 | ====================
 3 | 
 4 | .. include:: _beta.rst
 5 | 
 6 | There is a builtin plugin that integrates (somewhat minimalistically, for now) bonobo within jupyter notebooks, so
 7 | you can read the execution status of a graph within a nice (ok, not so nice) html/javascript widget.
 8 | 
 9 | Installation
10 | ::::::::::::
11 | 
12 | Install `bonobo` with the **jupyter** extra::
13 | 
14 |     pip install bonobo[jupyter]
15 | 
16 | Install the jupyter extension::
17 | 
18 |     jupyter nbextension enable --py --sys-prefix widgetsnbextension
19 |     jupyter nbextension enable --py --sys-prefix bonobo.contrib.jupyter
20 | 
21 | Development
22 | :::::::::::
23 | 
24 | You should favor yarn over npm to install node packages. If you prefer to use npm, it's up to you to adapt the code.
25 | 
26 | To install the widget for development, make sure you're using an editable install of bonobo (see install document)::
27 | 
28 |     jupyter nbextension install --py --symlink --sys-prefix bonobo.contrib.jupyter
29 |     jupyter nbextension enable --py --sys-prefix bonobo.contrib.jupyter
30 | 
31 | If you want to change the javascript, you should run webpack in watch mode in some terminal::
32 | 
33 |     cd bonobo/contrib/jupyter/js
34 |     yarn install
35 |     ./node_modules/.bin/webpack --watch
36 | 
37 | To compile the widget into a distributable version (which gets packaged on PyPI when a release is made), just run
38 | webpack::
39 | 
40 |     ./node_modules/.bin/webpack
41 | 
42 | 
43 | Source code
44 | :::::::::::
45 | 
46 | https://github.com/python-bonobo/bonobo/tree/master/bonobo/contrib/jupyter
47 | 


--------------------------------------------------------------------------------
/tests/nodes/io/test_file.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | from bonobo import FileReader, FileWriter
 4 | from bonobo.constants import EMPTY
 5 | from bonobo.execution.contexts.node import NodeExecutionContext
 6 | from bonobo.util.testing import BufferingNodeExecutionContext, FilesystemTester
 7 | 
 8 | txt_tester = FilesystemTester("txt")
 9 | txt_tester.input_data = "Hello\nWorld\n"
10 | 
11 | 
def test_file_writer_contextless(tmpdir):
    """A FileWriter can open its target by itself, without any execution context."""
    fs, filename, services = txt_tester.get_services_for_writer(tmpdir)

    writer = FileWriter(path=filename)
    with writer.open(fs) as fp:
        fp.write("Yosh!")

    with fs.open(filename) as fp:
        content = fp.read()
    assert content == "Yosh!"
20 | 
21 | 
@pytest.mark.parametrize(
    "lines,output",
    [
        # one line...
        (("ACME",), "ACME"),
        # more than one line...
        (("Foo", "Bar", "Baz"), "Foo\nBar\nBaz"),
    ],
)
def test_file_writer_in_context(tmpdir, lines, output):
    """Lines written through an execution context end up in the file, joined by newlines."""
    fs, filename, services = txt_tester.get_services_for_writer(tmpdir)

    writer = FileWriter(path=filename)
    with NodeExecutionContext(writer, services=services) as context:
        context.write_sync(*lines)

    with fs.open(filename) as fp:
        written = fp.read()
    assert written == output
34 | 
35 | 
def test_file_reader(tmpdir):
    """Each line of the input file is emitted as a one-element tuple."""
    fs, filename, services = txt_tester.get_services_for_reader(tmpdir)

    reader = FileReader(path=filename)
    with BufferingNodeExecutionContext(reader, services=services) as context:
        context.write_sync(EMPTY)

    rows = context.get_buffer()
    assert len(rows) == 2
    first, second = rows
    assert first == ("Hello",)
    assert second == ("World",)
46 | 


--------------------------------------------------------------------------------
/bonobo/examples/datasets/coffeeshops.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import bonobo
 4 | from bonobo import examples
 5 | from bonobo.contrib.opendatasoft import OpenDataSoftAPI as ODSReader
 6 | from bonobo.examples import get_services
 7 | from bonobo.structs.graphs import PartialGraph
 8 | 
 9 | 
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Extract the list of one-euro cafes in Paris, rename the name, address and zipcode fields,
    reorder the fields, then write the result to csv, json and line-delimited json files.

    """
    if not graph:
        graph = bonobo.Graph()

    ordered_fields = ["name", "address", "zipcode", "city", "country", "geometry", "geoloc"]
    csv_fields = ["name", "address", "zipcode", "city"]

    # Shared part of the graph; the three writers below all branch from its output.
    producer = (
        graph.get_cursor()
        >> ODSReader(dataset="liste-des-cafes-a-un-euro", netloc="opendata.paris.fr")
        >> PartialGraph(*_limit)
        >> bonobo.UnpackItems(0)
        >> bonobo.Rename(name="nom_du_cafe", address="adresse", zipcode="arrondissement")
        >> bonobo.Format(city="Paris", country="France")
        >> bonobo.OrderFields(ordered_fields)
        >> PartialGraph(*_print)
    )

    # Comma separated values.
    csv_writer = bonobo.CsvWriter("coffeeshops.csv", fields=csv_fields, delimiter=",")
    graph.get_cursor(producer.output) >> csv_writer

    # Standard JSON
    json_writer = bonobo.JsonWriter(path="coffeeshops.json")
    graph.get_cursor(producer.output) >> json_writer

    # Line-delimited JSON
    ldjson_writer = bonobo.LdjsonWriter(path="coffeeshops.ldjson")
    graph.get_cursor(producer.output) >> ldjson_writer

    return graph
41 | 
42 | 
43 | if __name__ == "__main__":
44 |     sys.exit(examples.run(get_graph, get_services))
45 | 


--------------------------------------------------------------------------------
/bonobo/examples/datasets/fablabs.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Extracts a list of fablabs in the world, restricted to the ones in France, then formats it both for a nice console
 3 | output and a flat txt file.
 4 | 
 5 | .. graphviz::
 6 | 
 7 |     digraph {
 8 |         rankdir = LR;
 9 |         stylesheet = "../_static/graphs.css";
10 | 
11 |         BEGIN [shape="point"];
12 |         BEGIN -> "ODS()" -> "normalize" -> "filter_france" -> "Tee()" -> "JsonWriter()";
13 |     }
14 | 
15 | """
16 | 
17 | import json
18 | import sys
19 | 
20 | import bonobo
21 | from bonobo import examples
22 | from bonobo.contrib.opendatasoft import OpenDataSoftAPI
23 | from bonobo.examples import get_services
24 | 
25 | try:
26 |     import pycountry
27 | except ImportError as exc:
28 |     raise ImportError('You must install package "pycountry" to run this example.') from exc
29 | 
30 | API_DATASET = "fablabs@public-us"
31 | ROWS = 100
32 | 
33 | 
34 | def _getlink(x):
35 |     return x.get("url", None)
36 | 
37 | 
def normalize(row):
    """Return a copy of *row* with its links decoded and its country name resolved.

    * ``links`` is parsed from its JSON text, and reduced to the list of non-empty ``url``
      values of its items (an absent or empty value gives an empty list).
    * ``country`` is the name of the country whose ISO alpha-2 code is in ``country_code``, or
      None when the code is absent or unknown.

    :param row: mapping describing one fablab.
    :return: a new dict; *row* itself is not modified.
    """
    raw_links = row.get("links")
    links = json.loads(raw_links) if raw_links else []

    country_code = (row.get("country_code") or "").upper()
    try:
        country = pycountry.countries.get(alpha_2=country_code)
    except LookupError:
        # NOTE(review): some pycountry versions appear to raise on unknown codes rather than
        # returning None -- confirm against the pinned version; both cases are handled here.
        country = None

    return {
        **row,
        "links": list(filter(None, map(_getlink, links))),
        # The lookup fails for an empty or unknown code: report "no country" rather than crash.
        "country": country.name if country is not None else None,
    }
45 | 
46 | 
def get_graph(graph=None, *, _limit=(), _print=()):
    """Add the fablabs chain (API reader, normalization, JSON writer) to *graph*, or to a new graph.

    The nodes given in ``_limit`` are inserted right after the reader, and the ones in
    ``_print`` right before the writer.
    """
    if not graph:
        graph = bonobo.Graph()

    nodes = [
        OpenDataSoftAPI(dataset=API_DATASET),
        *_limit,
        normalize,
        bonobo.UnpackItems(0),
        *_print,
        bonobo.JsonWriter(path="fablabs.json"),
    ]
    graph.add_chain(*nodes)
    return graph
58 | 
59 | 
60 | if __name__ == "__main__":
61 |     sys.exit(examples.run(get_graph, get_services))
62 | 


--------------------------------------------------------------------------------
/docs/guide/debugging.rst:
--------------------------------------------------------------------------------
 1 | Debugging
 2 | =========
 3 | 
 4 | .. note::
 5 | 
 6 |     This document is still being written, but its content should be correct (albeit succinct).
 7 | 
 8 | Using a debugger (pdb...)
 9 | :::::::::::::::::::::::::
10 | 
11 | Using a debugger works (as with any piece of Python code), but you must be aware that each node runs in a separate
12 | thread, which means a few things:
13 | 
14 | * If a breakpoint happens in a thread, then this thread will stop, but all other threads will continue running. This
15 |   can be especially annoying if you try to use the pdb REPL for example, as your prompt will be overridden a few
16 |   times/second by the current execution statistics.
17 | 
18 |   To avoid that, you can run bonobo with `QUIET=1` in the environment, to hide statistics.
19 | 
20 | * If your breakpoint never happens (although it's at the very beginning of your transformation), it may mean that
21 |   something happens outside of the transform. The :class:`bonobo.execution.contexts.NodeExecutionContext` instance that
22 |   surrounds your transformation may be stuck in its `while True: transform()` loop.
23 | 
24 |   Break one level higher
25 | 
26 | 
27 | Using printing statements
28 | :::::::::::::::::::::::::
29 | 
30 | Of course, you can :obj:`print` things.
31 | 
32 | You can even add :obj:`print` statements in graphs, to :obj:`print` once per row.
33 | 
34 | A better :obj:`print` is available though, suitable for both flow-based data processing and human eyes.
35 | Check :class:`bonobo.PrettyPrinter`.
36 | 
37 | 
38 | Inspecting graphs
39 | :::::::::::::::::
40 | 
41 | * Using the console: `bonobo inspect --graph`.
42 | * Using Jupyter notebook: install the extension and just display a graph.
43 | 
44 | 
45 | .. include:: _next.rst
46 | 


--------------------------------------------------------------------------------
/tests/commands/test_run.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from unittest.mock import patch
 3 | 
 4 | from bonobo import get_examples_path
 5 | from bonobo.util.testing import all_runners
 6 | 
 7 | 
 8 | @all_runners
 9 | def test_run(runner):
10 |     out, err = runner("run", "--quiet", get_examples_path("types/strings.py"))
11 |     out = out.split("\n")
12 |     assert out[0].startswith("Foo ")
13 |     assert out[1].startswith("Bar ")
14 |     assert out[2].startswith("Baz ")
15 | 
16 | 
17 | @all_runners
18 | def test_run_module(runner):
19 |     out, err = runner("run", "--quiet", "-m", "bonobo.examples.types.strings")
20 |     out = out.split("\n")
21 |     assert out[0].startswith("Foo ")
22 |     assert out[1].startswith("Bar ")
23 |     assert out[2].startswith("Baz ")
24 | 
25 | 
26 | @all_runners
27 | def test_run_path(runner):
28 |     out, err = runner("run", "--quiet", get_examples_path("types"))
29 |     out = out.split("\n")
30 |     assert out[0].startswith("Foo ")
31 |     assert out[1].startswith("Bar ")
32 |     assert out[2].startswith("Baz ")
33 | 
34 | 
35 | @all_runners
36 | def test_install_requirements_for_dir(runner):
37 |     dirname = get_examples_path("types")
38 |     with patch("bonobo.commands.run._install_requirements") as install_mock:
39 |         runner("run", "--install", dirname)
40 |     install_mock.assert_called_once_with(os.path.join(dirname, "requirements.txt"))
41 | 
42 | 
43 | @all_runners
44 | def test_install_requirements_for_file(runner):
45 |     dirname = get_examples_path("types")
46 |     with patch("bonobo.commands.run._install_requirements") as install_mock:
47 |         runner("run", "--install", os.path.join(dirname, "strings.py"))
48 |     install_mock.assert_called_once_with(os.path.join(dirname, "requirements.txt"))
49 | 


--------------------------------------------------------------------------------
/bonobo/execution/strategies/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Execution strategies define how an actual job execution will happen. Default and recommended strategy is "threadpool",
 3 | for now, which leverages a :obj:`concurrent.futures.ThreadPoolExecutor` to run each node in a separate thread.
 4 | 
 5 | In the future, the two strategies that would really benefit bonobo are subprocess and dask/dask.distributed. Please
 6 | feel free to give it a shot.
 7 | 
 8 | """
 9 | from bonobo.execution.strategies.executor import (
10 |     AsyncThreadPoolExecutorStrategy, ProcessPoolExecutorStrategy, ThreadPoolExecutorStrategy
11 | )
12 | from bonobo.execution.strategies.naive import NaiveStrategy
13 | 
14 | __all__ = ["create_strategy"]
15 | 
16 | STRATEGIES = {
17 |     "naive": NaiveStrategy,
18 |     "processpool": ProcessPoolExecutorStrategy,
19 |     "threadpool": ThreadPoolExecutorStrategy,
20 |     "aio_threadpool": AsyncThreadPoolExecutorStrategy,
21 | }
22 | 
23 | DEFAULT_STRATEGY = "threadpool"
24 | 
25 | 
26 | def create_strategy(name=None):
27 |     """
28 |     Create a strategy, or just return it if it's already one.
29 | 
30 |     :param name: strategy name (a key of STRATEGIES), a Strategy instance, or None for the default strategy.
31 |     :return: Strategy
32 |     """
33 |     import logging
34 |     from bonobo.execution.strategies.base import Strategy
35 | 
36 |     if isinstance(name, Strategy):
37 |         return name
38 | 
39 |     if name is None:
40 |         name = DEFAULT_STRATEGY
41 | 
42 |     logging.debug("Creating execution strategy {!r}...".format(name))
43 | 
44 |     try:
45 |         factory = STRATEGIES[name]
46 |     except KeyError as exc:
47 |         raise RuntimeError(
48 |             "Invalid strategy {}. Available choices: {}.".format(repr(name), ", ".join(sorted(STRATEGIES.keys())))
49 |         ) from exc
50 | 
51 |     return factory()
52 | 


--------------------------------------------------------------------------------
/bonobo/examples/datasets/static/passwd.txt:
--------------------------------------------------------------------------------
 1 | root:x:0:0:root:/root:/bin/bash
 2 | daemon:x:105:1:daemon:/usr/sbin:/usr/sbin/nologin
 3 | bin:x:2:2:bin:/bin:/usr/sbin/nologin
 4 | sys:x:3:3:sys:/dev:/usr/sbin/nologin
 5 | sync:x:4:65534:sync:/bin:/bin/sync
 6 | games:x:5:60:games:/usr/games:/usr/sbin/nologin
 7 | man:x:6:12:man:/var/cache/man:/usr/sbin/nologin
 8 | lp:x:7:7:lp:/var/spool/lpd:/usr/sbin/nologin
 9 | mail:x:0:8:mail:/var/mail:/usr/sbin/nologin
10 | news:x:9:9:news:/var/spool/news:/usr/sbin/nologin
11 | uucp:x:10:10:uucp:/var/spool/uucp:/usr/sbin/nologin
12 | proxy:x:13:13:proxy:/bin:/usr/sbin/nologin
13 | www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin
14 | backup:x:33:34:backup:/var/backups:/usr/sbin/nologin
15 | list:x:38:38:Mailing List Manager:/var/list:/usr/sbin/nologin
16 | irc:x:39:39:ircd:/var/run/ircd:/usr/sbin/nologin
17 | gnats:x:41:41:Gnats Bug-Reporting System (admin):/var/lib/gnats:/usr/sbin/nologin
18 | nobody:x:65534:65534:nobody:/nonexistent:/usr/sbin/nologin
19 | systemd-timesync:x:33:103:systemd Time Synchronization,,,:/run/systemd:/bin/false
20 | systemd-network:x:101:104:systemd Network Management,,,:/run/systemd/netif:/bin/false
21 | systemd-resolve:x:102:105:systemd Resolver,,,:/run/systemd/resolve:/bin/false
22 | systemd-bus-proxy:x:103:106:systemd Bus Proxy,,,:/run/systemd:/bin/false
23 | sshd:x:104:65534::/var/run/sshd:/usr/sbin/nologin
24 | ntp:x:105:110::/home/ntp:/bin/false
25 | postfix:x:105:112::/var/spool/postfix:/bin/false
26 | messagebus:x:107:114::/var/run/dbus:/bin/false
27 | debian-security-support:x:108:115:Debian security support check,,,:/var/lib/debian-security-support:/bin/false
28 | snmp:x:109:116::/var/lib/snmp:/usr/sbin/nologin
29 | postgres:x:105:117:PostgreSQL administrator,,,:/var/lib/postgresql:/bin/bash
30 | redis:x:111:118::/var/lib/redis:/bin/false


--------------------------------------------------------------------------------
/bonobo/nodes/io/base.py:
--------------------------------------------------------------------------------
 1 | from bonobo.config import Configurable, ContextProcessor, Option, Service
 2 | 
 3 | 
 4 | def filesystem_path(path: str):
 5 |     if path.startswith("/"):
 6 |         raise ValueError(
 7 |             "File path should not be absolute. If you really need to provide absolute paths, then you must pass a "
 8 |             "filesystem instance that is bound to your filesystem root and provide a relative path from there."
 9 |         )
10 |     return str(path)
11 | 
12 | 
13 | class FileHandler(Configurable):
14 |     """Abstract component factory for file-related components.
15 | 
16 |     Args:
17 |         fs (str): service name to use for filesystem.
18 |         path (str): which path to use within the provided filesystem.
19 |         eol (str): which character to use to separate lines.
20 |         mode (str): which mode to use when opening the file.
21 |         encoding (str): which encoding to use when opening the file.
22 |     """
23 | 
24 |     path = Option(
25 |         filesystem_path, required=True, positional=True, __doc__="Path to use within the provided filesystem."
26 |     )  # type: str
27 |     eol = Option(str, default="\n", __doc__="Character to use as line separator.")  # type: str
28 |     mode = Option(str, __doc__="What mode to use for open() call.")  # type: str
29 |     encoding = Option(str, default="utf-8", __doc__="Encoding.")  # type: str
30 |     fs = Service("fs", __doc__="The filesystem instance to use.")  # type: str
31 | 
32 |     @ContextProcessor
33 |     def file(self, context, *, fs):
34 |         with self.open(fs) as file:
35 |             yield file
36 | 
37 |     def open(self, fs):
38 |         return fs.open(self.path, self.mode, encoding=self.encoding)
39 | 
40 | 
41 | class Reader:
42 |     pass
43 | 
44 | 
45 | class Writer:
46 |     pass
47 | 


--------------------------------------------------------------------------------
/bonobo/__init__.py:
--------------------------------------------------------------------------------
 1 | # Bonobo data-processing toolkit.
 2 | #
 3 | # Bonobo is a line-by-line data-processing toolkit for python 3.5+ emphasizing simplicity and atomicity of data
 4 | # transformations using a simple directed graph of python callables.
 5 | #
 6 | # Licensed under Apache License 2.0, read the LICENSE file in the root of the source tree.
 7 | 
 8 | import sys
 9 | from pathlib import Path
10 | 
11 | from bonobo._api import *
12 | from bonobo._api import (
13 |     CsvReader, CsvWriter, FileReader, FileWriter, Filter, FixedWindow, Format, Graph, JsonReader, JsonWriter,
14 |     LdjsonReader, LdjsonWriter, Limit, MapFields, OrderFields, PickleReader, PickleWriter, PrettyPrinter, RateLimited,
15 |     Rename, SetFields, Tee, UnpackItems, __all__, __doc__, count, create_reader, create_strategy, create_writer,
16 |     get_argument_parser, get_examples_path, identity, inspect, noop, open_examples_fs, open_fs, parse_args, run
17 | )
18 | from bonobo._version import __version__
19 | 
20 | if sys.version_info < (3, 5):
21 |     raise RuntimeError("Python 3.5+ is required to use Bonobo.")
22 | 
23 | 
24 | __all__ = ["__version__"] + __all__
25 | with (Path(__file__).parent / "bonobo.svg").open() as f:
26 |     __logo__ = f.read()
27 | __doc__ = __doc__  # lgtm [py/redundant-assignment]
28 | __version__ = __version__  # lgtm [py/redundant-assignment]
29 | 
30 | 
31 | def _repr_html_():
32 |     """This allows to easily display a version snippet in Jupyter."""
33 |     from bonobo.commands.version import get_versions
34 | 
35 |     return (
36 |         '<div style="padding: 8px;">'
37 |         '  <div style="float: left; width: 20px; height: 20px;">{}</div>'
38 |         '  <pre style="white-space: nowrap; padding-left: 8px">{}</pre>'
39 |         "</div>"
40 |     ).format(__logo__, "<br/>".join(get_versions(all=True)))
41 | 
42 | 
43 | del sys, Path, f
44 | 


--------------------------------------------------------------------------------
/bonobo/commands/version.py:
--------------------------------------------------------------------------------
 1 | from mondrian import humanizer
 2 | 
 3 | from bonobo.commands import BaseCommand
 4 | 
 5 | 
 6 | def get_versions(*, all=False, quiet=None):
 7 |     import bonobo
 8 |     from bonobo.util.pkgs import bonobo_packages
 9 | 
10 |     yield _format_version(bonobo, quiet=quiet)
11 | 
12 |     if all:
13 |         for name in sorted(bonobo_packages):
14 |             if name != "bonobo":
15 |                 try:
16 |                     mod = __import__(name.replace("-", "_"))
17 |                     try:
18 |                         yield _format_version(mod, name=name, quiet=quiet)
19 |                     except Exception as exc:
20 |                         yield "{} ({})".format(name, exc)
21 |                 except ImportError as exc:
22 |                     yield "{} is not importable ({}).".format(name, exc)
23 | 
24 | 
25 | class VersionCommand(BaseCommand):
26 |     @humanizer.humanize()
27 |     def handle(self, *, all=False, quiet=False):
28 |         for line in get_versions(all=all, quiet=quiet):
29 |             print(line)
30 | 
31 |     def add_arguments(self, parser):
32 |         parser.add_argument("--all", "-a", action="store_true")
33 |         parser.add_argument("--quiet", "-q", action="count")
34 | 
35 | 
36 | def _format_version(mod, *, name=None, quiet=False):
37 |     from bonobo.util.pkgs import bonobo_packages
38 | 
39 |     args = {
40 |         "name": name or mod.__name__,
41 |         "version": mod.__version__,
42 |         "location": bonobo_packages[name or mod.__name__].location,
43 |     }
44 | 
45 |     if not quiet:
46 |         return "{name} v.{version} (in {location})".format(**args)
47 |     if quiet < 2:
48 |         return "{name} {version}".format(**args)
49 |     if quiet < 3:
50 |         return "{version}".format(**args)
51 | 
52 |     raise RuntimeError("Hard to be so quiet...")
53 | 


--------------------------------------------------------------------------------
/tests/config/test_processors.py:
--------------------------------------------------------------------------------
 1 | from operator import attrgetter
 2 | 
 3 | from bonobo.config import Configurable
 4 | from bonobo.config.processors import ContextCurrifier, ContextProcessor, resolve_processors, use_context_processor
 5 | 
 6 | 
 7 | class CP1(Configurable):
 8 |     @ContextProcessor
 9 |     def c(self):
10 |         yield
11 | 
12 |     @ContextProcessor
13 |     def a(self):
14 |         yield "this is A"
15 | 
16 |     @ContextProcessor
17 |     def b(self, a):
18 |         yield a.upper()[:-1] + "b"
19 | 
20 |     def __call__(self, a, b):
21 |         return a, b
22 | 
23 | 
24 | class CP2(CP1):
25 |     @ContextProcessor
26 |     def f(self):
27 |         pass
28 | 
29 |     @ContextProcessor
30 |     def e(self):
31 |         pass
32 | 
33 |     @ContextProcessor
34 |     def d(self):
35 |         pass
36 | 
37 | 
38 | class CP3(CP2):
39 |     @ContextProcessor
40 |     def c(self):
41 |         pass
42 | 
43 |     @ContextProcessor
44 |     def b(self):
45 |         pass
46 | 
47 | 
48 | def get_all_processors_names(cls):
49 |     return list(map(attrgetter("__name__"), resolve_processors(cls)))
50 | 
51 | 
52 | def test_inheritance_and_ordering():
53 |     assert get_all_processors_names(CP1) == ["c", "a", "b"]
54 |     assert get_all_processors_names(CP2) == ["c", "a", "b", "f", "e", "d"]
55 |     assert get_all_processors_names(CP3) == ["c", "a", "b", "f", "e", "d", "c", "b"]
56 | 
57 | 
58 | def test_setup_teardown():
59 |     o = CP1()
60 |     stack = ContextCurrifier(o)
61 |     stack.setup()
62 |     assert o(*stack.args) == ("this is A", "THIS IS b")
63 |     stack.teardown()
64 | 
65 | 
66 | def test_processors_on_func():
67 |     def cp(context):
68 |         yield context
69 | 
70 |     @use_context_processor(cp)
71 |     def node(context):
72 |         pass
73 | 
74 |     assert get_all_processors_names(node) == ["cp"]
75 | 


--------------------------------------------------------------------------------
/docs/tutorial/index.rst:
--------------------------------------------------------------------------------
 1 | First steps
 2 | ===========
 3 | 
 4 | Bonobo is an ETL (Extract-Transform-Load) framework for Python 3.5+. The goal is to define data transformations, with
 5 | Python code in charge of handling similarly shaped, independent lines of data.
 6 | 
 7 | Bonobo *is not* a statistical or data-science tool. If you're looking for a data-analysis tool in python, use Pandas.
 8 | 
 9 | Bonobo is a lean manufacturing assembly line for data that lets you focus on the actual work instead of the plumbing
10 | (execution contexts, parallelism, error handling, console output, logging, ...).
11 | 
12 | Bonobo uses simple python and should be quick and easy to learn.
13 | 
14 | **Tutorials**
15 | 
16 | .. toctree::
17 |     :maxdepth: 1
18 | 
19 |     1-init
20 |     2-jobs
21 |     3-files
22 |     4-services
23 |     5-packaging
24 | 
25 | **What's next?**
26 | 
27 | Once you're familiar with all the base concepts, you can...
28 | 
29 | * Read the :doc:`Guides </guide/index>` to have a deep dive in each concept.
30 | * Explore the :doc:`Extensions </extension/index>` to widen the possibilities:
31 | 
32 |   * :doc:`/extension/django`
33 |   * :doc:`/extension/docker`
34 |   * :doc:`/extension/jupyter`
35 |   * :doc:`/extension/sqlalchemy`
36 | 
37 | * Open the :doc:`References </reference/index>` and start hacking like crazy.
38 | 
39 | **You're not alone!**
40 | 
41 | Good documentation is not easy to write.
42 | 
43 | Although all content here should be accurate, you may feel a lack of completeness, for which we plead guilty and
44 | apologize.
45 | 
46 | If you're stuck, please come to the `Bonobo Slack Channel <https://bonobo-slack.herokuapp.com/>`_ and we'll figure it
47 | out.
48 | 
49 | If you're not stuck but had trouble understanding something, please consider contributing to the docs (using GitHub
50 | pull requests).
51 | 
52 | .. include:: _wip_note.rst
53 | 


--------------------------------------------------------------------------------
/bonobo/errors.py:
--------------------------------------------------------------------------------
 1 | from bonobo.util import get_name
 2 | 
 3 | 
 4 | class InactiveIOError(IOError):
 5 |     pass
 6 | 
 7 | 
 8 | class InactiveReadableError(InactiveIOError):
 9 |     pass
10 | 
11 | 
12 | class InactiveWritableError(InactiveIOError):
13 |     pass
14 | 
15 | 
16 | class ValidationError(RuntimeError):
17 |     def __init__(self, inst, message):
18 |         super(ValidationError, self).__init__(
19 |             "Validation error in {class_name}: {message}".format(class_name=type(inst).__name__, message=message)
20 |         )
21 | 
22 | 
23 | class ProhibitedOperationError(RuntimeError):
24 |     pass
25 | 
26 | 
27 | class ConfigurationError(Exception):
28 |     pass
29 | 
30 | 
31 | class UnrecoverableError(Exception):
32 |     """Flag for errors that must interrupt the workflow, either because they will happen for sure on each node run, or
33 |     because you know that your transformation has no point continuing running after a bad event."""
34 | 
35 | 
36 | class AbstractError(UnrecoverableError, NotImplementedError):
37 |     """Abstract error is a convenient error to declare a method as "being left as an exercise for the reader"."""
38 | 
39 |     def __init__(self, method):
40 |         super().__init__(
41 |             "Call to abstract method {class_name}.{method_name}(...): missing implementation.".format(
42 |                 class_name=get_name(method.__self__), method_name=get_name(method)
43 |             )
44 |         )
45 | 
46 | 
47 | class UnrecoverableTypeError(UnrecoverableError, TypeError):
48 |     pass
49 | 
50 | 
51 | class UnrecoverableAttributeError(UnrecoverableError, AttributeError):
52 |     pass
53 | 
54 | 
55 | class UnrecoverableValueError(UnrecoverableError, ValueError):
56 |     pass
57 | 
58 | 
59 | class UnrecoverableRuntimeError(UnrecoverableError, RuntimeError):
60 |     pass
61 | 
62 | 
63 | class UnrecoverableNotImplementedError(UnrecoverableError, NotImplementedError):
64 |     pass
65 | 
66 | 
67 | class MissingServiceImplementationError(UnrecoverableError, KeyError):
68 |     pass
69 | 


--------------------------------------------------------------------------------
/tests/config/test_methods_partial.py:
--------------------------------------------------------------------------------
 1 | from unittest.mock import MagicMock
 2 | 
 3 | from bonobo.config import Configurable, ContextProcessor, Method, Option
 4 | from bonobo.util.inspect import inspect_node
 5 | 
 6 | 
 7 | class Bobby(Configurable):
 8 |     handler = Method()
 9 |     handler2 = Method()
10 |     foo = Option(positional=True)
11 |     bar = Option(required=False)
12 | 
13 |     @ContextProcessor
14 |     def think(self, context):
15 |         yield "different"
16 | 
17 |     def __call__(self, think, *args, **kwargs):
18 |         self.handler("1", *args, **kwargs)
19 |         self.handler2("2", *args, **kwargs)
20 | 
21 | 
22 | def test_partial():
23 |     C = Bobby
24 | 
25 |     # inspect the configurable class
26 |     with inspect_node(C) as ci:
27 |         assert ci.type == Bobby
28 |         assert not ci.instance
29 |         assert len(ci.options) == 4
30 |         assert len(ci.processors) == 1
31 |         assert not ci.partial
32 | 
33 |     # instanciate a partial instance ...
34 |     f1 = MagicMock()
35 |     C = C(f1)
36 | 
37 |     with inspect_node(C) as ci:
38 |         assert ci.type == Bobby
39 |         assert not ci.instance
40 |         assert len(ci.options) == 4
41 |         assert len(ci.processors) == 1
42 |         assert ci.partial
43 |         assert ci.partial[0] == (f1,)
44 |         assert not len(ci.partial[1])
45 | 
46 |     # instanciate a more complete partial instance ...
47 |     f2 = MagicMock()
48 |     C = C(f2)
49 | 
50 |     with inspect_node(C) as ci:
51 |         assert ci.type == Bobby
52 |         assert not ci.instance
53 |         assert len(ci.options) == 4
54 |         assert len(ci.processors) == 1
55 |         assert ci.partial
56 |         assert ci.partial[0] == (f1, f2)
57 |         assert not len(ci.partial[1])
58 | 
59 |     c = C("foo")
60 | 
61 |     with inspect_node(c) as ci:
62 |         assert ci.type == Bobby
63 |         assert ci.instance
64 |         assert len(ci.options) == 4
65 |         assert len(ci.processors) == 1
66 |         assert not ci.partial
67 | 


--------------------------------------------------------------------------------
/tests/test_execution.py:
--------------------------------------------------------------------------------
 1 | from bonobo.config.processors import use_context_processor
 2 | from bonobo.constants import BEGIN, END
 3 | from bonobo.execution.contexts.graph import GraphExecutionContext
 4 | from bonobo.execution.strategies import NaiveStrategy
 5 | from bonobo.structs.graphs import Graph
 6 | 
 7 | 
 8 | def generate_integers():
 9 |     yield from range(10)
10 | 
11 | 
12 | def square(i):
13 |     return i ** 2
14 | 
15 | 
16 | def results(f, context):
17 |     results = yield list()
18 |     context.parent.results = results
19 | 
20 | 
21 | @use_context_processor(results)
22 | def push_result(results, i):
23 |     results.append(i)
24 | 
25 | 
26 | chain = (generate_integers, square, push_result)
27 | 
28 | 
29 | def test_empty_execution_context():
30 |     graph = Graph()
31 | 
32 |     ctx = GraphExecutionContext(graph)
33 |     assert not len(ctx.nodes)
34 |     assert not len(ctx.plugins)
35 | 
36 |     assert not ctx.alive
37 | 
38 | 
39 | def test_execution():
40 |     graph = Graph()
41 |     graph.add_chain(*chain)
42 | 
43 |     strategy = NaiveStrategy()
44 |     ctx = strategy.execute(graph)
45 | 
46 |     assert ctx.results == [1, 4, 9, 16, 25, 36, 49, 64, 81]
47 | 
48 | 
49 | def test_simple_execution_context():
50 |     graph = Graph()
51 |     graph.add_chain(*chain)
52 | 
53 |     context = GraphExecutionContext(graph)
54 |     assert len(context.nodes) == len(chain)
55 |     assert not len(context.plugins)
56 | 
57 |     for i, node in enumerate(chain):
58 |         assert context[i].wrapped is node
59 | 
60 |     assert not context.alive
61 |     assert not context.started
62 |     assert not context.stopped
63 | 
64 |     context.write(BEGIN, (), END)
65 | 
66 |     assert not context.alive
67 |     assert not context.started
68 |     assert not context.stopped
69 | 
70 |     context.start()
71 | 
72 |     assert context.alive
73 |     assert context.started
74 |     assert not context.stopped
75 | 
76 |     context.stop()
77 | 
78 |     assert not context.alive
79 |     assert context.started
80 |     assert context.stopped
81 | 


--------------------------------------------------------------------------------
/bonobo/examples/datasets/__main__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import bonobo
 4 | from bonobo import examples
 5 | from bonobo.examples import get_datasets_dir, get_minor_version, get_services
 6 | from bonobo.examples.datasets.coffeeshops import get_graph as get_coffeeshops_graph
 7 | from bonobo.examples.datasets.fablabs import get_graph as get_fablabs_graph
 8 | 
 9 | graph_factories = {"coffeeshops": get_coffeeshops_graph, "fablabs": get_fablabs_graph}
10 | 
11 | if __name__ == "__main__":
12 |     parser = examples.get_argument_parser()
13 |     parser.add_argument("--target", "-t", choices=graph_factories.keys(), nargs="+")
14 |     parser.add_argument("--sync", action="store_true", default=False)
15 | 
16 |     with bonobo.parse_args(parser) as options:
17 |         graph_options = examples.get_graph_options(options)
18 |         graph_names = list(options["target"] if options["target"] else sorted(graph_factories.keys()))
19 | 
20 |         # Create a graph with all requested subgraphs
21 |         graph = bonobo.Graph()
22 |         for name in graph_names:
23 |             graph = graph_factories[name](graph, **graph_options)
24 | 
25 |         bonobo.run(graph, services=get_services())
26 | 
27 |         if options["sync"]:
28 |             # TODO: once the parallel option for nodes is implemented, this needs to be rewritten to use a graph.
29 |             import boto3
30 | 
31 |             s3 = boto3.client("s3")
32 | 
33 |             local_dir = get_datasets_dir()
34 |             for root, dirs, files in os.walk(local_dir):
35 |                 for filename in files:
36 |                     local_path = os.path.join(root, filename)
37 |                     relative_path = os.path.relpath(local_path, local_dir)
38 |                     s3_path = os.path.join(get_minor_version(), relative_path)
39 | 
40 |                     try:
41 |                         s3.head_object(Bucket="bonobo-examples", Key=s3_path)
42 |                     except Exception:
43 |                         s3.upload_file(local_path, "bonobo-examples", s3_path, ExtraArgs={"ACL": "public-read"})
44 | 


--------------------------------------------------------------------------------
/docs/reference/examples.rst:
--------------------------------------------------------------------------------
 1 | Examples
 2 | ========
 3 | 
 4 | There are a few examples bundled with **bonobo**.
 5 | 
 6 | You'll find them under the :mod:`bonobo.examples` package, and you can run them directly as modules:
 7 | 
 8 | .. code-block:: shell-session
 9 | 
10 |     $ bonobo run -m bonobo.examples.module
11 | 
12 | 
13 | or
14 | 
15 | .. code-block:: shell-session
16 | 
17 |     $ python -m bonobo.examples.module
18 | 
19 | 
20 | 
21 | .. toctree::
22 |     :maxdepth: 4
23 | 
24 |     examples/tutorials
25 | 
26 | 
27 | Datasets
28 | ::::::::
29 | 
30 | 
31 | .. module:: bonobo.examples.datasets
32 | 
33 | The :mod:`bonobo.examples.datasets` package contains examples that generate datasets locally for other examples to
34 | use. As of today, we commit the content of those datasets to git, even if that may be a bad idea, so all the examples
35 | are easily runnable. Later, we'll see if we favor a "missing dependency exception" approach.
36 | 
37 | 
38 | Coffeeshops
39 | -----------
40 | 
41 | .. automodule:: bonobo.examples.datasets.coffeeshops
42 |     :members:
43 |     :undoc-members:
44 |     :show-inheritance:
45 | 
46 | Fablabs
47 | -------
48 | 
49 | .. automodule:: bonobo.examples.datasets.fablabs
50 |     :members:
51 |     :undoc-members:
52 |     :show-inheritance:
53 | 
54 | Types
55 | :::::
56 | 
57 | Strings
58 | -------
59 | 
60 | .. automodule:: bonobo.examples.types.strings
61 |     :members: graph, extract, transform, load
62 |     :undoc-members:
63 |     :show-inheritance:
64 | 
65 | 
66 | Dicts
67 | -----
68 | 
69 | .. automodule:: bonobo.examples.types.dicts
70 |     :members: graph, extract, transform, load
71 |     :undoc-members:
72 |     :show-inheritance:
73 | 
74 | 
75 | Bags
76 | ----
77 | 
78 | .. automodule:: bonobo.examples.types.bags
79 |     :members: graph, extract, transform, load
80 |     :undoc-members:
81 |     :show-inheritance:
82 | 
83 | 
84 | Utils
85 | :::::
86 | 
87 | Count
88 | -----
89 | 
90 | .. automodule:: bonobo.examples.nodes.count
91 |     :members:
92 |     :undoc-members:
93 |     :show-inheritance:
94 | 
95 | 
96 | 


--------------------------------------------------------------------------------
/tests/execution/contexts/test_execution_contexts_graph.py:
--------------------------------------------------------------------------------
 1 | from bonobo import Graph
 2 | from bonobo.constants import BEGIN, EMPTY, END
 3 | from bonobo.execution.contexts import GraphExecutionContext
 4 | 
 5 | 
 6 | def raise_an_error(*args, **kwargs):
 7 |     raise Exception("Careful, man, there's a beverage here!")
 8 | 
 9 | 
10 | def raise_an_unrecoverrable_error(*args, **kwargs):
11 |     raise Exception("You are entering a world of pain!")
12 | 
13 | 
14 | def test_lifecycle_of_empty_graph():
15 |     graph = Graph()
16 |     with GraphExecutionContext(graph) as context:
17 |         assert context.started
18 |         assert context.alive
19 |         assert not context.stopped
20 |     assert context.started
21 |     assert not context.alive
22 |     assert context.stopped
23 |     assert not context.xstatus
24 | 
25 | 
26 | def test_lifecycle_of_nonempty_graph():
27 |     graph = Graph([1, 2, 3], print)
28 |     with GraphExecutionContext(graph) as context:
29 |         assert context.started
30 |         assert context.alive
31 |         assert not context.stopped
32 |     assert context.started
33 |     assert not context.alive
34 |     assert context.stopped
35 |     assert not context.xstatus
36 | 
37 | 
38 | def test_lifecycle_of_graph_with_recoverable_error():
39 |     graph = Graph([1, 2, 3], raise_an_error, print)
40 |     with GraphExecutionContext(graph) as context:
41 |         assert context.started
42 |         assert context.alive
43 |         assert not context.stopped
44 |     assert context.started
45 |     assert not context.alive
46 |     assert context.stopped
47 |     assert not context.xstatus
48 | 
49 | 
def test_lifecycle_of_graph_with_unrecoverable_error():
    """Feed a graph whose middle node is ``raise_an_unrecoverrable_error`` and check the context flags."""
    nodes = ([1, 2, 3], raise_an_unrecoverrable_error, print)
    with GraphExecutionContext(Graph(*nodes)) as context:
        assert context.started
        assert context.alive
        assert not context.stopped

        # Push one empty message between BEGIN and END, then run a single loop pass.
        context.write(BEGIN, EMPTY, END)
        context.loop()

    assert context.started and not context.alive and context.stopped
    assert not context.xstatus
60 | 


--------------------------------------------------------------------------------
/tests/structs/test_inputs.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | #
 3 | # Copyright 2012-2014 Romain Dorgueil
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | from queue import Empty
18 | 
19 | import pytest
20 | 
21 | from bonobo.constants import BEGIN, END
22 | from bonobo.errors import InactiveReadableError, InactiveWritableError
23 | from bonobo.structs.inputs import Input
24 | 
25 | 
def test_input_runlevels():
    """Walk an ``Input`` queue through nested BEGIN/END markers, checking reads and writes at each step."""
    queue = Input()

    # Until BEGIN is received, nobody should be able to write to an Input queue.
    assert not queue.alive
    with pytest.raises(InactiveWritableError):
        queue.put("hello, unborn queue.")

    # First BEGIN: the queue comes alive at run level 1.
    queue.put(BEGIN)
    assert queue.alive
    assert queue._runlevel == 1
    queue.put("foo")

    # Second BEGIN: run level 2.
    queue.put(BEGIN)
    assert queue.alive
    assert queue._runlevel == 2
    queue.put("bar")
    queue.put(END)

    # Values come back in FIFO order.
    assert queue.get() == "foo"
    assert queue.get() == "bar"

    # self.assertEqual(queue.alive, False) XXX queue don't know it's dead yet, but it is ...
    # A non-blocking get raises Empty (the END marker itself is not returned).
    with pytest.raises(Empty):
        queue.get(block=False)
    assert queue.alive

    # Slide some data in before killing the queue.
    queue.put("baz")

    # Second END: the queue no longer accepts writes...
    queue.put(END)
    with pytest.raises(InactiveWritableError):
        queue.put("foo")

    # ...but the remaining data can still be read, after which reads are refused too.
    assert queue.get() == "baz"
    with pytest.raises(InactiveReadableError):
        queue.get()
67 | 


--------------------------------------------------------------------------------
/docs/_templates/alabaster/about.html:
--------------------------------------------------------------------------------
 1 | {% if theme_logo %}
 2 | <p class="logo">
 3 |   <a href="{{ pathto(master_doc) }}">
 4 |     <img class="logo" src="{{ pathto('_static/' ~ theme_logo, 1) }}" alt="Logo"/>
 5 |     {% if theme_logo_name|lower == 'true' %}
 6 |     <h1 class="logo logo-name">{{ project }}</h1>
 7 |     {% endif %}
 8 |   </a>
 9 | </p>
10 | {% else %}
11 | <h1 class="logo"><a href="{{ pathto(master_doc) }}">{{ project }}</a></h1>
12 | {% endif %}
13 | 
14 | {% if theme_description %}
15 | <p class="blurb">{{ theme_description }}</p>
16 | {% endif %}
17 | 
18 | {% if theme_github_user and theme_github_repo %}
19 | {% if theme_github_button|lower == 'true' %}
20 | <p>
21 | <iframe src="https://ghbtns.com/github-btn.html?user={{ theme_github_user }}&repo={{ theme_github_repo }}&type={{ theme_github_type }}&count={{ theme_github_count }}&size=large&v=2"
22 |   allowtransparency="true" frameborder="0" scrolling="0" width="200px" height="35px"></iframe>
23 | </p>
24 | {% endif %}
25 | {% endif %}
26 | 
27 | {% if theme_travis_button|lower != 'false' %}
28 | {% if theme_travis_button|lower == 'true' %}
29 |     {% set path = theme_github_user + '/' + theme_github_repo %}
30 | {% else %}
31 |     {% set path = theme_travis_button %}
32 | {% endif %}
33 | <p>
34 | <a href="https://travis-ci.org/{{ path }}">
35 |     <img
36 |         alt="https://secure.travis-ci.org/{{ path }}.svg?branch={{ theme_badge_branch }}"
37 |         src="https://secure.travis-ci.org/{{ path }}.svg?branch={{ theme_badge_branch }}"
38 |     />
39 | </a>
40 | </p>
41 | {% endif %}
42 | 
43 | {% if theme_codecov_button|lower != 'false' %}
44 | {% if theme_codecov_button|lower == 'true' %}
45 |     {% set path = theme_github_user + '/' + theme_github_repo %}
46 | {% else %}
47 |     {% set path = theme_codecov_button %}
48 | {% endif %}
49 | <p>
50 | <a href="https://codecov.io/github/{{ path }}">
51 |     <img
52 |     alt="https://codecov.io/github/{{ path }}/coverage.svg?branch={{ theme_badge_branch }}"
53 |     src="https://codecov.io/github/{{ path }}/coverage.svg?branch={{ theme_badge_branch }}"
54 |     />
55 | </a>
56 | </p>
57 | {% endif %}
58 | 


--------------------------------------------------------------------------------
/bonobo/commands/__init__.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import logging
 3 | 
 4 | import mondrian
 5 | 
 6 | from bonobo import settings
 7 | from bonobo.commands.base import BaseCommand, BaseGraphCommand
 8 | 
 9 | 
def entrypoint(args=None):
    """
    Main callable for "bonobo" entrypoint.

    Will load commands from "bonobo.commands" entrypoints, using stevedore.

    :param args: command line arguments handed to ``ArgumentParser.parse_args``; ``None`` lets argparse read
                 ``sys.argv``.
    :return: 0, once the selected command handler has returned.
    """

    mondrian.setup(excepthook=True)
    logger = logging.getLogger()
    logger.setLevel(settings.LOGGING_LEVEL.get())

    parser = argparse.ArgumentParser()
    parser.add_argument("--debug", "-D", action="store_true")

    subparsers = parser.add_subparsers(dest="command")
    subparsers.required = True

    # Maps each command name to the callable that handles it.
    commands = {}

    def register_extension(ext):
        # A failure while loading one extension is logged and does not stop the others from loading.
        try:
            # Named distinctly from the top-level ``parser`` to avoid shadowing it.
            command_parser = subparsers.add_parser(ext.name)
            if isinstance(ext.plugin, type) and issubclass(ext.plugin, BaseCommand):
                # current way, class based.
                cmd = ext.plugin()
                cmd.add_arguments(command_parser)
                cmd.__name__ = ext.name
                commands[ext.name] = cmd.handle
            else:
                # old school, function based.
                commands[ext.name] = ext.plugin(command_parser)
        except Exception:
            logger.exception("Error while loading command %s.", ext.name)

    from stevedore import ExtensionManager

    mgr = ExtensionManager(namespace="bonobo.commands")
    mgr.map(register_extension)

    parsed_args = vars(parser.parse_args(args))

    if parsed_args.pop("debug", False):
        settings.DEBUG.set(True)
        settings.LOGGING_LEVEL.set(logging.DEBUG)
        logger.setLevel(settings.LOGGING_LEVEL.get())

    # Lazy logging arguments: the message is only formatted if the record is actually emitted.
    logger.debug("Command: %s Arguments: %r", parsed_args["command"], parsed_args)

    # Get command handler, execute, rinse.
    command = commands[parsed_args.pop("command")]
    command(**parsed_args)

    return 0
66 | 


--------------------------------------------------------------------------------
/docs/_templates/base.html:
--------------------------------------------------------------------------------
 1 | {%- extends "alabaster/layout.html" %}
 2 | 
 3 | 
 4 | {%- block extrahead %}
 5 | {{ super() }}
 6 | <style>
 7 | </style>
 8 | <link href="https://fonts.googleapis.com/css?family=Ubuntu" rel="stylesheet">
 9 | {% endblock %}
10 | 
11 | {%- block footer %}
12 | {{ relbar() }}
13 | 
14 | <div class="footer">
15 |     &copy; 2012-2018, <a href="https://romain.dorgueil.net" target="_blank">Romain Dorgueil</a> |
16 |     <a href="https://www.bonobo-project.org/" target="_blank">Bonobo ETL</a>
17 |   
18 |     {%- if show_source and has_source and sourcename %}		
19 |       | <a href="{{ pathto('_sources/' + sourcename, true)|e }}" rel="nofollow" target="_blank">{{ _('Page source') }}</a>		
20 |     {%- endif %}
21 | </div>
22 | 
23 | <a href="https://github.com/python-bonobo/bonobo" class="github">
24 |     <img style="position: absolute; top: 0; right: 0; border: 0;"
25 |          src="{{ pathto('_static/' ~ theme_github_banner, 1) if theme_github_banner|lower != 'true' else 'https://s3.amazonaws.com/github/ribbons/forkme_right_darkblue_121621.png' }}"
26 |          alt="Fork me on GitHub" class="github"/>
27 | </a>
28 | 
29 | {% if theme_analytics_id %}
30 | <script type="text/javascript">
31 |     var _gaq = _gaq || [];
32 |     _gaq.push(['_setAccount', '{{ theme_analytics_id }}']);
33 |     _gaq.push(['_setDomainName', 'none']);
34 |     _gaq.push(['_setAllowLinker', true]);
35 |     _gaq.push(['_trackPageview']);
36 |     (function () {
37 |         var ga = document.createElement('script');
38 |         ga.type = 'text/javascript';
39 |         ga.async = true;
40 |         ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
41 |         var s = document.getElementsByTagName('script')[0];
42 |         s.parentNode.insertBefore(ga, s);
43 |     })();
44 | </script>
45 | {% endif %}
46 | <script async src="https://www.googletagmanager.com/gtag/js?id=UA-4678258-14"></script>
47 | <script>
48 |   window.dataLayer = window.dataLayer || [];
49 |   function gtag(){dataLayer.push(arguments);}
50 |   gtag('js', new Date());
51 | 
52 |   gtag('config', 'UA-4678258-14');
53 | </script>
54 | {%- endblock %}
55 | 


--------------------------------------------------------------------------------
/bonobo/examples/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import bonobo
 4 | from bonobo.execution.strategies import DEFAULT_STRATEGY, STRATEGIES
 5 | from bonobo.util.statistics import Timer
 6 | 
 7 | 
 8 | def get_argument_parser(parser=None):
 9 |     parser = bonobo.get_argument_parser(parser=parser)
10 | 
11 |     parser.add_argument("--limit", "-l", type=int, default=None, help="If set, limits the number of processed lines.")
12 |     parser.add_argument(
13 |         "--print", "-p", action="store_true", default=False, help="If set, pretty prints before writing to output file."
14 |     )
15 | 
16 |     parser.add_argument("--strategy", "-s", type=str, choices=STRATEGIES.keys(), default=DEFAULT_STRATEGY)
17 | 
18 |     return parser
19 | 
20 | 
def get_graph_options(options):
    """
    Turn the ``limit`` and ``print`` options into optional node tuples.

    Both keys are removed from ``options``. Each returned value is either an empty tuple or a one-element
    tuple holding the matching node.

    :param options: dict of parsed options (mutated in place)
    :return: dict with the ``_limit`` and ``_print`` keys
    """
    limit = options.pop("limit", None)
    pretty = options.pop("print", False)

    limit_nodes = (bonobo.Limit(limit),) if limit else ()
    print_nodes = (bonobo.PrettyPrinter(),) if pretty else ()

    return {"_limit": limit_nodes, "_print": print_nodes}
26 | 
27 | 
def run(get_graph, get_services, *, parser=None):
    """
    Parse the command line, run the example graph and print a short report.

    :param get_graph: callable building the graph; receives the ``_limit`` and ``_print`` keyword arguments
    :param get_services: callable returning the services passed to ``bonobo.run``
    :param parser: optional argument parser, defaults to ``get_argument_parser()``
    :return: the ``xstatus`` attribute of the value returned by ``bonobo.run``
    """
    if not parser:
        parser = get_argument_parser()

    with bonobo.parse_args(parser) as options:
        with Timer() as timer:
            summary = " ".join("{}={}".format(key, value) for key, value in sorted(options.items()))
            print("Options:", summary)
            graph = get_graph(**get_graph_options(options))
            retval = bonobo.run(graph, services=get_services(), strategy=options["strategy"])
        print("Execution time:", timer)
        print("Return value:", retval)
        print("XStatus:", retval.xstatus)
        return retval.xstatus
41 | 
42 | 
def get_minor_version():
    """Return the first two dot-separated components of ``bonobo.__version__``, joined by a dot."""
    components = bonobo.__version__.split(".")
    return ".".join(components[:2])
45 | 
46 | 
def get_datasets_dir(*dirs):
    """
    Return a directory under ``~/.cache/bonobo/<minor version>``, creating it if needed.

    :param dirs: extra path components appended below the versioned cache directory
    :return: the path of the (now existing) directory
    """
    cache_root = os.path.join(os.path.expanduser("~"), ".cache/bonobo", get_minor_version())
    target_dir = os.path.join(cache_root, *dirs)
    os.makedirs(target_dir, exist_ok=True)
    return target_dir
52 | 
53 | 
def get_services():
    """Return the services used by the examples: ``fs`` (datasets directory) and ``fs.static``."""
    datasets_fs = bonobo.open_fs(get_datasets_dir("datasets"))
    static_fs = bonobo.open_examples_fs("datasets", "static")
    return {"fs": datasets_fs, "fs.static": static_fs}
59 | 


--------------------------------------------------------------------------------
/bonobo/examples/files/pickle_handlers.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This example shows how a different file system service can be injected
 3 | into a transformation (as compressing pickled objects often makes sense
 4 | anyways).  The pickle itself contains a list of lists as follows:
 5 | 
 6 | ```
 7 | [
 8 |     ['category', 'sms'],
 9 |     ['ham', 'Go until jurong point, crazy..'],
10 |     ['ham', 'Ok lar... Joking wif u oni...'],
11 |     ['spam', 'Free entry in 2 a wkly comp to win...'],
12 |     ['ham', 'U dun say so early hor... U c already then say...'],
13 |     ['ham', 'Nah I don't think he goes to usf, he lives around here though'],
14 |     ['spam', 'FreeMsg Hey there darling it's been 3 week's now...'],
15 |     ...
16 | ]
17 | ```
18 | 
19 | where the first column categorizes an sms as "ham" or "spam".  The second
20 | column contains the sms itself.
21 | 
22 | Data set taken from:
23 | https://www.kaggle.com/uciml/sms-spam-collection-dataset/downloads/sms-spam-collection-dataset.zip
24 | 
25 | The transformation (1) reads the pickled data, (2) marks and shortens
26 | messages categorized as spam, and (3) prints the output.
27 | 
28 | """
29 | 
30 | import sys
31 | 
32 | from fs.tarfs import TarFS
33 | 
34 | import bonobo
35 | from bonobo import examples
36 | 
37 | 
def cleanse_sms(category, sms):
    """
    Add a cleaned-up version of the message to a ``(category, sms)`` row.

    Spam is prefixed with a marker and cut to its first 50 characters (with an ellipsis when something was
    cut); ham is kept as is.

    :param category: either "spam" or "ham"
    :param sms: the message text
    :return: tuple ``(category, sms, sms_clean)``
    :raises ValueError: if the category is neither "spam" nor "ham"
    """
    if category == "spam":
        head = sms[0:50]
        ellipsis = "..." if len(sms) > 50 else ""
        return category, sms, "**MARKED AS SPAM** " + head + ellipsis

    if category == "ham":
        return category, sms, sms

    raise ValueError("Unknown category {!r}.".format(category))
47 | 
48 | 
def get_graph(*, _limit=(), _print=()):
    """
    Build the example graph: read ``spam.pkl``, optionally limit, cleanse, optionally print.

    :param _limit: nodes inserted right after the reader
    :param _print: nodes appended after ``cleanse_sms``
    :return: the assembled graph
    """
    graph = bonobo.Graph()

    # spam.pkl is within the gzipped tarball
    reader = bonobo.PickleReader("spam.pkl")
    chain = (reader, *_limit, cleanse_sms, *_print)
    graph.add_chain(*chain)

    return graph
61 | 
62 | 
def get_services():
    """Return the default example services, with ``fs`` replaced by a ``TarFS`` over the ``spam.tgz`` archive."""
    services = {**examples.get_services()}
    services["fs"] = TarFS(bonobo.get_examples_path("datasets", "static", "spam.tgz"))
    return services
65 | 
66 | 
67 | if __name__ == "__main__":
68 |     sys.exit(examples.run(get_graph, get_services))
69 | 


--------------------------------------------------------------------------------
/tests/test_settings.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from os import environ
 3 | from unittest.mock import patch
 4 | 
 5 | import pytest
 6 | 
 7 | from bonobo import settings
 8 | from bonobo.errors import ValidationError
 9 | 
10 | TEST_SETTING = "TEST_SETTING"
11 | 
12 | 
def test_to_bool():
    """``settings.to_bool`` maps the usual false-ish strings to a falsy value and everything else tested to truthy."""
    for falsy in ("", "FALSE", "NO", "0"):
        assert not settings.to_bool(falsy)

    for truthy in ("yup", "True", "yes", "1"):
        assert settings.to_bool(truthy)
23 | 
24 | 
def test_setting():
    """Exercise ``Setting``: environment lookup after ``clear()``, defaults, and validation."""
    s = settings.Setting(TEST_SETTING)
    assert s.get() is None

    with patch.dict(environ, {TEST_SETTING: "hello"}):
        # The environment value only shows up once the setting has been cleared.
        assert s.get() is None
        s.clear()
        assert s.get() == "hello"

    s = settings.Setting(TEST_SETTING, default="nope")
    # Compare by value: ``is`` against a string literal depends on interning and emits a SyntaxWarning.
    assert s.get() == "nope"

    with patch.dict(environ, {TEST_SETTING: "hello"}):
        assert s.get() == "nope"
        s.clear()
        assert s.get() == "hello"

    s = settings.Setting(TEST_SETTING, default=0, validator=lambda x: x == 42)
    with pytest.raises(ValidationError):
        # The default (0) does not satisfy the validator, so get() is expected to raise.
        assert s.get() == 0

    s.set(42)

    with pytest.raises(ValidationError):
        s.set(21)
50 | 
51 | 
def test_default_settings():
    """Check the default values of the built-in settings, and that ``DEBUG`` lowers the logging level."""
    settings.clear_all()

    try:
        assert settings.DEBUG.get() is False
        assert settings.PROFILE.get() is False
        assert settings.QUIET.get() is False
        # Public level constants rather than the private ``logging._checkLevel`` helper.
        assert settings.LOGGING_LEVEL.get() == logging.INFO

        with patch.dict(environ, {"DEBUG": "t"}):
            settings.clear_all()
            assert settings.LOGGING_LEVEL.get() == logging.DEBUG
    finally:
        # Always reset, so values read from the patched environment cannot leak into other tests.
        settings.clear_all()
65 | 
66 | 
def test_check():
    """``settings.check()`` passes by default and raises when DEBUG, PROFILE and QUIET are all enabled."""
    settings.check()
    try:
        with patch.dict(environ, {"DEBUG": "t", "PROFILE": "t", "QUIET": "t"}):
            settings.clear_all()
            with pytest.raises(RuntimeError):
                settings.check()
    finally:
        # Always reset, so values read from the patched environment cannot leak into other tests.
        settings.clear_all()
74 | 


--------------------------------------------------------------------------------
/bonobo/nodes/io/pickle.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | 
 3 | from bonobo.config import Option, use_context
 4 | from bonobo.constants import NOT_MODIFIED
 5 | from bonobo.nodes.io.base import FileHandler
 6 | from bonobo.nodes.io.file import FileReader, FileWriter
 7 | 
 8 | 
class PickleHandler(FileHandler):
    """
    Base class for the pickle reader and writer, adding the ``fields`` option to ``FileHandler``.

    .. attribute:: fields

        The names of the items in the pickle, if it is not defined in the first item of the pickle.

    """

    fields = Option(tuple, required=False)
19 | 
20 | 
@use_context
class PickleReader(FileReader, PickleHandler):
    """
    Reads a Python pickle object and yields its items as tuples.
    """

    mode = Option(str, default="rb")

    def read(self, file, context, *, fs):
        """
        Unpickle ``file`` and yield one tuple per item.

        A dict is iterated as ``(key, value)`` pairs, any other iterable item by item, and a non-iterable
        object is treated as a single item. When the context has no output type yet, the output fields are
        ``self.fields`` or, failing that, the first item.

        :raises ValueError: if an item does not have as many elements as there are output fields
        """
        # NOTE(review): pickle.load can run arbitrary code; only trusted files should be read here.
        data = pickle.load(file)

        if isinstance(data, dict):
            iterator = iter(data.items())
        else:
            # if the data is not iterable, then wrap the object in a list so it may be iterated
            try:
                iterator = iter(data)
            except TypeError:
                iterator = iter([data])

        if not context.output_type:
            fields = self.fields
            if not fields:
                try:
                    fields = next(iterator)
                except StopIteration:
                    # Empty pickle and no explicit fields: nothing to yield. Letting StopIteration
                    # escape a generator would turn into a RuntimeError (PEP 479).
                    return
            context.set_output_fields(fields)
        fields = context.get_output_fields()
        fields_length = len(fields)

        for row in iterator:
            if len(row) != fields_length:
                raise ValueError("Received an object with {} items, expected {}.".format(len(row), fields_length))

            # Rows coming from ``dict.items()`` are already (key, value) tuples, so no ``.values()`` call.
            yield tuple(row)

    __call__ = read
55 | 
56 | 
@use_context
class PickleWriter(FileWriter, PickleHandler):
    """
    Writes each received item to the opened file as a pickle.
    """

    mode = Option(str, default="wb")

    def write(self, file, context, item, *, fs):
        """
        Pickle ``item``, write it to the opened file and increment ``context.lineno``.

        :return: ``NOT_MODIFIED``
        """
        context.setdefault("lineno", 0)
        serialized = pickle.dumps(item)
        file.write(serialized)
        context.lineno += 1
        return NOT_MODIFIED

    __call__ = write
71 | 


--------------------------------------------------------------------------------
/docs/reference/settings.rst:
--------------------------------------------------------------------------------
 1 | Settings & Environment
 2 | ======================
 3 | 
 4 | .. module:: bonobo.settings
 5 | 
 6 | All settings that you can find in the :mod:`bonobo.settings` module. You can override those settings using
 7 | environment variables. For your own settings and configuration values, see the :doc:`/guide/environment` guide.
 8 | 
 9 | Debug
10 | :::::
11 | 
12 | :Purpose: Sets the debug mode, which is more verbose. Loglevel will be lowered to DEBUG instead of INFO.
13 | :Environment: `DEBUG`
14 | :Setting: `bonobo.settings.DEBUG`
15 | :Default: `False`
16 | 
17 | Profile
18 | :::::::
19 | 
20 | :Purpose: Sets profiling, which adds memory/cpu usage output. Not yet fully implemented. It is expected that setting
21 |           this to true will have a non-negligible performance impact.
22 | :Environment: `PROFILE`
23 | :Setting: `bonobo.settings.PROFILE`
24 | :Default: `False`
25 | 
26 | Quiet
27 | :::::
28 | 
29 | :Purpose: Sets the quiet mode, which asks for all output to be computer parsable. Formatting will be removed, but it will
30 |           allow to use unix pipes, etc. Not yet fully implemented, few transformations already use it. Probably, it
31 |           should be the default on non-interactive terminals.
32 | :Environment: `QUIET`
33 | :Setting: `bonobo.settings.QUIET`
34 | :Default: `False`
35 | 
36 | Logging Level
37 | :::::::::::::
38 | 
39 | :Purpose: Sets the python minimum logging level.
40 | :Environment: `LOGGING_LEVEL`
41 | :Setting: `bonobo.settings.LOGGING_LEVEL`
42 | :Default: `INFO` if DEBUG is False, otherwise `DEBUG`
43 | :Values: `CRITICAL`, `FATAL`, `ERROR`, `WARNING`, `INFO`, `DEBUG`, `NOTSET`
44 | 
45 | I/O Format
46 | ::::::::::
47 | 
48 | :Purpose: Sets default input/output format for builtin transformations. It can be overridden on each node. The `kwargs`
49 |           value means that each node will try to read its input from keyword arguments (and write similarly formatted
50 |           output), while `arg0` means it will try to read its input from the first positional argument (and write
51 |           similarly formatted output).
52 | :Environment: `IOFORMAT`
53 | :Setting: `bonobo.settings.IOFORMAT`
54 | :Default: `kwargs`
55 | :Values: `kwargs`, `arg0`
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/bonobo/contrib/opendatasoft/__init__.py:
--------------------------------------------------------------------------------
 1 | from urllib.parse import urlencode
 2 | 
 3 | import requests  # todo: make this a service so we can substitute it ?
 4 | 
 5 | from bonobo.config import Option
 6 | from bonobo.config.configurables import Configurable
 7 | from bonobo.config.processors import ContextProcessor
 8 | from bonobo.util.objects import ValueHolder
 9 | 
10 | 
def path_str(path):
    """Return ``path`` with a leading slash, adding one if it is missing."""
    if path.startswith("/"):
        return path
    return "/" + path
13 | 
14 | 
class OpenDataSoftAPI(Configurable):
    """
    Yields the records of an OpenDataSoft dataset, fetched page by page with ``requests``.

    Each yielded dict holds the record's ``fields`` plus its ``geometry`` and ``recordid``.
    """

    dataset = Option(str, positional=True)
    endpoint = Option(str, required=False, default="{scheme}://{netloc}{path}")
    scheme = Option(str, required=False, default="https")
    netloc = Option(str, required=False, default="data.opendatasoft.com")
    path = Option(path_str, required=False, default="/api/records/1.0/search/")
    rows = Option(int, required=False, default=500)
    limit = Option(int, required=False)
    timezone = Option(str, required=False, default="Europe/Paris")
    kwargs = Option(dict, required=False, default=dict)

    @ContextProcessor
    def compute_path(self, context):
        # Base URL: formatted endpoint plus the query string (dataset, timezone, then sorted extra kwargs).
        query = (("dataset", self.dataset), ("timezone", self.timezone)) + tuple(sorted(self.kwargs.items()))
        location = self.endpoint.format(scheme=self.scheme, netloc=self.netloc, path=self.path)
        yield location + "?" + urlencode(query)

    @ContextProcessor
    def start(self, context, base_url):
        # Offset of the next page, starting at 0.
        yield ValueHolder(0)

    def __call__(self, base_url, start, *args, **kwargs):
        # Keep fetching pages until the limit (if any) is reached or a page comes back empty.
        while (not self.limit) or (self.limit > start):
            offset = start.value
            page_size = self.rows if not self.limit else min(self.rows, self.limit - start)
            url = "{}&start={start}&rows={rows}".format(base_url, start=offset, rows=page_size)

            resp = requests.get(url)
            records = resp.json().get("records", [])

            if len(records) == 0:
                break

            for record in records:
                yield {
                    **record.get("fields", {}),
                    "geometry": record.get("geometry", {}),
                    "recordid": record.get("recordid"),
                }

            start += self.rows
50 | 
51 | 
52 | __all__ = ["OpenDataSoftAPI"]
53 | 


--------------------------------------------------------------------------------
/bonobo/util/resolvers.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This package is considered private, and should only be used within bonobo.
 3 | 
 4 | """
 5 | 
 6 | import json
 7 | import os
 8 | import runpy
 9 | 
10 | import bonobo
11 | from bonobo.util import cast
12 | 
13 | 
class _RequiredModule:
    """Exposes the keys of a dict as attributes, by using that dict as the instance ``__dict__``."""

    def __init__(self, dct):
        # The dict is used directly (not copied) as the instance namespace.
        self.__dict__ = dct
17 | 
18 | 
class _ModulesRegistry(dict):
    """Dict of already required modules, keyed by dotted name, loaded from files under the working directory."""

    @property
    def pathname(self):
        """Directory under which module files are looked up: the current working directory."""
        return os.getcwd()

    def require(self, name):
        """
        Return the module registered under ``name``, running its file on first use.

        ``a.b.c`` is looked up as ``<pathname>/a/b/c.py`` and executed with ``runpy.run_path``.
        """
        if name in self:
            return self[name]

        *packages, module = name.split(".")
        filename = os.path.join(self.pathname, *packages, module + ".py")
        self[name] = _RequiredModule(runpy.run_path(filename, run_name=name))
        return self[name]
30 | 
31 | 
def _parse_option(option):
    """
    Parse a 'key=val' option string into a python (key, val) pair.

    The value is decoded as JSON when possible and kept as a raw string otherwise. An option without
    any '=' is returned as ``(option, True)``.

    :param option: str
    :return: tuple
    """
    key, separator, raw = option.partition("=")
    if not separator:
        # No '=' at all: treat the option as a flag.
        return option, True

    try:
        return key, json.loads(raw)
    except json.JSONDecodeError:
        return key, raw
50 | 
51 | 
def _resolve_options(options=None):
    """
    Resolve a collection of option strings (eventually coming from command line) into a python dictionary.

    Each string is parsed with ``_parse_option``; a missing or empty collection gives an empty dict.

    :param options: tuple[str]
    :return: dict
    """
    if not options:
        return dict()
    return dict(_parse_option(option) for option in options)
62 | 
63 | 
@cast(tuple)
def _resolve_transformations(transformations):
    """
    Resolve a collection of strings into the matching python objects, defaulting to bonobo namespace if no package is provided.

    Syntax for each string is path.to.package:attribute

    :param transformations: tuple(str)
    :return: tuple(object)
    """
    registry = _ModulesRegistry()
    for t in transformations or ():
        mod, separator, attr = t.partition(":")
        if separator:
            # Resolved outside of any try/except, so an error raised while running the required module
            # (including a ValueError) propagates instead of being mistaken for a missing ':'.
            yield getattr(registry.require(mod), attr)
        else:
            # No ':' in the string: look the name up in the bonobo namespace.
            yield getattr(bonobo, t)
82 | 


--------------------------------------------------------------------------------
/bonobo/util/errors.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import re
 3 | from contextlib import contextmanager
 4 | from sys import exc_info
 5 | 
 6 | from mondrian import term
 7 | 
 8 | logger = logging.getLogger(__name__)
 9 | 
10 | 
@contextmanager
def sweeten_errors():
    """
    Context manager that prints any ``Exception`` raised in its body as a framed box, then suppresses it.

    The first exception argument is printed next to the exception type name and the following ones on
    their own lines. Text between backticks is rendered in bold. The exception is finally logged at debug
    level, with its traceback, on the root logger.
    """
    try:
        yield
    except Exception as exc:
        SPACES = 2
        w = term.white
        prefix = w("║" + " " * (SPACES - 1))
        suffix = w(" " * (SPACES - 1) + "║")

        pre_re = re.compile("([^`]*)`([^`]*)`([^`]*)")

        def format_arg(arg):
            # Visible length is measured with the backticks removed, before any styling is applied.
            length = len(pre_re.sub("\\1\\2\\3", arg))

            arg = pre_re.sub(w("\\1") + term.bold("\\2") + w("\\3"), arg)
            arg = re.sub(r"^  \$ (.*)", term.lightblack("  $ ") + term.reset("\\1"), arg)

            return (arg, length)

        def f(*args):
            return "".join(args)

        # Exception arguments are not necessarily strings (e.g. OSError carries an integer errno first);
        # coerce them so that len() and the regular expressions below cannot fail with a TypeError.
        exc_args = [str(arg) for arg in exc.args]

        term_width, _ = term.get_size()
        line_length = min(80, term_width)
        for arg in exc_args:
            # NOTE(review): as written this always evaluates to 120 (the min/max look swapped);
            # kept unchanged to preserve the current box width -- confirm the intended behaviour.
            line_length = max(min(line_length, len(arg) + 2 * SPACES), 120)

        print(f(w("╔" + "═" * (line_length - 2) + "╗")))
        for i, arg in enumerate(exc_args):

            if i == 1:
                # Blank line between the headline and the remaining arguments.
                print(f(prefix, " " * (line_length - 2 * SPACES), suffix))

            arg_formatted, arg_length = format_arg(arg)
            if not i:
                # first line
                print(
                    f(
                        prefix,
                        term.red_bg(term.bold(" " + type(exc).__name__ + " ")),
                        " ",
                        w(arg_formatted),
                        " " * (line_length - (arg_length + 3 + len(type(exc).__name__) + 2 * SPACES)),
                        suffix,
                    )
                )
            else:
                # other lines
                print(f(prefix, arg_formatted + " " * (line_length - arg_length - 2 * SPACES), suffix))

        print(f(w("╚" + "═" * (line_length - 2) + "╝")))

        logging.getLogger().debug("This error was caused by the following exception chain.", exc_info=exc_info())
65 | 


--------------------------------------------------------------------------------
/tests/examples/test_example_change_some_fields.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | 
 3 | import bonobo
 4 | from bonobo.config import use_raw_input
 5 | from bonobo.execution.contexts import GraphExecutionContext
 6 | from bonobo.util.bags import BagType
 7 | 
 8 | Extracted = namedtuple("Extracted", ["id", "name", "value"])
 9 | ExtractedBT = BagType("ExtractedBT", ["id", "name", "value"])
10 | 
11 | 
def extract_nt():
    """Yield the four sample rows as ``Extracted`` namedtuples."""
    samples = (
        (1, "Guido", ".py"),
        (2, "Larry", ".pl"),
        (3, "Dennis", ".c"),
        (4, "Yukihiro", ".rb"),
    )
    for ident, name, value in samples:
        yield Extracted(id=ident, name=name, value=value)
17 | 
18 | 
def extract_bt():
    """Yield the four sample rows as ``ExtractedBT`` bags."""
    samples = (
        (1, "Guido", ".py"),
        (2, "Larry", ".pl"),
        (3, "Dennis", ".c"),
        (4, "Yukihiro", ".rb"),
    )
    for ident, name, value in samples:
        yield ExtractedBT(id=ident, name=name, value=value)
24 | 
25 | 
def transform_using_args(id, name, value):
    """Yield an ``Extracted`` row with the id doubled and the value prefixed by the lowercased name."""
    doubled_id = id * 2
    prefixed_value = name.lower() + value
    yield Extracted(id=doubled_id, name=name, value=prefixed_value)
28 | 
29 | 
@use_raw_input
def transform_nt(row):
    """Yield a copy of ``row`` with its ``name`` field uppercased."""
    shouted = row.name.upper()
    yield row._replace(name=shouted)
33 | 
34 | 
def StoreInList(buffer: list):
    """
    Build a callable that appends the positional arguments of each call, as a tuple, to ``buffer``.

    :param buffer: list receiving one tuple per call
    :return: the ``store_in_list`` callable
    """

    def store_in_list(*args, buffer=buffer):
        # ``buffer`` is bound as a keyword default when the callable is created.
        buffer.append(args)

    return store_in_list
40 | 
41 | 
def test_execution():
    """Run three chains (namedtuple args, raw namedtuple input, bag args) and compare what each one stored."""
    # Both ``transform_using_args`` chains are expected to produce the same rows.
    expected_transformed = [
        (2, "Guido", "guido.py"),
        (4, "Larry", "larry.pl"),
        (6, "Dennis", "dennis.c"),
        (8, "Yukihiro", "yukihiro.rb"),
    ]
    expected_uppercased = [(1, "GUIDO", ".py"), (2, "LARRY", ".pl"), (3, "DENNIS", ".c"), (4, "YUKIHIRO", ".rb")]

    graph = bonobo.Graph()

    result_args = []
    result_nt = []
    result_bt = []

    graph.add_chain(extract_nt, transform_using_args, StoreInList(result_args))
    graph.add_chain(transform_nt, StoreInList(result_nt), _input=extract_nt)
    graph.add_chain(extract_bt, transform_using_args, StoreInList(result_bt))

    with GraphExecutionContext(graph) as context:
        context.run_until_complete()

    assert result_args == expected_transformed
    assert result_nt == expected_uppercased
    assert result_bt == expected_transformed
71 | 


--------------------------------------------------------------------------------
/bonobo/contrib/jupyter/static/extension.js:
--------------------------------------------------------------------------------
 1 | define(function() { return /******/ (function(modules) { // webpackBootstrap
 2 | /******/ 	// The module cache
 3 | /******/ 	var installedModules = {};
 4 | 
 5 | /******/ 	// The require function
 6 | /******/ 	function __webpack_require__(moduleId) {
 7 | 
 8 | /******/ 		// Check if module is in cache
 9 | /******/ 		if(installedModules[moduleId])
10 | /******/ 			return installedModules[moduleId].exports;
11 | 
12 | /******/ 		// Create a new module (and put it into the cache)
13 | /******/ 		var module = installedModules[moduleId] = {
14 | /******/ 			exports: {},
15 | /******/ 			id: moduleId,
16 | /******/ 			loaded: false
17 | /******/ 		};
18 | 
19 | /******/ 		// Execute the module function
20 | /******/ 		modules[moduleId].call(module.exports, module, module.exports, __webpack_require__);
21 | 
22 | /******/ 		// Flag the module as loaded
23 | /******/ 		module.loaded = true;
24 | 
25 | /******/ 		// Return the exports of the module
26 | /******/ 		return module.exports;
27 | /******/ 	}
28 | 
29 | 
30 | /******/ 	// expose the modules object (__webpack_modules__)
31 | /******/ 	__webpack_require__.m = modules;
32 | 
33 | /******/ 	// expose the module cache
34 | /******/ 	__webpack_require__.c = installedModules;
35 | 
36 | /******/ 	// __webpack_public_path__
37 | /******/ 	__webpack_require__.p = "";
38 | 
39 | /******/ 	// Load entry module and return exports
40 | /******/ 	return __webpack_require__(0);
41 | /******/ })
42 | /************************************************************************/
43 | /******/ ([
44 | /* 0 */
45 | /***/ (function(module, exports) {
46 | 
47 | 	// This file contains the javascript that is run when the notebook is loaded.
48 | 	// It contains some requirejs configuration and the `load_ipython_extension`
49 | 	// which is required for any notebook extension.
50 | 
51 | 	// Configure requirejs
52 | 	if (window.require) {
53 | 	    window.require.config({
54 | 	        map: {
55 | 	            "*" : {
56 | 	                "bonobo-jupyter": "nbextensions/bonobo-jupyter/index",
57 | 	                "jupyter-js-widgets": "nbextensions/jupyter-js-widgets/extension"
58 | 	            }
59 | 	        }
60 | 	    });
61 | 	}
62 | 
63 | 	// Export the required load_ipython_extension
64 | 	module.exports = {
65 | 	    load_ipython_extension: function() {}
66 | 	};
67 | 
68 | 
69 | /***/ })
70 | /******/ ])});;


--------------------------------------------------------------------------------
/tests/nodes/test_casts.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | from typing import Callable
 3 | 
 4 | import pytest
 5 | 
 6 | from bonobo.constants import EMPTY
 7 | from bonobo.util.bags import BagType
 8 | from bonobo.util.envelopes import Envelope
 9 | from bonobo.util.testing import BufferingNodeExecutionContext
10 | 
11 | MyTuple = namedtuple("MyTuple", ["a", "b", "c"])
12 | MyBag = BagType("MyBag", ["a", "b", "c"])
13 | 
14 | 
15 | class MyCustomType:  # plain object (not a tuple) that just stores the arguments it was built with
16 |     def __init__(self, *args):
17 |         self.args = args
18 | 
19 |     def as_tuple(self):  # rebuilds the stored arguments as a MyBag; also used unbound as a node in the tests below
20 |         return MyBag(*self.args)
21 | 
22 | 
23 | @pytest.mark.parametrize(
24 |     ["factory", "expected", "expected_item0"],
25 |     [
26 |         [lambda: (1, 2, 3), tuple, int],
27 |         [lambda: Envelope((1, 2, 3)), tuple, int],
28 |         [lambda: MyTuple(1, 2, 3), MyTuple, int],
29 |         [lambda: Envelope(MyTuple(1, 2, 3)), MyTuple, int],
30 |         [lambda: MyBag(1, 2, 3), MyBag, int],
31 |         [lambda: Envelope(MyBag(1, 2, 3)), MyBag, int],
32 |         [lambda: MyCustomType(1, 2, 3), tuple, MyCustomType],
33 |         [lambda: Envelope(MyCustomType(1, 2, 3)), tuple, MyCustomType],
34 |     ],
35 | )
36 | def test_casts_after_output(factory: Callable, expected, expected_item0):
37 |     def transform():
38 |         yield factory()
39 |         yield factory()
40 | 
41 |     with BufferingNodeExecutionContext(transform) as context:
42 |         context.write_sync(EMPTY)
43 | 
44 |     result = context.get_buffer()
45 |     assert expected == type(result[0])
46 |     assert expected_item0 == type(result[0][0])
47 |     assert expected == type(result[1])
48 |     assert expected_item0 == type(result[1][0])
49 | 
50 | 
51 | def test_cast_after_returning_custom_type():
52 |     def transform():
53 |         yield MyCustomType(1, 2, 3)
54 |         yield MyCustomType(4, 5, 6)
55 | 
56 |     with BufferingNodeExecutionContext(transform) as context:
57 |         context.write_sync(EMPTY)
58 |     result = context.get_buffer()
59 |     assert tuple == type(result[0])
60 |     assert tuple == type(result[1])
61 |     assert MyCustomType == type(result[0][0])
62 |     assert MyCustomType == type(result[1][0])
63 | 
64 |     with BufferingNodeExecutionContext(MyCustomType.as_tuple) as context:
65 |         context.write_sync(*result)
66 |     result = context.get_buffer()
67 |     assert MyBag == type(result[0])
68 |     assert MyBag == type(result[1])
69 | 


--------------------------------------------------------------------------------
/RELEASE-0.6.rst:
--------------------------------------------------------------------------------
 1 | Problems
 2 | ========
 3 | 
 4 | Failed to display Jupyter Widget of type BonoboWidget.
 5 | If you're reading this message in Jupyter Notebook or JupyterLab, it may mean that the widgets JavaScript is still loading. If this message persists, it likely means that the widgets JavaScript library is either not installed or not enabled. See the Jupyter Widgets Documentation for setup instructions.
 6 | If you're reading this message in another notebook frontend (for example, a static rendering on GitHub or NBViewer), it may mean that your frontend doesn't currently support widgets.
 7 | 
 8 | .. code-block:: shell-session
 9 | 
10 |     $ jupyter nbextension enable --py widgetsnbextension
11 |     $ jupyter nbextension install --py --symlink bonobo.contrib.jupyter
12 |     $ jupyter nbextension enable --py bonobo.contrib.jupyter
13 | 
14 | 
15 | Todo
16 | ====
17 | 
18 | * Pretty printer
19 | 
20 | 
21 | Options for Bags
22 | ================
23 | 
24 | tuple only
25 | 
26 | pros : simple
27 | cons :
28 | - how to name columns / store headers ?
29 | - how to return a dictionary
30 | 
31 | 
32 | 
33 | yield keys('foo', 'bar', 'baz')
34 | 
35 | 
36 | yield 'a', 'b', 'c'
37 | 
38 | 
39 | CHANGELOG
40 | =========
41 | 
42 | * Bags changed to something way closer to namedtuples.
43 |   * Better at managing memory
44 |   * Less flexible for kwargs usage, but much more standard and portable from one to another version of python
45 |   * More future proof for different execution strategies
46 |   * May lead to changes in your current transformation
47 | 
48 | * A given transformation now has an input and an output "type" which is either manually set by the user or
49 |   detected from the first item sent through a queue. It is a restriction on how bonobo can be used, but
50 |   will help make its behaviour more predictable.
51 | 
52 | * No more "graph" instance detection. This was misleading for new users, and not really pythonic. The
53 |   recommended way to start with bonobo is just to use one python file with a __main__ block, and if the
54 |   project grows, include this file in a package, either new or existing one. The init cli changed to
55 |   help you generate files or packages. That also means that we do not generate things with cookiecutter
56 |   anymore.
57 | 
58 | * Jupyter enhancements
59 | 
60 | * Graphviz support
61 | 
62 | * New nodes in stdlib
63 | 
64 | * Registry, used for conversions but also for your own integrations.
65 | 
66 | 
67 | 


--------------------------------------------------------------------------------
/docs/guide/future/services.rst:
--------------------------------------------------------------------------------
 1 | Services
 2 | ========
 3 | 
 4 | .. warning::
 5 | 
 6 |    This is a "future" document, that does not exist, it's only kept here not to lose the data until we organize better
 7 |    documentation versioning.
 8 | 
 9 | Future and proposals
10 | ::::::::::::::::::::
11 | 
12 | This is a first implementation and it will evolve. Base concepts will stay the same though.
13 | 
14 | May or may not happen, depending on discussions.
15 | 
16 | * Singleton or prototype based injection (to use spring terminology, see
17 |   https://www.tutorialspoint.com/spring/spring_bean_scopes.htm), allowing smart factory usage and efficient sharing of
18 |   resources.
19 | * Lazily resolved parameters, eventually overridden by command line or environment, so you can for example override the
20 |   database DSN or target filesystem on command line (or with shell environment vars).
21 | * Pool based locks that ensure that only one (or n) transformations are using a given service at the same time.
22 | * Simple config implementation, using a python file for config (ex: bonobo run ... --services=services_prod.py).
23 | * Default configuration for services, using an optional callable (`def get_services(args): ...`). Maybe tie default
24 |   configuration to graph, but not really a fan because this is unrelated to graph logic.
25 | * Default implementation for a service in a transformation or in the descriptor. Maybe not a good idea, because it
26 |   tends to push forward multiple instances of the same thing, but maybe...
27 | 
28 |   A few ideas on how it can be implemented, from the user perspective.
29 | 
30 |   .. code-block:: python
31 | 
32 |       # using call
33 |       http = Service('http.client')(requests)
34 | 
35 |       # using more explicit call
36 |       http = Service('http.client').set_default_impl(requests)
37 | 
38 |       # using a decorator
39 |       @Service('http.client')
40 |       def http(self, services):
41 |           import requests
42 |           return requests
43 | 
44 |       # as a default in a subclass of Service
45 |       class HttpService(Service):
46 |           def get_default_impl(self, services):
47 |               import requests
48 |               return requests
49 | 
50 |       # ... then use it as another service
51 |       http = HttpService('http.client')
52 | 
53 | 
54 | This is under development, let us know what you think (slack may be a good place for this).
55 | The basics already work, and you can try it.
56 | 
57 | 


--------------------------------------------------------------------------------
/bonobo/nodes/io/json.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from collections import OrderedDict
 3 | 
 4 | from bonobo.config import Method
 5 | from bonobo.config.processors import ContextProcessor, use_context
 6 | from bonobo.constants import NOT_MODIFIED
 7 | from bonobo.nodes.io.base import FileHandler
 8 | from bonobo.nodes.io.file import FileReader, FileWriter
 9 | 
10 | 
11 | class JsonHandler(FileHandler):
12 |     eol = ",\n"
13 |     prefix, suffix = "[", "]"
14 | 
15 | 
16 | class LdjsonHandler(FileHandler):
17 |     eol = "\n"
18 |     prefix, suffix = "", ""
19 | 
20 | 
21 | class JsonReader(JsonHandler, FileReader):  # parses the whole file content at once and yields the items of the result
22 |     @Method(positional=False)
23 |     def loader(self, file):  # despite the name, read() passes the text content (file.read()), not a file object
24 |         return json.loads(file)
25 | 
26 |     def read(self, file, *, fs):  # `fs` is accepted but not used in this method
27 |         yield from self.loader(file.read())
28 | 
29 |     __call__ = read  # calling the instance runs read()
30 | 
31 | 
32 | class LdjsonReader(LdjsonHandler, JsonReader):
33 |     """
34 |     Read a stream of line-delimited JSON objects (one object per line).
35 | 
36 |     Not to be mistaken with JSON-LD (where LD stands for linked data).
37 | 
38 |     """
39 | 
40 |     def read(self, file, *, fs):  # iterates over `file` and runs the loader on each item, yielding the results
41 |         yield from map(self.loader, file)
42 | 
43 |     __call__ = read  # rebound here, otherwise __call__ would still be JsonReader's read
44 | 
45 | 
46 | @use_context
47 | class JsonWriter(JsonHandler, FileWriter):
48 |     @ContextProcessor
49 |     def envelope(self, context, file, *, fs):  # writes the prefix, yields once, then writes the suffix
50 |         file.write(self.prefix)
51 |         yield
52 |         file.write(self.suffix)
53 | 
54 |     def write(self, file, context, *args, fs):
55 |         """
56 |         Write the received row to ``file`` as JSON, preceded by ``self.eol`` unless it is the first item written.
57 | 
58 |         :param file: file object the JSON text is written to (through ``self._write_line``).
59 |         :param args: values of the row; zipped with ``context.get_input_fields()`` when that returns fields.
60 |         """
61 |         context.setdefault("lineno", 0)  # number of items written so far, kept on the context
62 |         fields = context.get_input_fields()
63 | 
64 |         if fields:
65 |             prefix = self.eol if context.lineno else ""
66 |             self._write_line(file, prefix + json.dumps(OrderedDict(zip(fields, args))))  # field -> value object
67 |             context.lineno += 1
68 |         else:
69 |             for arg in args:  # no field names known: every value is dumped as its own item
70 |                 prefix = self.eol if context.lineno else ""
71 |                 self._write_line(file, prefix + json.dumps(arg))
72 |                 context.lineno += 1
73 | 
74 |         return NOT_MODIFIED
75 | 
76 |     __call__ = write
77 | 
78 | 
79 | @use_context
80 | class LdjsonWriter(LdjsonHandler, JsonWriter):
81 |     """
82 |     Write a stream of Line-delimited JSON objects (one object per line).
83 | 
84 |     Not to be mistaken with JSON-LD (where LD stands for linked data).
85 |     Adds no code of its own: it combines JsonWriter with the eol/prefix/suffix values of LdjsonHandler.
86 |     """
87 | 


--------------------------------------------------------------------------------
/bonobo/contrib/google/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import httplib2
 4 | from apiclient import discovery
 5 | from oauth2client import client, tools
 6 | from oauth2client.file import Storage
 7 | from oauth2client.tools import argparser
 8 | 
 9 | # https://developers.google.com/api-client-library/python/guide/aaa_oauth
10 | # pip install google-api-python-client (1.6.4)
11 | 
12 | 
13 | HOME_DIR = os.path.expanduser("~")
14 | GOOGLE_SECRETS = os.path.join(HOME_DIR, ".cache/secrets/client_secrets.json")
15 | 
16 | 
17 | def get_credentials(*, scopes):
18 |     """Gets valid user credentials from storage.
19 | 
20 |     If nothing has been stored, or if the stored credentials are invalid,
21 |     the OAuth2 flow is completed to obtain the new credentials.
22 | 
23 |     Returns:
24 |         Credentials, the obtained credential.
25 |     """
26 |     credential_dir = os.path.join(HOME_DIR, ".cache", __package__, "credentials")
27 |     if not os.path.exists(credential_dir):
28 |         os.makedirs(credential_dir)
29 |     credential_path = os.path.join(credential_dir, "googleapis.json")
30 | 
31 |     store = Storage(credential_path)
32 |     credentials = store.get()
33 | 
34 |     # see https://developers.google.com/api-client-library/python/auth/web-app
35 |     # kw: "incremental scopes"
36 |     if not credentials or credentials.invalid or not credentials.has_scopes(scopes):
37 |         flow = client.flow_from_clientsecrets(GOOGLE_SECRETS, scopes)
38 |         flow.user_agent = "Bonobo ETL (https://www.bonobo-project.org/)"
39 |         flags = argparser.parse_args(["--noauth_local_webserver"])
40 |         credentials = tools.run_flow(flow, store, flags)
41 |         print("Storing credentials to " + credential_path)
42 |     return credentials
43 | 
44 | 
45 | def get_google_spreadsheets_api_client(scopes=("https://www.googleapis.com/auth/spreadsheets",)):
46 |     credentials = get_credentials(scopes=scopes)
47 |     http = credentials.authorize(httplib2.Http())
48 |     discoveryUrl = "https://sheets.googleapis.com/$discovery/rest?version=v4"
49 |     return discovery.build("sheets", "v4", http=http, discoveryServiceUrl=discoveryUrl, cache_discovery=False)
50 | 
51 | 
52 | def get_google_people_api_client(scopes=("https://www.googleapis.com/auth/contacts",)):
53 |     credentials = get_credentials(scopes=scopes)
54 |     http = credentials.authorize(httplib2.Http())
55 |     discoveryUrl = "https://people.googleapis.com/$discovery/rest?version=v1"
56 |     return discovery.build("people", "v1", http=http, discoveryServiceUrl=discoveryUrl, cache_discovery=False)
57 | 


--------------------------------------------------------------------------------
/bonobo/contrib/jupyter/js/webpack.config.js:
--------------------------------------------------------------------------------
 1 | var version = require('./package.json').version;
 2 | 
 3 | // Custom webpack loaders are generally the same for all webpack bundles, hence
 4 | // stored in a separate local variable.
 5 | var loaders = [
 6 |     {test: /\.json$/, loader: 'json-loader'},
 7 | ];
 8 | 
 9 | 
10 | module.exports = [
11 |     {
12 |         // Notebook extension
13 |         //
14 |         // This bundle only contains the part of the JavaScript that is run on
15 |         // load of the notebook. This section generally only performs
16 |         // some configuration for requirejs, and provides the legacy
17 |         // "load_ipython_extension" function which is required for any notebook
18 |         // extension.
19 |         //
20 |         entry: './src/extension.js',
21 |         output: {
22 |             filename: 'extension.js',
23 |             path: '../static',
24 |             libraryTarget: 'amd'
25 |         }
26 |     },
27 |     {
28 |         // Bundle for the notebook containing the custom widget views and models
29 |         //
30 |         // This bundle contains the implementation for the custom widget views and
31 |         // custom widget.
32 |         // It must be an amd module
33 |         //
34 |         entry: './src/index.js',
35 |         output: {
36 |             filename: 'index.js',
37 |             path: '../static',
38 |             libraryTarget: 'amd'
39 |         },
40 |         devtool: 'source-map',
41 |         module: {
42 |             loaders: loaders
43 |         },
44 |         externals: ['jupyter-js-widgets']
45 |     },
46 |     {
47 |         // Embeddable jupyter-widget-example bundle
48 |         //
49 |         // This bundle is generally almost identical to the notebook bundle
50 |         // containing the custom widget views and models.
51 |         //
52 |         // The only difference is in the configuration of the webpack public path
53 |         // for the static assets.
54 |         //
55 |         // It will be automatically distributed by unpkg to work with the static
56 |         // widget embedder.
57 |         //
58 |         // The target bundle is always `dist/index.js`, which is the path required
59 |         // by the custom widget embedder.
60 |         //
61 |         entry: './src/embed.js',
62 |         output: {
63 |             filename: 'index.js',
64 |             path: './dist/',
65 |             libraryTarget: 'amd',
66 |             publicPath: 'https://unpkg.com/jupyter-widget-example@' + version + '/dist/'
67 |         },
68 |         devtool: 'source-map',
69 |         module: {
70 |             loaders: loaders
71 |         },
72 |         externals: ['jupyter-js-widgets']
73 |     }
74 | ];
75 | 


--------------------------------------------------------------------------------
/bonobo/commands/run.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import bonobo
 4 | from bonobo.commands import BaseGraphCommand
 5 | 
 6 | 
 7 | class RunCommand(BaseGraphCommand):
 8 |     install = False
 9 |     handler = staticmethod(bonobo.run)
10 | 
11 |     def add_arguments(self, parser):
12 |         super(RunCommand, self).add_arguments(parser)
13 | 
14 |         verbosity_group = parser.add_mutually_exclusive_group()
15 |         verbosity_group.add_argument("--quiet", "-q", action="store_true")
16 |         verbosity_group.add_argument("--verbose", "-v", action="store_true")
17 | 
18 |         parser.add_argument("--install", "-I", action="store_true")
19 | 
20 |     def parse_options(self, *, quiet=False, verbose=False, install=False, **options):
21 |         from bonobo import settings
22 | 
23 |         settings.QUIET.set_if_true(quiet)
24 |         settings.DEBUG.set_if_true(verbose)
25 |         self.install = install
26 |         return options
27 | 
28 |     def _run_path(self, file):
29 |         # add install logic
30 |         if self.install:
31 |             if os.path.isdir(file):
32 |                 requirements = os.path.join(file, "requirements.txt")
33 |             else:
34 |                 requirements = os.path.join(os.path.dirname(file), "requirements.txt")
35 |             _install_requirements(requirements)
36 | 
37 |         return super()._run_path(file)
38 | 
39 |     def _run_module(self, mod):
40 |         # install not implemented for a module, not sure it even make sense.
41 |         if self.install:
42 |             raise RuntimeError("--install behaviour when running a module is not defined.")
43 | 
44 |         return super()._run_module(mod)
45 | 
46 | 
47 | def register_generic_run_arguments(parser, required=True):
48 |     """
49 |     Register BaseGraphCommand's arguments on ``parser`` and return it (kept for third party extensions).
50 |     TODO: This should be deprecated (using the @deprecated decorator) in 0.7, and removed in 0.8 or 0.9.
51 |     """
52 |     dummy_command = BaseGraphCommand()  # throwaway instance, only used to reach add_arguments()
53 |     dummy_command.required = required
54 |     dummy_command.add_arguments(parser)
55 |     return parser
56 | 
57 | 
58 | def _install_requirements(requirements):
59 |     """Install requirements given a path to requirements.txt file."""
60 |     import importlib
61 |     import pip
62 | 
63 |     pip.main(["install", "-r", requirements])
64 |     # Some shenanigans to be sure everything is importable after this, especially .egg-link files which
65 |     # are referenced in *.pth files and apparently loaded by site.py at some magic bootstrap moment of the
66 |     # python interpreter.
67 |     pip.utils.pkg_resources = importlib.reload(pip.utils.pkg_resources)
68 |     import site
69 | 
70 |     importlib.reload(site)
71 | 


--------------------------------------------------------------------------------
/docs/reference/api/bonobo.rst:
--------------------------------------------------------------------------------
  1 | :mod:`Bonobo <bonobo>`
  2 | ======================
  3 | 
  4 | .. currentmodule:: bonobo
  5 | 
  6 | :Module: :mod:`bonobo`
  7 | 
  8 | 
  9 | .. automodule:: bonobo
 10 |    :no-members:
 11 | 
 12 | 
 13 | 
 14 | Graphs
 15 | ::::::
 16 | 
 17 | * :class:`bonobo.structs.graphs.Graph` 
 18 | 
 19 | 
 20 | Nodes
 21 | :::::
 22 | 
 23 | * :class:`bonobo.nodes.CsvReader` 
 24 | * :class:`bonobo.nodes.CsvWriter` 
 25 | * :class:`bonobo.nodes.FileReader` 
 26 | * :class:`bonobo.nodes.FileWriter` 
 27 | * :class:`bonobo.nodes.Filter` 
 28 | * :class:`bonobo.nodes.FixedWindow` 
 29 | * :func:`bonobo.nodes.Format` 
 30 | * :class:`bonobo.nodes.JsonReader` 
 31 | * :class:`bonobo.nodes.JsonWriter` 
 32 | * :class:`bonobo.nodes.LdjsonReader` 
 33 | * :class:`bonobo.nodes.LdjsonWriter` 
 34 | * :class:`bonobo.nodes.Limit` 
 35 | * :func:`bonobo.nodes.MapFields` 
 36 | * :func:`bonobo.nodes.OrderFields` 
 37 | * :class:`bonobo.nodes.PickleReader` 
 38 | * :class:`bonobo.nodes.PickleWriter` 
 39 | * :class:`bonobo.nodes.PrettyPrinter` 
 40 | * :class:`bonobo.nodes.RateLimited` 
 41 | * :func:`bonobo.nodes.Rename` 
 42 | * :func:`bonobo.nodes.SetFields` 
 43 | * :func:`bonobo.nodes.Tee` 
 44 | * :func:`bonobo.nodes.UnpackItems` 
 45 | * :func:`bonobo.nodes.count` 
 46 | * :func:`bonobo.nodes.identity` 
 47 | * :func:`bonobo.nodes.noop` 
 48 | 
 49 | 
 50 | Other top-level APIs
 51 | ::::::::::::::::::::
 52 | 
 53 | * :func:`bonobo.create_reader` 
 54 | * :func:`bonobo.create_strategy` 
 55 | * :func:`bonobo.create_writer` 
 56 | * :func:`bonobo.get_argument_parser` 
 57 | * :func:`bonobo.get_examples_path` 
 58 | * :func:`bonobo.inspect` 
 59 | * :func:`bonobo.open_examples_fs` 
 60 | * :func:`bonobo.open_fs` 
 61 | * :func:`bonobo.parse_args` 
 62 | * :func:`bonobo.run` 
 63 | 
 64 | 
 65 | create_reader
 66 | -------------
 67 | 
 68 | .. autofunction:: bonobo.create_reader
 69 | 
 70 | 
 71 | create_strategy
 72 | ---------------
 73 | 
 74 | .. autofunction:: bonobo.create_strategy
 75 | 
 76 | 
 77 | create_writer
 78 | -------------
 79 | 
 80 | .. autofunction:: bonobo.create_writer
 81 | 
 82 | 
 83 | get_argument_parser
 84 | -------------------
 85 | 
 86 | .. autofunction:: bonobo.get_argument_parser
 87 | 
 88 | 
 89 | get_examples_path
 90 | -----------------
 91 | 
 92 | .. autofunction:: bonobo.get_examples_path
 93 | 
 94 | 
 95 | inspect
 96 | -------
 97 | 
 98 | .. autofunction:: bonobo.inspect
 99 | 
100 | 
101 | open_examples_fs
102 | ----------------
103 | 
104 | .. autofunction:: bonobo.open_examples_fs
105 | 
106 | 
107 | open_fs
108 | -------
109 | 
110 | .. autofunction:: bonobo.open_fs
111 | 
112 | 
113 | parse_args
114 | ----------
115 | 
116 | .. autofunction:: bonobo.parse_args
117 | 
118 | 
119 | run
120 | ---
121 | 
122 | .. autofunction:: bonobo.run
123 | 
124 |    


--------------------------------------------------------------------------------
/docs/extension/django.rst:
--------------------------------------------------------------------------------
 1 | .. currentmodule:: bonobo.contrib.django
 2 | 
 3 | Working with Django
 4 | ===================
 5 | 
 6 | |bonobo| provides a lightweight integration with django, allowing you to include ETL pipelines in your django management
 7 | commands.
 8 | 
 9 | Quick start
10 | :::::::::::
11 | 
12 | To write a django management command that runs |bonobo| job(s), just extend :class:`ETLCommand`
13 | instead of :class:`django.core.management.base.BaseCommand`, and override the :meth:`ETLCommand.get_graph` method:
14 | 
15 | .. code-block:: python
16 | 
17 |     import bonobo
18 |     from bonobo.contrib.django import ETLCommand
19 | 
20 |     class Command(ETLCommand):
21 |         def get_graph(self, **options):
22 |             graph = bonobo.Graph()
23 |             graph.add_chain(...)
24 |             return graph
25 | 
26 | Services
27 | --------
28 | 
29 | You can override :meth:`ETLCommand.get_services` to provide your service implementations.
30 | 
31 | One common recipe to do so is to import it from somewhere else and override it as a :obj:`staticmethod`:
32 | 
33 | .. code-block:: python
34 | 
35 |     import bonobo
36 |     from bonobo.contrib.django import ETLCommand
37 | 
38 |     from myproject.services import get_services
39 | 
40 |     class Command(ETLCommand):
41 |         get_services = staticmethod(get_services)
42 | 
43 |         def get_graph(...):
44 |             ...
45 | 
46 | 
47 | Multiple graphs
48 | ---------------
49 | 
50 | The :meth:`ETLCommand.get_graph` method can also be implemented as a generator. In this case, each element yielded must
51 | be a graph, and each graph will be executed in order:
52 | 
53 | .. code-block:: python
54 | 
55 |     import bonobo
56 |     from bonobo.contrib.django import ETLCommand
57 | 
58 |     class Command(ETLCommand):
59 |         def get_graph(self, **options):
60 |             yield bonobo.Graph(...)
61 |             yield bonobo.Graph(...)
62 |             yield bonobo.Graph(...)
63 | 
64 | This is especially helpful in two major cases:
65 | 
66 | * You must ensure that one job is finished before the next is run, and thus you can't add both graphs' nodes in the
67 |   same graph.
68 | * You want to change which graph is run depending on command line arguments.
69 | 
70 | 
71 | Command line arguments
72 | ----------------------
73 | 
74 | Like with regular django management commands, you can add arguments to the argument parser by overriding
75 | :meth:`ETLCommand.add_arguments`.
76 | 
77 | The only difference with django is that the provided argument parser will already have arguments added to handle
78 | environment.
79 | 
80 | 
81 | Reference
82 | :::::::::
83 | 
84 | :mod:`bonobo.contrib.django`
85 | ----------------------------
86 | 
87 | .. automodule:: bonobo.contrib.django
88 | 
89 | Source code
90 | :::::::::::
91 | 
92 | https://github.com/python-bonobo/bonobo/tree/master/bonobo/contrib/django
93 | 
94 | 


--------------------------------------------------------------------------------
/bin/imgcat:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | # tmux requires unrecognized OSC sequences to be wrapped with DCS tmux;
  4 | # <sequence> ST, and for all ESCs in <sequence> to be replaced with ESC ESC. It
  5 | # only accepts ESC backslash for ST.
  6 | function print_osc() {
  7 |     if [[ $TERM == screen* ]] ; then
  8 |         printf "\033Ptmux;\033\033]"
  9 |     else
 10 |         printf "\033]"
 11 |     fi
 12 | }
 13 | 
 14 | # More of the tmux workaround described above.
 15 | function print_st() {
 16 |     if [[ $TERM == screen* ]] ; then
 17 |         printf "\a\033\\"
 18 |     else
 19 |         printf "\a"
 20 |     fi
 21 | }
 22 | 
 23 | # print_image filename inline base64contents print_filename
 24 | #   filename: Filename to convey to client
 25 | #   inline: 0 or 1
 26 | #   base64contents: Base64-encoded contents
 27 | #   print_filename: If non-empty, print the filename 
 28 | #                   before outputting the image
 29 | function print_image() {
 30 |     print_osc
 31 |     printf '1337;File='
 32 |     if [[ -n "$1" ]]; then
 33 |       printf 'name=%s;' "$(printf "%s" "$1" | base64 | tr -d '\n')"  # quoted, and line wraps of long names removed
 34 |     fi
 35 | 
 36 |     VERSION=$(base64 --version 2>&1)  # the decode flag depends on which base64 implementation is installed
 37 |     if [[ "$VERSION" =~ fourmilab ]]; then
 38 |       BASE64ARG=-d
 39 |     elif [[ "$VERSION" =~ GNU ]]; then
 40 |       BASE64ARG=-di
 41 |     else
 42 |       BASE64ARG=-D
 43 |     fi
 44 | 
 45 |     printf "%s" "$3" | base64 $BASE64ARG | wc -c | awk '{printf "size=%d",$1}'  # byte count of the decoded data
 46 |     printf ';inline=%s' "$2"  # argument passed as data, never as part of the format string
 47 |     printf ":"
 48 |     printf "%s" "$3"
 49 |     print_st
 50 |     printf '\n'
 51 |     if [[ -n "$4" ]]; then
 52 |       printf '%s\n' "$1"  # quoted: filenames with spaces, globs or a leading dash are printed verbatim
 53 |     fi
 54 | }
 55 | 
 56 | function error() {
 57 |     printf 'ERROR: %s\n' "$*" >&2  # all arguments on one line, sent to stderr
 58 | }
 59 | 
 60 | function show_help() {
 61 |     printf '%s\n' "Usage: imgcat [-p] filename ..." >&2
 62 |     printf '%s\n' "   or: cat filename | imgcat" >&2
 63 | }
 64 | 
 65 | ## Main
 66 | 
 67 | if [ -t 0 ]; then  # stdin is a terminal, so no data is being piped in
 68 |     has_stdin=f
 69 | else
 70 |     has_stdin=t
 71 | fi
 72 | 
 73 | # Show help if no arguments and no stdin.
 74 | if [ $has_stdin = f -a $# -eq 0 ]; then
 75 |     show_help
 76 |     exit
 77 | fi
 78 | 
 79 | # Look for command line flags.
 80 | while [ $# -gt 0 ]; do
 81 |     case "$1" in
 82 |     -h|--h|--help)
 83 |         show_help
 84 |         exit
 85 |         ;;
 86 |     -p|--p|--print)
 87 |         print_filename=1  # only affects the files that come after this flag
 88 |         ;;
 89 |     -*)
 90 |         error "Unknown option flag: $1"
 91 |         show_help
 92 |         exit 1
 93 |       ;;
 94 |     *)
 95 |         if [ -r "$1" ] ; then
 96 |             has_stdin=f  # a file was given: stdin is not read at the end
 97 |             print_image "$1" 1 "$(base64 < "$1")" "$print_filename"
 98 |         else
 99 |             error "imgcat: $1: No such file or directory"
100 |             exit 2
101 |         fi
102 |         ;;
103 |     esac
104 |     shift
105 | done
106 | 
107 | # Read and print stdin
108 | if [ $has_stdin = t ]; then
109 |     print_image "" 1 "$(cat | base64)" ""
110 | fi
111 | 
112 | exit 0
113 | 


--------------------------------------------------------------------------------
/docs/_templates/alabaster/theme.conf:
--------------------------------------------------------------------------------
  1 | [theme]
  2 | inherit = basic
  3 | stylesheet = alabaster.css
  4 | pygments_style = alabaster.support.Alabaster
  5 | 
  6 | [options]
  7 | logo =
  8 | logo_name = false
  9 | logo_text_align = left
 10 | description =
 11 | description_font_style = normal
 12 | github_user =
 13 | github_repo =
 14 | github_button = true
 15 | github_banner = false
 16 | github_type = watch
 17 | github_count = true
 18 | badge_branch = master
 19 | travis_button = false
 20 | codecov_button = false
 21 | gratipay_user =
 22 | gittip_user =
 23 | analytics_id =
 24 | touch_icon =
 25 | canonical_url =
 26 | extra_nav_links =
 27 | sidebar_includehidden = true
 28 | sidebar_collapse = true
 29 | show_powered_by = true
 30 | show_related = false
 31 | 
 32 | gray_1 = #444
 33 | gray_2 = #EEE
 34 | gray_3 = #AAA
 35 | 
 36 | pink_1 = #FCC
 37 | pink_2 = #FAA
 38 | pink_3 = #D52C2C
 39 | 
 40 | base_bg = #fff
 41 | base_text = #000
 42 | hr_border = #B1B4B6
 43 | body_bg =
 44 | body_text = #3E4349
 45 | body_text_align = left
 46 | footer_text = #888
 47 | link = #004B6B
 48 | link_hover = #6D4100
 49 | sidebar_header =
 50 | sidebar_text = #555
 51 | sidebar_link =
 52 | sidebar_link_underscore = #999
 53 | sidebar_search_button = #CCC
 54 | sidebar_list = #000
 55 | sidebar_hr =
 56 | anchor = #DDD
 57 | anchor_hover_fg =
 58 | anchor_hover_bg = #EAEAEA
 59 | table_border = #888
 60 | shadow =
 61 | 
 62 | # Admonition options
 63 | ## basic level
 64 | admonition_bg =
 65 | admonition_border = #CCC
 66 | note_bg =
 67 | note_border = #CCC
 68 | seealso_bg =
 69 | seealso_border = #CCC
 70 | 
 71 | ## critical level
 72 | danger_bg =
 73 | danger_border =
 74 | danger_shadow =
 75 | error_bg =
 76 | error_border =
 77 | error_shadow =
 78 | 
 79 | ## normal level
 80 | tip_bg =
 81 | tip_border = #CCC
 82 | hint_bg =
 83 | hint_border = #CCC
 84 | important_bg =
 85 | important_border = #CCC
 86 | 
 87 | ## warning level
 88 | caution_bg =
 89 | caution_border =
 90 | attention_bg =
 91 | attention_border =
 92 | warn_bg =
 93 | warn_border =
 94 | 
 95 | topic_bg =
 96 | code_highlight_bg =
 97 | highlight_bg = #FAF3E8
 98 | xref_border = #fff
 99 | xref_bg = #FBFBFB
100 | admonition_xref_border = #fafafa
101 | admonition_xref_bg =
102 | footnote_bg = #FDFDFD
103 | footnote_border =
104 | pre_bg =
105 | narrow_sidebar_bg = #333
106 | narrow_sidebar_fg = #FFF
107 | narrow_sidebar_link =
108 | font_size = 17px
109 | caption_font_size = inherit
110 | viewcode_target_bg = #ffd
111 | code_bg = #ecf0f3
112 | code_text = #222
113 | code_hover = #EEE
114 | code_font_size = 0.9em
115 | code_font_family = 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace
116 | font_family = 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif
117 | head_font_family = 'Garamond', 'Georgia', serif
118 | caption_font_family = inherit
119 | code_highlight = #FFC
120 | page_width = 940px
121 | sidebar_width = 220px
122 | fixed_sidebar = false
123 | 


--------------------------------------------------------------------------------
/docs/guide/future/transformations.rst:
--------------------------------------------------------------------------------
 1 | Transformations
 2 | ===============
 3 | 
 4 | .. warning::
 5 | 
 6 |    This is a "future" document: it is not part of the published documentation, and is only kept here so that its
 7 |    content is not lost until we organize better documentation versioning.
 8 | 
 9 | 
10 | Output
11 | ------
12 | 
13 | Let's see the rules (first to match wins).
14 | 
15 | 1. A flag, optionally followed by something else, marks a special behaviour. If it supports it, the remaining part of
16 |    the output line will be interpreted using the same rules, and some flags can be combined.
17 | 
18 |    **NOT_MODIFIED**
19 | 
20 |    **NOT_MODIFIED** tells bonobo to use the input row unmodified as the output.
21 | 
22 |    *CANNOT be combined*
23 | 
24 |    Example:
25 | 
26 |    .. code-block:: python
27 | 
28 |        from bonobo import NOT_MODIFIED
29 | 
30 |        def output_will_be_same_as_input(*args, **kwargs):
31 |            yield NOT_MODIFIED
32 | 
33 | 2. Once all flags are "consumed", the remaining part is interpreted.
34 | 
35 |    * If it is a :class:`bonobo.Bag` instance, then it's used directly.
36 |    * If it is a :class:`dict` then a kwargs-only :class:`bonobo.Bag` will be created.
37 |    * If it is a :class:`tuple` then an args-only :class:`bonobo.Bag` will be created, unless its last argument is a
38 |      :class:`dict` in which case an args+kwargs :class:`bonobo.Bag` will be created.
39 |    * If it's something else, it will be used to create a one-arg-only :class:`bonobo.Bag`.
40 | 
41 |    **APPEND**
42 | 
43 |    **APPEND** tells bonobo to append this output to the input (positional arguments will equal `input_args + output_args`,
44 |    keyword arguments will equal `{**input_kwargs, **output_kwargs}`).
45 | 
46 |    *CAN be combined, but not with itself*
47 | 
48 |    .. code-block:: python
49 | 
50 |        from bonobo import APPEND
51 | 
52 |        def output_will_be_appended_to_input(*args, **kwargs):
53 |            yield APPEND, 'foo', 'bar', {'eat_at': 'joe'}
54 | 
55 |    **LOOPBACK**
56 | 
57 |    **LOOPBACK** tells bonobo that this output must be looped back into our own input queue, which allows you to create
58 |    stream-processing versions of recursive algorithms.
59 | 
60 |    *CAN be combined, but not with itself*
61 | 
62 |    .. code-block:: python
63 | 
64 |        from bonobo import LOOPBACK
65 | 
66 |        def output_will_be_sent_to_self(*args, **kwargs):
67 |            yield LOOPBACK, 'Hello, I am the future "you".'
68 | 
69 |    **CHANNEL(...)**
70 | 
71 |    **CHANNEL(...)** tells bonobo that this output does not use the default channel and is routed through another path.
72 |    This is something you should probably not use unless your data flow design is complex, and if you're not certain
73 |    about it, it probably means that it is not the feature you're looking for.
74 | 
75 |    *CAN be combined, but not with itself*
76 | 
77 |    .. code-block:: python
78 | 
79 |       from bonobo import CHANNEL
80 | 
81 |       def output_will_be_sent_to_errors_channel(*args, **kwargs):
82 |           yield CHANNEL("errors"), 'That is not cool.'
83 | 
84 | 


--------------------------------------------------------------------------------
/Projectfile:
--------------------------------------------------------------------------------
  1 | # bonobo's description for medikit
  2 | 
  3 | from medikit import require
  4 | 
  5 | make = require('make')
  6 | pytest = require('pytest')
  7 | python = require('python')
  8 | sphinx = require('sphinx')
  9 | 
 10 | python.setup(  # package metadata handed to medikit's python feature
 11 |     name='bonobo',
 12 |     python_requires='>=3.5',
 13 |     description='Bonobo, a simple, modern and atomic extract-transform-load toolkit for python 3.5+.',
 14 |     license='Apache License, Version 2.0',
 15 |     url='https://www.bonobo-project.org/',
 16 |     download_url='https://github.com/python-bonobo/bonobo/tarball/{version}',
 17 |     author='Romain Dorgueil',
 18 |     author_email='romain@dorgueil.net',
 19 |     data_files=[  # Jupyter notebook extension assets, installed under share/jupyter/nbextensions/bonobo-jupyter
 20 |         (
 21 |             'share/jupyter/nbextensions/bonobo-jupyter', [
 22 |                 'bonobo/contrib/jupyter/static/extension.js',
 23 |                 'bonobo/contrib/jupyter/static/index.js',
 24 |                 'bonobo/contrib/jupyter/static/index.js.map',
 25 |             ]
 26 |         ),
 27 |     ],
 28 |     entry_points={
 29 |         'console_scripts': [  # the "bonobo" executable
 30 |             'bonobo = bonobo.commands:entrypoint',
 31 |         ],
 32 |         'bonobo.commands': [  # entry-point group mapping each subcommand name to its command class
 33 |             'convert = bonobo.commands.convert:ConvertCommand',
 34 |             'download = bonobo.commands.download:DownloadCommand',
 35 |             'examples = bonobo.commands.examples:ExamplesCommand',
 36 |             'init = bonobo.commands.init:InitCommand',
 37 |             'inspect = bonobo.commands.inspect:InspectCommand',
 38 |             'run = bonobo.commands.run:RunCommand',
 39 |             'version = bonobo.commands.version:VersionCommand',
 40 |         ],
 41 |     }
 42 | )
 43 | 
 44 | python.add_requirements(  # positional entries first, then named requirement groups
 45 |     'cached-property ~=1.4',
 46 |     'fs ~=2.0',
 47 |     'graphviz >=0.8,<0.9',
 48 |     'jinja2 ~=2.9',
 49 |     'mondrian ~=0.8',
 50 |     'packaging ~=19.0',
 51 |     'psutil ~=5.4',
 52 |     'python-slugify ~=1.2.0',
 53 |     'requests ~=2.0',
 54 |     'stevedore ~=1.27',
 55 |     'whistle ~=1.0',
 56 |     dev=[  # NOTE(review): keyword groups presumably become named extras (e.g. bonobo[sqlalchemy]) — confirm with medikit
 57 |         'cookiecutter >=1.5,<1.6',
 58 |         'pytest-timeout >=1,<2',
 59 |         'sphinx-sitemap >=0.2,<0.3',
 60 |     ],
 61 |     docker=[
 62 |         'bonobo-docker ~=0.6.0a1',
 63 |     ],
 64 |     jupyter=[
 65 |         'ipywidgets ~=6.0',
 66 |         'jupyter ~=1.0',
 67 |     ],
 68 |     sqlalchemy=[
 69 |         'bonobo-sqlalchemy ~=0.6.0a1',
 70 |     ],
 71 | )
 72 | 
 73 | 
 74 | @listen(make.on_generate)  # NOTE(review): "listen" is not imported in this file — presumably injected by medikit; confirm
 75 | def on_make_generate(event):
 76 |     makefile = event.makefile
 77 |     
 78 |     # Sphinx: phony target that runs sphinx-autobuild on the docs sources, building into a fresh temporary directory.
 79 |     makefile['SPHINX_AUTOBUILD'] = '$(PYTHON_DIRNAME)/sphinx-autobuild'
 80 |     makefile.add_target(
 81 |         'watch-$(SPHINX_SOURCEDIR)',
 82 |         '$(SPHINX_AUTOBUILD) $(SPHINX_SOURCEDIR) $(shell mktemp -d)',
 83 |         phony=True
 84 |     )
 85 | 
 86 |     # Formatting: phony "format" target that runs black (120 columns) and then isort over the whole tree.
 87 |     makefile.add_target(
 88 |         'format',
 89 |         '''
 90 |             black -l 120 .
 91 |             isort -rc -o mondrian -o whistle -y .
 92 |         ''',
 93 |         phony=True,
 94 |         doc='Reformats the whole codebase using our standards (requires black and isort).'
 95 |     )
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | # vim: ft=python:
102 | 


--------------------------------------------------------------------------------
/bonobo/contrib/django/commands.py:
--------------------------------------------------------------------------------
 1 | from logging import getLogger
 2 | from types import GeneratorType
 3 | 
 4 | from colorama import Back, Fore, Style
 5 | from mondrian import term
 6 | 
 7 | import bonobo
 8 | from bonobo.plugins.console import ConsoleOutputPlugin
 9 | from bonobo.util.term import CLEAR_EOL
10 | from django.core.management import BaseCommand
11 | from django.core.management.base import OutputWrapper
12 | 
13 | from .utils import create_or_update
14 | 
15 | 
16 | class ETLCommand(BaseCommand):  # Django management command base that runs the bonobo graph(s) from get_graph()
17 |     @property
18 |     def logger(self):
19 |         try:
20 |             return self._logger
21 |         except AttributeError:  # first access: create the logger lazily and cache it on the instance
22 |             self._logger = getLogger(type(self).__module__)
23 |             return self._logger
24 | 
25 |     create_or_update = staticmethod(create_or_update)  # expose the .utils helper as a static method of the command
26 | 
27 |     def create_parser(self, prog_name, subcommand):
28 |         return bonobo.get_argument_parser(super().create_parser(prog_name, subcommand))
29 | 
30 |     def add_arguments(self, parser):
31 |         """
32 |         Entry point for subclassed commands to add custom arguments.
33 |         """
34 |         pass
35 | 
36 |     def get_graph(self, *args, **options):  # meant to be overridden; run() accepts a Graph or a generator of Graphs
37 |         def not_implemented():
38 |             raise NotImplementedError("You must implement {}.get_graph() method.".format(self))
39 | 
40 |         return bonobo.Graph(not_implemented)  # default graph whose single node raises NotImplementedError
41 | 
42 |     def get_services(self):  # passed as services= to bonobo.run() by run()
43 |         return {}
44 | 
45 |     def get_strategy(self):  # passed as strategy= to bonobo.run() by run()
46 |         return None
47 | 
48 |     def info(self, *args, **kwargs):
49 |         self.logger.info(*args, **kwargs)
50 | 
51 |     def run(self, *args, **options):  # runs each graph in order and returns the list of bonobo.run() results
52 |         results = []
53 |         with bonobo.parse_args(options) as options:
54 |             services = self.get_services()
55 |             strategy = self.get_strategy()
56 |             graph_coll = self.get_graph(*args, **options)
57 | 
58 |             if not isinstance(graph_coll, GeneratorType):  # anything that is not a generator is treated as one graph
59 |                 graph_coll = (graph_coll,)
60 | 
61 |             for i, graph in enumerate(graph_coll):
62 |                 if not isinstance(graph, bonobo.Graph):
63 |                     raise ValueError("Expected a Graph instance, got {!r}.".format(graph))
64 |                 print(term.lightwhite("{}. {}".format(i + 1, graph.name or repr(graph).strip("<>"))))  # 1-based heading
65 |                 result = bonobo.run(graph, services=services, strategy=strategy)
66 |                 results.append(result)
67 |                 for node in result.nodes:  # one summary line per node of the finished execution
68 |                     print(node.get_statistics_as_string(), node.get_flags_as_string())
69 |                 print(term.lightblack(" ... return value: " + str(result)))
70 | 
71 |         return results
72 | 
73 |     def handle(self, *args, **options):  # swaps stdout/stderr wrappers for the duration of run()
74 |         _stdout_backup, _stderr_backup = self.stdout, self.stderr
75 | 
76 |         self.stdout = OutputWrapper(ConsoleOutputPlugin._stdout, ending=CLEAR_EOL + "\n")
77 |         self.stderr = OutputWrapper(ConsoleOutputPlugin._stderr, ending=CLEAR_EOL + "\n")
78 |         self.stderr.style_func = lambda x: Fore.LIGHTRED_EX + Back.RED + "!" + Style.RESET_ALL + " " + x
79 | 
80 |         try:
81 |             return self.run(*args, **options)
82 |         finally:  # restore the original wrappers even if run() raises
83 |             self.stdout, self.stderr = _stdout_backup, _stderr_backup
84 | 


--------------------------------------------------------------------------------
/bonobo/nodes/io/file.py:
--------------------------------------------------------------------------------
  1 | from bonobo.config import ContextProcessor, Option, use_context
  2 | from bonobo.constants import NOT_MODIFIED
  3 | from bonobo.errors import UnrecoverableError
  4 | from bonobo.nodes.io.base import FileHandler, Reader, Writer
  5 | from bonobo.util import ensure_tuple
  6 | 
  7 | 
  8 | class FileReader(Reader, FileHandler):
  9 |     """Component factory for file-like readers.
 10 | 
 11 |     On its own, it can be used to read a file and yield one row per line, trimming the "eol" character at the end if
 12 |     present. Extending it is usually the right way to create more specific file readers (like json, csv, etc.)
 13 |     """
 14 | 
 15 |     mode = Option(
 16 |         str,
 17 |         default="r",
 18 |         __doc__="""
 19 |         What mode to use for open() call.
 20 |     """,
 21 |     )  # type: str
 22 | 
 23 |     output_fields = Option(
 24 |         ensure_tuple,
 25 |         required=False,
 26 |         __doc__="""
 27 |         Specify the field names of output lines.
 28 |         Mutually exclusive with "output_type".
 29 |     """,
 30 |     )
 31 |     output_type = Option(
 32 |         required=False,
 33 |         __doc__="""
 34 |         Specify the type of output lines.
 35 |         Mutually exclusive with "output_fields".
 36 |     """,
 37 |     )
 38 | 
 39 |     @ContextProcessor
 40 |     def output(self, context, *args, **kwargs):
 41 |         """
 42 |         Allow all readers to optionally use either the output_fields or the output_type option (but not both).
 43 | 
 44 |         """
 45 | 
 46 |         output_fields = self.output_fields
 47 |         output_type = self.output_type
 48 | 
 49 |         if output_fields and output_type:  # the two options are mutually exclusive
 50 |             raise UnrecoverableError("Cannot specify both output_fields and output_type option.")
 51 | 
 52 |         if self.output_type:
 53 |             context.set_output_type(self.output_type)
 54 | 
 55 |         if self.output_fields:
 56 |             context.set_output_fields(self.output_fields)
 57 | 
 58 |         yield
 59 | 
 60 |     def read(self, file, *, fs):
 61 |         """
 62 |         Yield each line of the given file as a row,
 63 |         after stripping any trailing characters found in self.eol.
 64 |         """
 65 |         for line in file:
 66 |             yield line.rstrip(self.eol)
 67 | 
 68 |     __call__ = read  # calling a reader instance delegates to read()
 69 | 
 70 | 
 71 | @use_context
 72 | class FileWriter(Writer, FileHandler):
 73 |     """Component factory for file or file-like writers.
 74 | 
 75 |     On its own, it can be used to write in a file one line per row that comes into this component. Extending it is
 76 |     usually the right way to create more specific file writers (like json, csv, etc.)
 77 |     """
 78 | 
 79 |     mode = Option(
 80 |         str,
 81 |         default="w+",
 82 |         __doc__="""
 83 |         What mode to use for open() call.
 84 |     """,
 85 |     )  # type: str
 86 | 
 87 |     def write(self, file, context, line, *, fs):
 88 |         """
 89 |         Write a row on the next line of opened file in context.
 90 |         """
 91 |         context.setdefault("lineno", 0)  # start the per-context line counter on the first call
 92 |         self._write_line(file, (self.eol if context.lineno else "") + line)  # eol precedes every line but the first
 93 |         context.lineno += 1
 94 |         return NOT_MODIFIED
 95 | 
 96 |     def _write_line(self, file, line):  # separate method so the raw write can be overridden independently of write()
 97 |         return file.write(line)
 98 | 
 99 |     __call__ = write  # calling a writer instance delegates to write()
100 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | ==========
 2 | 🐵  bonobo
 3 | ==========
 4 | 
 5 | Data-processing for humans.
 6 | 
 7 | .. image:: https://img.shields.io/pypi/v/bonobo.svg
 8 |     :target: https://pypi.python.org/pypi/bonobo
 9 |     :alt: PyPI
10 | 
11 | .. image:: https://img.shields.io/pypi/pyversions/bonobo.svg
12 |     :target: https://pypi.python.org/pypi/bonobo
13 |     :alt: Versions
14 | 
15 | .. image:: https://readthedocs.org/projects/bonobo/badge/?version=master
16 |     :target: http://docs.bonobo-project.org/
17 |     :alt: Documentation
18 | 
19 | .. image:: https://travis-ci.org/python-bonobo/bonobo.svg?branch=master
20 |     :target: https://travis-ci.org/python-bonobo/bonobo
21 |     :alt: Continuous Integration (Linux)
22 | 
23 | .. image:: https://ci.appveyor.com/api/projects/status/github/python-bonobo/bonobo?retina=true&branch=master&svg=true
24 |     :target: https://ci.appveyor.com/project/hartym/bonobo?branch=master
25 |     :alt: Continuous Integration (Windows)
26 | 
27 | .. image:: https://codeclimate.com/github/python-bonobo/bonobo/badges/gpa.svg
28 |    :target: https://codeclimate.com/github/python-bonobo/bonobo
29 |    :alt: Code Climate
30 | 
31 | .. image:: https://img.shields.io/coveralls/python-bonobo/bonobo/master.svg
32 |     :target: https://coveralls.io/github/python-bonobo/bonobo?branch=master
33 |     :alt: Coverage
34 | 
35 | Bonobo is an extract-transform-load framework for python 3.5+ (see comparisons with other data tools).
36 | 
37 | Bonobo uses plain old python objects (functions, generators and iterators), allows them to be linked together in a directed graph, and then executed using a parallelized strategy, without having to worry about the underlying complexity.
38 | 
39 | Developers can focus on writing simple and atomic operations, that are easy to unit-test by-design, while the focus of the
40 | framework is to apply them concurrently to rows of data.
41 | 
42 | One thing to note: write pure transformations and you'll be safe.
43 | 
44 | Bonobo is a young rewrite of an old python2.7 tool that ran millions of transformations per day for years in production.
45 | Although it may not yet be complete or fully stable (please, allow us to reach 1.0), the basics are there.
46 | 
47 | ----
48 | 
49 | *Bonobo is under heavy development, we're doing our best to keep the core as stable as possible while still moving forward. Please allow us to reach 1.0 stability and our sincere apologies for anything we break in the process (feel free to complain on issues, allowing us to correct breakages we did not expect)*
50 | 
51 | ----
52 | 
53 | Homepage: https://www.bonobo-project.org/ (`Roadmap <https://www.bonobo-project.org/roadmap>`_)
54 | 
55 | Documentation: http://docs.bonobo-project.org/
56 | 
57 | Contributing guide: http://docs.bonobo-project.org/en/latest/contribute/index.html
58 | 
59 | Issues: https://github.com/python-bonobo/bonobo/issues
60 | 
61 | Slack: https://bonobo-slack.herokuapp.com/
62 | 
63 | Release announcements: http://eepurl.com/csHFKL
64 | 
65 | ----
66 | 
67 | Made with ♥ by `Romain Dorgueil <https://twitter.com/rdorgueil>`_ and `contributors <https://github.com/python-bonobo/bonobo/graphs/contributors>`_.
68 | 
69 | .. image:: https://img.shields.io/pypi/l/bonobo.svg
70 |     :target: https://pypi.python.org/pypi/bonobo
71 |     :alt: License
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/bonobo/commands/init.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from jinja2 import Environment, FileSystemLoader
 4 | from mondrian import humanizer
 5 | 
 6 | from bonobo.commands import BaseCommand
 7 | 
 8 | 
 9 | class InitCommand(BaseCommand):  # generates a job file from a jinja2 template, or a package skeleton via medikit
10 |     TEMPLATES = {"bare", "default"}  # accepted values for --template
11 |     TEMPLATES_PATH = os.path.join(os.path.dirname(__file__), "templates")  # "templates" directory next to this module
12 | 
13 |     def add_arguments(self, parser):
14 |         parser.add_argument("filename")
15 |         parser.add_argument("--force", "-f", default=False, action="store_true")
16 | 
17 |         target_group = parser.add_mutually_exclusive_group(required=False)  # --template and --package cannot be combined
18 |         target_group.add_argument("--template", "-t", choices=self.TEMPLATES, default="default")
19 |         target_group.add_argument("--package", "-p", action="store_true", default=False)
20 | 
21 |     def create_file_from_template(self, *, template, filename):
22 |         template_name = template  # keep the name: "template" is rebound to the loaded jinja2 template below
23 |         name, ext = os.path.splitext(filename)
24 |         if ext != ".py":
25 |             raise ValueError('Filenames should end with ".py".')
26 | 
27 |         loader = FileSystemLoader(self.TEMPLATES_PATH)
28 |         env = Environment(loader=loader)
29 |         template = env.get_template(template_name + ".py-tpl")  # templates are stored as "<name>.py-tpl"
30 | 
31 |         with open(filename, "w+") as f:  # "w+" truncates any existing file
32 |             f.write(template.render(name=name))  # "name" is the target path without its ".py" extension
33 | 
34 |         print(humanizer.Success("Generated {} using template {!r}.".format(filename, template_name)))
35 | 
36 |     def create_package(self, *, filename):
37 |         _, ext = os.path.splitext(filename)
38 |         if ext != "":
39 |             raise ValueError("Package names should not have an extension.")
40 | 
41 |         try:
42 |             import medikit.commands  # imported lazily: medikit is only needed for package creation
43 |         except ImportError as exc:
44 |             raise ImportError(
45 |                 "To initialize a package, you need to install medikit (pip install --upgrade medikit)."
46 |             ) from exc
47 | 
48 |         package_name = os.path.basename(filename)  # last path component is used as the package name
49 |         medikit.commands.handle_init(
50 |             os.path.join(os.getcwd(), filename, "Projectfile"), name=package_name, requirements=["bonobo"]
51 |         )
52 | 
53 |         self.logger.info('Generated "{}" package with medikit.'.format(package_name))
54 |         self.create_file_from_template(template="default", filename=os.path.join(filename, package_name, "__main__.py"))
55 | 
56 |         print(
57 |             humanizer.Success(
58 |                 'Package "{}" has been created.'.format(package_name),
59 |                 "",
60 |                 "Install it...",
61 |                 "",
62 |                 "    $ `pip install --editable {}`".format(filename),
63 |                 "",
64 |                 "Then maybe run the example...",
65 |                 "",
66 |                 "    $ `python -m {}`".format(package_name),
67 |                 "",
68 |                 "Enjoy!",
69 |             )
70 |         )
71 | 
72 |     @humanizer.humanize()
73 |     def handle(self, *, template, filename, package=False, force=False):
74 |         if os.path.exists(filename) and not force:  # refuse to touch an existing target unless --force was given
75 |             raise FileExistsError("Target filename already exists, use --force to override.")
76 | 
77 |         if package:  # --package takes precedence; "template" is ignored in that case
78 |             self.create_package(filename=filename)
79 |         else:
80 |             self.create_file_from_template(template=template, filename=filename)
81 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Covenant Code of Conduct
 2 | 
 3 | ## Our Pledge
 4 | 
 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
 6 | 
 7 | ## Our Standards
 8 | 
 9 | Examples of behavior that contributes to creating a positive environment include:
10 | 
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 | 
17 | Examples of unacceptable behavior by participants include:
18 | 
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 | 
25 | ## Our Responsibilities
26 | 
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 | 
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 | 
31 | ## Scope
32 | 
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 | 
35 | ## Enforcement
36 | 
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at bonobo@rdc.li. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 | 
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 | 
41 | ## Attribution
42 | 
43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
44 | 
45 | [homepage]: http://contributor-covenant.org
46 | [version]: http://contributor-covenant.org/version/1/4/
47 | 


--------------------------------------------------------------------------------
/docs/extension/sqlalchemy.rst:
--------------------------------------------------------------------------------
  1 | .. currentmodule:: bonobo_sqlalchemy
  2 | 
  3 | Working with SQLAlchemy
  4 | =======================
  5 | 
  6 | .. include:: _beta.rst
  7 | 
  8 | Read the introduction: https://www.bonobo-project.org/with/sqlalchemy
  9 | 
 10 | Installation
 11 | ::::::::::::
 12 | 
 13 | To install the extension, use the `sqlalchemy` extra:
 14 | 
 15 | .. code-block:: shell-session
 16 | 
 17 |     $ pip install bonobo[sqlalchemy]
 18 | 
 19 | .. note:: You can install more than one extra at a time separating the names with commas.
 20 | 
 21 | Overview and examples
 22 | :::::::::::::::::::::
 23 | 
 24 | First, you'll need a database connection (:obj:`sqlalchemy.engine.Engine` instance), that must be provided as a service.
 25 | 
 26 | .. code-block:: python
 27 | 
 28 |     import sqlalchemy
 29 | 
 30 |     def get_services():
 31 |         return {
 32 |             'sqlalchemy.engine': sqlalchemy.create_engine(...)
 33 |         }
 34 | 
 35 | The `sqlalchemy.engine` name is the default name used by the provided transformations, but you can override it (for
 36 | example if you need more than one connection) and specify the service name using `engine='myengine'` while building your
 37 | transformations.
 38 | 
 39 | Let's create some tables and add some data. (You may need to edit the SQL if your database server uses a different
 40 | dialect of SQL.)
 41 | 
 42 | .. code-block:: sql
 43 | 
 44 |     CREATE TABLE test_in (
 45 |       id INTEGER PRIMARY KEY NOT NULL,
 46 |       text TEXT
 47 |     );
 48 | 
 49 |     CREATE TABLE test_out (
 50 |       id INTEGER PRIMARY KEY NOT NULL,
 51 |       text TEXT
 52 |     );
 53 | 
 54 |     INSERT INTO test_in (id, text) VALUES (1, 'Cat');
 55 |     INSERT INTO test_in (id, text) VALUES (2, 'Dog');
 56 | 
 57 | 
 58 | There are two transformation classes provided by this extension.
 59 | 
 60 | One reader, one writer.
 61 | 
 62 | Let's select some data:
 63 | 
 64 | .. code-block:: python
 65 | 
 66 |     import bonobo
 67 |     import bonobo_sqlalchemy
 68 | 
 69 |     def get_graph():
 70 |         graph = bonobo.Graph()
 71 |         graph.add_chain(
 72 |             bonobo_sqlalchemy.Select('SELECT * FROM test_in', limit=100),
 73 |             bonobo.PrettyPrinter(),
 74 |         )
 75 |         return graph
 76 | 
 77 | You should see:
 78 | 
 79 | .. code-block:: shell-session
 80 | 
 81 |     $ python tutorial.py
 82 |     ┌
 83 |     │ id[0] = 1
 84 |     │ text[1] = 'Cat'
 85 |     └
 86 |     ┌
 87 |     │ id[0] = 2
 88 |     │ text[1] = 'Dog'
 89 |     └
 90 |      - Select in=1 out=2 [done]
 91 |      - PrettyPrinter in=2 out=2 [done]
 92 | 
 93 | 
 94 | Now let's insert some data:
 95 | 
 96 | .. code-block:: python
 97 | 
 98 |     import bonobo
 99 |     import bonobo_sqlalchemy
100 | 
101 | 
102 |     def get_graph(**options):
103 |         graph = bonobo.Graph()
104 |         graph.add_chain(
105 |             bonobo_sqlalchemy.Select('SELECT * FROM test_in', limit=100),
106 |             bonobo_sqlalchemy.InsertOrUpdate('test_out')
107 |         )
108 | 
109 |         return graph
110 | 
111 | If you check the `test_out` table, it should now have the data.
112 | 
113 | Reference
114 | :::::::::
115 | 
116 | :mod:`bonobo_sqlalchemy`
117 | ------------------------
118 | 
119 | .. automodule:: bonobo_sqlalchemy
120 | 
121 | Source code
122 | :::::::::::
123 | 
124 | https://github.com/python-bonobo/bonobo-sqlalchemy
125 | 
126 | 


--------------------------------------------------------------------------------