├── .bumpversion.cfg ├── .coveragerc ├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── CHANGELOG.rst ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── appveyor.yml ├── bootstrap.py ├── ci ├── appveyor-bootstrap.ps1 ├── appveyor-with-compiler.cmd └── templates │ ├── .travis.yml │ ├── appveyor.yml │ └── tox.ini ├── docs ├── authors.rst ├── changelog.rst ├── conf.py ├── contributing.rst ├── images │ └── github-private-token.png ├── index.rst ├── outputs.rst ├── overview.rst ├── reference │ ├── index.rst │ └── processor.rst ├── requirements.txt ├── sources.rst └── spelling_wordlist.txt ├── examples └── do-123.py ├── requirements-dev.txt ├── setup.cfg ├── setup.py ├── src └── processor │ ├── __init__.py │ ├── outputs │ ├── __init__.py │ ├── debug.hy │ ├── email.hy │ ├── fanout.hy │ ├── rss.hy │ ├── slack.hy │ └── xmpp.hy │ ├── pipeline.hy │ ├── sources │ ├── __init__.py │ ├── github.hy │ ├── imap.hy │ ├── mix.hy │ ├── twitter.hy │ └── web.hy │ ├── storage.hy │ ├── utils │ ├── __init__.py │ ├── datastructures.hy │ ├── macro.hy │ └── twitter.py │ └── version.hy ├── tasks.py ├── tests ├── __init__.py ├── outputs.hy ├── pipeline.hy ├── sources.hy └── test_processor.py └── tox.ini /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.0 3 | files = setup.py src/processor/__init__.py 4 | commit = True 5 | tag = True 6 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [paths] 2 | source = src 3 | 4 | [run] 5 | branch = True 6 | source = src 7 | parallel = true 8 | 9 | [report] 10 | show_missing = true 11 | precision = 2 12 | omit = *migrations* 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 
| # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | .coverage 28 | .coverage.* 29 | nosetests.xml 30 | htmlcov 31 | 32 | # Translations 33 | *.mo 34 | 35 | # Mr Developer 36 | .mr.developer.cfg 37 | .project 38 | .pydevproject 39 | .idea 40 | 41 | # Complexity 42 | output/*.html 43 | output/*/index.html 44 | 45 | # Sphinx 46 | docs/_build 47 | 48 | .DS_Store 49 | *~ 50 | .*.sw[po] 51 | .build 52 | .ve 53 | .bootstrap 54 | *.bak 55 | .noseids 56 | /env 57 | mail2rss.py 58 | /rss-feed.xml 59 | /*.db 60 | /do-*.py 61 | /*.log 62 | .\#* 63 | \#* -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 2.7 3 | sudo: false 4 | env: 5 | global: 6 | LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so 7 | matrix: 8 | - TOXENV=check 9 | - TOXENV=3.3,coveralls 10 | - TOXENV=3.3-nocover 11 | - TOXENV=3.4,coveralls 12 | - TOXENV=3.4-nocover 13 | before_install: 14 | - python --version 15 | - virtualenv --version 16 | - pip --version 17 | - uname -a 18 | - lsb_release -a 19 | after_success: 20 | - codecov 21 | install: 22 | - pip install tox 23 | - pip install codecov 24 | script: 25 | - tox -v 26 | notifications: 27 | email: 28 | on_success: never 29 | on_failure: always 30 | 31 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | 2 | Authors 3 | ======= 4 | 5 | * Alexander Artemenko - http://dev.svetlyak.ru 6 | -------------------------------------------------------------------------------- /CHANGELOG.rst: 
-------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | 0.10.0 (2016-01-04) 5 | ------------------- 6 | 7 | * IMAP source was fixed to work with the new IMAPClient API and 8 | support ``IMAPClient > 1.0.0``. 9 | * Datastorage was fixed to get ``filename`` from the ``PROCESSOR_DB`` 10 | environment variable in case it was set up using 11 | ``os.environ['PROCESSOR_DB'] = 'some.db'`` after the imports. 12 | 13 | 0.9.0 (2015-12-06) 14 | ------------------ 15 | 16 | Code was fixed to work with HyLang from the ``a3bd90390cb37b46ae33ce3a73ee84a0feacce7d`` 17 | commit. Please use this pinned version of HyLang and `subscribe`_ to future 18 | release notes to know when this requirement changes. 19 | 20 | .. _subscribe: https://allmychanges.com/p/python/processor/ 21 | 22 | 0.8.0 (2015-11-16) 23 | ------------------ 24 | 25 | * Code was fixed to work with the latest Hy from GitHub. 26 | * Added the ``twitter.mentions`` source, to read the stream of mentions from Twitter. 27 | * Fixed the way the number of messages from an IMAP folder is limited. Previously 28 | the limit was applied even when we already knew the ID of the last seen message; 29 | now the limit is ignored in this case and only applied when visiting the 30 | folder for the first time. 31 | 32 | 0.7.0 (2015-05-05) 33 | ------------------ 34 | 35 | A new XMPP output was added, and now processor is able 36 | to notify Jabber users. 37 | 38 | 0.6.0 (2015-05-01) 39 | ------------------ 40 | 41 | The biggest change in this release is a new source – ``github.releases``. 42 | It is able to read all new releases in a given repository and send them into 43 | the processing pipeline. This works for public repositories as well as private 44 | ones. `Read the docs`_ for further details. 45 | 46 | ..
_Read the docs: https://python-processor.readthedocs.org/en/latest/sources.html#github-releases 47 | 48 | Other changes are: 49 | 50 | * The storage backend now saves the JSON database nicely pretty-printed, so you can read and edit it in your favorite editor. This is Emacs, right? 51 | * The twitter.search source now saves state after each tweet is processed. This way processor shouldn't lose tweets if there was an exception somewhere in the processing pipeline. 52 | * The IMAP source was fixed and is now able to fetch emails from really big folders. 53 | 54 | 55 | 0.5.0 (2015-04-15) 56 | ------------------ 57 | 58 | Good news, everyone! A new output was added - ``email``. 59 | Now Processor is able to notify you via email about any event. 60 | 61 | 0.4.0 (2015-04-06) 62 | ------------------ 63 | 64 | * The function ``run_pipeline`` was simplified and now accepts only one source and one output. 65 | To implement more complex pipelines, use the ``sources.mix`` and ``outputs.fanout`` helpers. 66 | 67 | 0.3.0 (2015-04-01) 68 | ------------------ 69 | 70 | * Added a `web.hook`_ source. 71 | * Now a `source` can be not only an iterable object, but any function which returns values. 72 | 73 | .. _web.hook: https://python-processor.readthedocs.org/en/latest/sources.html#web-hook 74 | 75 | 0.2.1 (2015-03-30) 76 | ------------------ 77 | 78 | Fixed an error in the ``import-or-error`` macro which prevented the use of third-party libraries. 79 | 80 | 0.2.0 (2015-03-30) 81 | ------------------ 82 | 83 | Most third-party libraries are optional now. If you want to use 84 | an extension which requires an external library, it will issue 85 | an error and call ``sys.exit(1)`` until you satisfy this 86 | requirement. 87 | 88 | This should make life easier for those who do not want 89 | to use the ``rss`` output, which requires ``feedgen``, which requires 90 | ``lxml``, which is hard to build because it is a C extension. 91 | 92 | 0.1.0 (2015-03-18) 93 | ------------------ 94 | 95 | * First release on PyPI.
96 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | Contributions are welcome, and they are greatly appreciated! Every 6 | little bit helps, and credit will always be given. 7 | 8 | Bug reports 9 | =========== 10 | 11 | When `reporting a bug `_ please include: 12 | 13 | * Your operating system name and version. 14 | * Any details about your local setup that might be helpful in troubleshooting. 15 | * Detailed steps to reproduce the bug. 16 | 17 | Documentation improvements 18 | ========================== 19 | 20 | processor could always use more documentation, whether as part of the 21 | official processor docs, in docstrings, or even on the web in blog posts, 22 | articles, and such. 23 | 24 | Feature requests and feedback 25 | ============================= 26 | 27 | The best way to send feedback is to file an issue at https://github.com/svetlyak40wt/python-processor/issues. 28 | 29 | If you are proposing a feature: 30 | 31 | * Explain in detail how it would work. 32 | * Keep the scope as narrow as possible, to make it easier to implement. 33 | * Remember that this is a volunteer-driven project, and that contributions are welcome :) 34 | 35 | Development 36 | =========== 37 | 38 | To set up `python-processor` for local development: 39 | 40 | 1. `Fork python-processor on GitHub `_. 41 | 2. Clone your fork locally:: 42 | 43 | git clone git@github.com:your_name_here/python-processor.git 44 | 45 | 3. Create a branch for local development:: 46 | 47 | git checkout -b name-of-your-bugfix-or-feature 48 | 49 | Now you can make your changes locally. 50 | 51 | 4. When you're done making changes, run all the checks, the doc builder and the spell checker with one `tox `_ command:: 52 | 53 | tox 54 | 55 | 5. Commit your changes and push your branch to GitHub:: 56 | 57 | git add .
58 | git commit -m "Your detailed description of your changes." 59 | git push origin name-of-your-bugfix-or-feature 60 | 61 | 6. Submit a pull request through the GitHub website. 62 | 63 | Pull Request Guidelines 64 | ----------------------- 65 | 66 | If you need some code review or feedback while you're developing the code, just open the pull request. 67 | 68 | For merging, you should: 69 | 70 | 1. Include passing tests (run ``tox``) [1]_. 71 | 2. Update the documentation when there's a new API, functionality, etc. 72 | 3. Add a note to ``CHANGELOG.rst`` about the changes. 73 | 4. Add yourself to ``AUTHORS.rst``. 74 | 75 | .. [1] If you don't have all the necessary python versions available locally you can rely on Travis - it will 76 | `run the tests `_ for each change you add in the pull request. 77 | 78 | It will be slower, though... 79 | 80 | Tips 81 | ---- 82 | 83 | To run a subset of tests:: 84 | 85 | tox -e envname -- py.test -k test_myfeature 86 | 87 | To run all the test environments in *parallel* (you need to ``pip install detox``):: 88 | 89 | detox -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Alexander Artemenko 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 5 | following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following 8 | disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following 11 | disclaimer in the documentation and/or other materials provided with the distribution.
12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 14 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 16 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 17 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 18 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 19 | THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft docs 2 | graft examples 3 | graft src 4 | graft ci 5 | graft tests 6 | 7 | include *.komodoproject 8 | include .bumpversion.cfg 9 | include .coveragerc 10 | include .isort.cfg 11 | include .pylintrc 12 | 13 | include AUTHORS.rst 14 | include CHANGELOG.rst 15 | include CONTRIBUTING.rst 16 | include LICENSE 17 | include README.rst 18 | 19 | include bootstrap.py 20 | include tox.ini .travis.yml appveyor.yml 21 | 22 | global-exclude *.py[co] __pycache__ *.so *.pyd 23 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ================ 2 | python-processor 3 | ================ 4 | 5 | Badges 6 | ====== 7 | 8 | | |docs| |changelog| |travis| |coveralls| |landscape| |scrutinizer| 9 | | |version| |downloads| |wheel| |supported-versions| |supported-implementations| 10 | 11 | .. 
|docs| image:: https://readthedocs.org/projects/python-processor/badge/?style=flat 12 | :target: https://readthedocs.org/projects/python-processor 13 | :alt: Documentation Status 14 | 15 | .. |changelog| image:: http://allmychanges.com/p/python/processor/badge/ 16 | :target: http://allmychanges.com/p/python/processor/?utm_source=badge 17 | :alt: Release Notes 18 | 19 | .. |travis| image:: http://img.shields.io/travis/svetlyak40wt/python-processor/master.png?style=flat 20 | :alt: Travis-CI Build Status 21 | :target: https://travis-ci.org/svetlyak40wt/python-processor 22 | 23 | .. |coveralls| image:: http://img.shields.io/coveralls/svetlyak40wt/python-processor/master.png?style=flat 24 | :alt: Coverage Status 25 | :target: https://coveralls.io/r/svetlyak40wt/python-processor 26 | 27 | .. |landscape| image:: https://landscape.io/github/svetlyak40wt/python-processor/master/landscape.svg?style=flat 28 | :target: https://landscape.io/github/svetlyak40wt/python-processor/master 29 | :alt: Code Quality Status 30 | 31 | .. |version| image:: http://img.shields.io/pypi/v/processor.png?style=flat 32 | :alt: PyPI Package latest release 33 | :target: https://pypi.python.org/pypi/processor 34 | 35 | .. |downloads| image:: http://img.shields.io/pypi/dm/processor.png?style=flat 36 | :alt: PyPI Package monthly downloads 37 | :target: https://pypi.python.org/pypi/processor 38 | 39 | .. |wheel| image:: https://img.shields.io/pypi/wheel/processor.svg?style=flat 40 | :alt: PyPI Wheel 41 | :target: https://pypi.python.org/pypi/processor 42 | 43 | .. |supported-versions| image:: https://img.shields.io/pypi/pyversions/processor.svg?style=flat 44 | :alt: Supported versions 45 | :target: https://pypi.python.org/pypi/processor 46 | 47 | .. |supported-implementations| image:: https://img.shields.io/pypi/implementation/processor.svg?style=flat 48 | :alt: Supported implementations 49 | :target: https://pypi.python.org/pypi/processor 50 | 51 | ..
|scrutinizer| image:: https://img.shields.io/scrutinizer/g/svetlyak40wt/python-processor/master.png?style=flat 52 | :alt: Scrutinizer Status 53 | :target: https://scrutinizer-ci.com/g/svetlyak40wt/python-processor/ 54 | 55 | 56 | Simple rules 57 | ============== 58 | 59 | Python processor is a tool for creating chained pipelines for data processing. 60 | It has very few key concepts: 61 | 62 | Data object 63 | Any python dict with two required fields: ``source`` and ``type``. 64 | Source 65 | An iterable sequence of ``data objects`` or a function which returns ``data objects``. 66 | See the `full list of sources`_ in the docs. 67 | Output 68 | A function which accepts a ``data object`` as input and could output another 69 | (or the same) ``data object`` as a result. See the `full list of outputs`_ in the docs. 70 | Predicate 71 | A pipeline consists of sources and outputs, but a ``predicate`` decides which 72 | ``data object`` should be processed by which ``output``. 73 | 74 | Quick example 75 | ============= 76 | 77 | Here is an example of a pipeline which reads an IMAP folder and sends all emails to a Slack chat: 78 | 79 | .. code:: python 80 | 81 | run_pipeline( 82 | sources.imap('imap.gmail.com', 83 | 'username', 84 | 'password', 85 | 'INBOX'), 86 | [prepare_email_for_slack, outputs.slack(SLACK_URL)]) 87 | 88 | Here you construct a pipeline which uses ``sources.imap`` for reading the IMAP folder 89 | "INBOX" of ``username@gmail.com``. In more complex cases ``outputs.fanout`` 90 | can be used for routing data objects to different processors, and ``sources.mix`` can 91 | be used to merge items from two or more sources into one stream. 92 | 93 | The functions ``prepare_email_for_slack`` and ``outputs.slack(SLACK_URL)`` are processors. The first one 94 | is a simple function which accepts a data object returned by the imap source and transforms 95 | it into a data object which can be used by ``outputs.slack``. We need that because Slack 96 | requires a different set of fields.
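A transform like ``prepare_email_for_slack`` can be a plain function which builds a new dict. The sketch below is only an illustration: the field names it reads and writes (``from``, ``subject``, ``text``) are assumptions for the example, not the documented schema of the imap source or the slack output:

```python
def prepare_email_for_slack(data_object):
    # Keep the two required fields ("source" and "type") and build a
    # "text" field from the (assumed) email fields of the incoming object.
    return {'source': data_object['source'],
            'type': 'slack-message',
            'text': '{0}: {1}'.format(
                data_object.get('from', 'unknown'),
                data_object.get('subject', ''))}
```

Outputs receive whatever the previous processor returned, so any function with this shape can be dropped into a pipeline.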
A call to ``outputs.slack(SLACK_URL)`` returns a 97 | function which gets an object and sends it to the specified Slack endpoint. 98 | 99 | This is just an example; for working snippets, continue reading this documentation ;-) 100 | 101 | .. Note:: By the way, did you know there is a Lisp dialect which runs on the Python 102 | virtual machine? Its name is HyLang, and python processor is written in this 103 | language. 104 | 105 | 106 | Installation 107 | ============ 108 | 109 | Create a virtual environment with python3:: 110 | 111 | virtualenv --python=python3 env 112 | source env/bin/activate 113 | 114 | Install the required version of hylang (this step is necessary because Hy syntax is not 115 | final yet and is frequently changed by the language maintainers):: 116 | 117 | pip install -U 'git+git://github.com/hylang/hy.git@a3bd90390cb37b46ae33ce3a73ee84a0feacce7d#egg=hy' 118 | 119 | If you are on OSX, then install lxml separately:: 120 | 121 | STATIC_DEPS=true pip install lxml 122 | 123 | Then install the ``processor``:: 124 | 125 | pip install processor 126 | 127 | Usage 128 | ===== 129 | 130 | Now create an executable python script where you'll place your pipeline's configuration. 131 | For example, this simple code creates a pipeline which searches for new results on Twitter 132 | and outputs them to the console. Of course, you can output them not only to the console, but also 133 | send them by email, to a Slack chat, or anywhere else if there is an output for it: 134 | 135 | ..
code:: python 136 | 137 | #!env/bin/python3 138 | import os 139 | from processor import run_pipeline, sources, outputs 140 | from twiggy_goodies.setup import setup_logging 141 | 142 | 143 | for_any_message = lambda msg: True 144 | 145 | def prepare(tweet): 146 | return {'text': tweet['text'], 147 | 'from': tweet['user']['screen_name']} 148 | 149 | setup_logging('twitter.log') 150 | 151 | run_pipeline( 152 | sources=[sources.twitter.search( 153 | 'My Company', 154 | consumer_key='***', consumer_secret='***', 155 | access_token='***', access_secret='***', 156 | )], 157 | rules=[(for_any_message, [prepare, outputs.debug()])]) 158 | 159 | 160 | Running this code will fetch new results for the search query ``My Company`` 161 | and output them on the screen. Of course, you could use any other ``output`` 162 | supported by the ``processor``. Browse the online documentation to find out 163 | which sources and outputs are supported and how to configure them. 164 | 165 | 166 | .. _full list of sources: sources.html 167 | .. _full list of outputs: outputs.html 168 | 169 | 170 | Ideas for Sources and Outputs 171 | ============================= 172 | 173 | * ``web-hook`` endpoint `(in progress)`. 174 | * ``tail`` source which reads a file and outputs the lines that appeared in it between invocations, 175 | or is able to emulate ``tail -f`` behaviour. The Python module 176 | `tailer `_ could be used here. 177 | * ``grep`` output -- a filter to grep some fields using patterns. With ``tail`` and ``grep`` 178 | you could build a pipeline which watches a log and sends errors by email or to a chat. 179 | * ``xmpp`` output. 180 | * ``irc`` output. 181 | * ``rss/atom feed reader``. 182 | * ``weather`` source which tracks tomorrow's weather forecast and outputs a message if it has 183 | changed significantly, for example from "sunny" to "rainy". 184 | * ``github`` -- some integrations with the GitHub API? 185 | * ``jira`` -- or another task tracker of your choice?
186 | * `suggest your ideas!` 187 | 188 | 189 | Documentation 190 | ============= 191 | 192 | https://python-processor.readthedocs.org/ 193 | 194 | 195 | Development 196 | =========== 197 | 198 | To run all the tests, run:: 199 | 200 | tox 201 | 202 | .. include:: AUTHORS.rst 203 | .. include:: CHANGELOG.rst 204 | 205 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | version: '{build}' 2 | build: off 3 | environment: 4 | global: 5 | WITH_COMPILER: "cmd /E:ON /V:ON /C .\\ci\\appveyor-with-compiler.cmd" 6 | matrix: 7 | - TOXENV: check 8 | PYTHON_HOME: "C:\\Python27" 9 | PYTHON_VERSION: "2.7" 10 | PYTHON_ARCH: "32" 11 | - TOXENV: "3.3" 12 | TOXPYTHON: "C:\\Python33\\python.exe" 13 | WINDOWS_SDK_VERSION: "v7.1" 14 | PYTHON_HOME: "C:\\Python33" 15 | PYTHON_VERSION: "3.3" 16 | PYTHON_ARCH: "32" 17 | - TOXENV: "3.3" 18 | TOXPYTHON: "C:\\Python33-x64\\python.exe" 19 | WINDOWS_SDK_VERSION: "v7.1" 20 | PYTHON_HOME: "C:\\Python33-x64" 21 | PYTHON_VERSION: "3.3" 22 | PYTHON_ARCH: "64" 23 | - TOXENV: "3.3-nocover" 24 | TOXPYTHON: "C:\\Python33\\python.exe" 25 | WINDOWS_SDK_VERSION: "v7.1" 26 | PYTHON_HOME: "C:\\Python33" 27 | PYTHON_VERSION: "3.3" 28 | PYTHON_ARCH: "32" 29 | - TOXENV: "3.3-nocover" 30 | TOXPYTHON: "C:\\Python33-x64\\python.exe" 31 | WINDOWS_SDK_VERSION: "v7.1" 32 | PYTHON_HOME: "C:\\Python33-x64" 33 | PYTHON_VERSION: "3.3" 34 | PYTHON_ARCH: "64" 35 | - TOXENV: "3.4" 36 | TOXPYTHON: "C:\\Python34\\python.exe" 37 | WINDOWS_SDK_VERSION: "v7.1" 38 | PYTHON_HOME: "C:\\Python34" 39 | PYTHON_VERSION: "3.4" 40 | PYTHON_ARCH: "32" 41 | - TOXENV: "3.4" 42 | TOXPYTHON: "C:\\Python34-x64\\python.exe" 43 | WINDOWS_SDK_VERSION: "v7.1" 44 | PYTHON_HOME: "C:\\Python34-x64" 45 | PYTHON_VERSION: "3.4" 46 | PYTHON_ARCH: "64" 47 | - TOXENV: "3.4-nocover" 48 | TOXPYTHON: "C:\\Python34\\python.exe" 49 | WINDOWS_SDK_VERSION: "v7.1" 50 | PYTHON_HOME:
"C:\\Python34" 51 | PYTHON_VERSION: "3.4" 52 | PYTHON_ARCH: "32" 53 | - TOXENV: "3.4-nocover" 54 | TOXPYTHON: "C:\\Python34-x64\\python.exe" 55 | WINDOWS_SDK_VERSION: "v7.1" 56 | PYTHON_HOME: "C:\\Python34-x64" 57 | PYTHON_VERSION: "3.4" 58 | PYTHON_ARCH: "64" 59 | init: 60 | - "ECHO %TOXENV%" 61 | - ps: "ls C:\\Python*" 62 | install: 63 | - "powershell ci\\appveyor-bootstrap.ps1" 64 | test_script: 65 | - "%PYTHON_HOME%\\Scripts\\tox --version" 66 | - "%PYTHON_HOME%\\Scripts\\virtualenv --version" 67 | - "%PYTHON_HOME%\\Scripts\\pip --version" 68 | - "%WITH_COMPILER% %PYTHON_HOME%\\Scripts\\tox" 69 | after_test: 70 | - "IF \"%TOXENV:~-8,8%\" == \"-nocover\" %WITH_COMPILER% %TOXPYTHON% setup.py bdist_wheel" 71 | artifacts: 72 | - path: dist\* 73 | 74 | -------------------------------------------------------------------------------- /bootstrap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | if __name__ == '__main__': 4 | import os 5 | import sys 6 | if not os.path.exists('.tox/configure'): 7 | import subprocess 8 | print("Bootstrapping ...") 9 | subprocess.check_call(['python', '-mvirtualenv', '.tox/configure']) 10 | print("Installing `jinja2` and `matrix` into bootstrap environment ...") 11 | if sys.platform == 'win32': 12 | subprocess.check_call([r'.tox\configure\Scripts\pip', 'install', 'jinja2', 'matrix']) 13 | else: 14 | subprocess.check_call(['.tox/configure/bin/pip', 'install', 'jinja2', 'matrix']) 15 | if sys.platform == 'win32': 16 | exec(compile( 17 | open(r'.tox\configure\Scripts\activate_this.py').read(), 18 | r'.tox\configure\Scripts\activate_this.py', 19 | 'exec' 20 | ), dict(__file__=r'.tox\configure\Scripts\activate_this.py')) 21 | else: 22 | exec(compile( 23 | open('.tox/configure/bin/activate_this.py').read(), 24 | '.tox/configure/bin/activate_this.py', 25 | 'exec' 26 | ), dict(__file__='.tox/configure/bin/activate_this.py')) 27 | import jinja2 28 | import matrix 29 | 30 | jinja 
= jinja2.Environment( 31 | loader=jinja2.FileSystemLoader(os.path.join('ci', 'templates')), 32 | trim_blocks=True, 33 | lstrip_blocks=True, 34 | keep_trailing_newline=True 35 | ) 36 | tox_environments = {} 37 | for alias, conf in matrix.from_file('setup.cfg').items(): 38 | python = conf['python_versions'] 39 | deps = conf['dependencies'] 40 | if 'coverage_flags' in conf: 41 | cover = {'false': False, 'true': True}[conf['coverage_flags'].lower()] 42 | if 'environment_variables' in conf: 43 | env_vars = conf['environment_variables'] 44 | 45 | tox_environments[alias] = { 46 | 'python': 'python' + python if 'py' not in python else python, 47 | 'deps': deps.split(), 48 | } 49 | if 'coverage_flags' in conf: 50 | tox_environments[alias].update(cover=cover) 51 | if 'environment_variables' in conf: 52 | tox_environments[alias].update(env_vars=env_vars.split()) 53 | 54 | for name in os.listdir(os.path.join('ci', 'templates')): 55 | with open(name, 'w') as fh: 56 | fh.write(jinja.get_template(name).render(tox_environments=tox_environments)) 57 | print("Wrote %s" % name) 58 | 59 | print("DONE.") 60 | -------------------------------------------------------------------------------- /ci/appveyor-bootstrap.ps1: -------------------------------------------------------------------------------- 1 | # Source: https://github.com/pypa/python-packaging-user-guide/blob/master/source/code/install.ps1 2 | # Sample script to install Python and pip under Windows 3 | # Authors: Olivier Grisel and Kyle Kastner 4 | # License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 5 | 6 | $BASE_URL = "https://www.python.org/ftp/python/" 7 | $GET_PIP_URL = "https://bootstrap.pypa.io/get-pip.py" 8 | $GET_PIP_PATH = "C:\get-pip.py" 9 | 10 | 11 | function DownloadPython ($python_version, $platform_suffix) { 12 | $webclient = New-Object System.Net.WebClient 13 | $filename = "python-" + $python_version + $platform_suffix + ".msi" 14 | $url = $BASE_URL + $python_version + "/" + $filename 15 
| 16 | $basedir = $pwd.Path + "\" 17 | $filepath = $basedir + $filename 18 | if (Test-Path $filename) { 19 | Write-Host "Reusing" $filepath 20 | return $filepath 21 | } 22 | 23 | # Download and retry up to 5 times in case of network transient errors. 24 | Write-Host "Downloading" $filename "from" $url 25 | $retry_attempts = 3 26 | for($i=0; $i -lt $retry_attempts; $i++){ 27 | try { 28 | $webclient.DownloadFile($url, $filepath) 29 | break 30 | } 31 | Catch [Exception]{ 32 | Start-Sleep 1 33 | } 34 | } 35 | Write-Host "File saved at" $filepath 36 | return $filepath 37 | } 38 | 39 | 40 | function InstallPython ($python_version, $architecture, $python_home) { 41 | Write-Host "Installing Python" $python_version "for" $architecture "bit architecture to" $python_home 42 | if (Test-Path $python_home) { 43 | Write-Host $python_home "already exists, skipping." 44 | return $false 45 | } 46 | if ($architecture -eq "32") { 47 | $platform_suffix = "" 48 | } else { 49 | $platform_suffix = ".amd64" 50 | } 51 | $filepath = DownloadPython $python_version $platform_suffix 52 | Write-Host "Installing" $filepath "to" $python_home 53 | $args = "/qn /i $filepath TARGETDIR=$python_home" 54 | Write-Host "msiexec.exe" $args 55 | Start-Process -FilePath "msiexec.exe" -ArgumentList $args -Wait -Passthru 56 | Write-Host "Python $python_version ($architecture) installation complete" 57 | return $true 58 | } 59 | 60 | 61 | function InstallPip ($python_home) { 62 | $pip_path = $python_home + "/Scripts/pip.exe" 63 | $python_path = $python_home + "/python.exe" 64 | if (-not(Test-Path $pip_path)) { 65 | Write-Host "Installing pip..." 66 | $webclient = New-Object System.Net.WebClient 67 | $webclient.DownloadFile($GET_PIP_URL, $GET_PIP_PATH) 68 | Write-Host "Executing:" $python_path $GET_PIP_PATH 69 | Start-Process -FilePath "$python_path" -ArgumentList "$GET_PIP_PATH" -Wait -Passthru 70 | } else { 71 | Write-Host "pip already installed." 
72 | } 73 | } 74 | 75 | function InstallPackage ($python_home, $pkg) { 76 | $pip_path = $python_home + "/Scripts/pip.exe" 77 | & $pip_path install $pkg 78 | } 79 | 80 | function main () { 81 | InstallPython $env:PYTHON_VERSION $env:PYTHON_ARCH $env:PYTHON_HOME 82 | InstallPip $env:PYTHON_HOME 83 | InstallPackage $env:PYTHON_HOME setuptools 84 | InstallPackage $env:PYTHON_HOME wheel 85 | InstallPackage $env:PYTHON_HOME tox 86 | } 87 | 88 | main 89 | -------------------------------------------------------------------------------- /ci/appveyor-with-compiler.cmd: -------------------------------------------------------------------------------- 1 | :: To build extensions for 64 bit Python 3, we need to configure environment 2 | :: variables to use the MSVC 2010 C++ compilers from GRMSDKX_EN_DVD.iso of: 3 | :: MS Windows SDK for Windows 7 and .NET Framework 4 (SDK v7.1) 4 | :: 5 | :: To build extensions for 64 bit Python 2, we need to configure environment 6 | :: variables to use the MSVC 2008 C++ compilers from GRMSDKX_EN_DVD.iso of: 7 | :: MS Windows SDK for Windows 7 and .NET Framework 3.5 (SDK v7.0) 8 | :: 9 | :: 32 bit builds do not require specific environment configurations. 
10 | :: 11 | :: Note: this script needs to be run with the /E:ON and /V:ON flags for the 12 | :: cmd interpreter, at least for (SDK v7.0) 13 | :: 14 | :: More details at: 15 | :: https://github.com/cython/cython/wiki/64BitCythonExtensionsOnWindows 16 | :: http://stackoverflow.com/a/13751649/163740 17 | :: 18 | :: Author: Olivier Grisel 19 | :: License: CC0 1.0 Universal: http://creativecommons.org/publicdomain/zero/1.0/ 20 | @ECHO OFF 21 | 22 | SET COMMAND_TO_RUN=%* 23 | SET WIN_SDK_ROOT=C:\Program Files\Microsoft SDKs\Windows 24 | 25 | IF "%PYTHON_ARCH%"=="64" ( 26 | ECHO SDK: %WINDOWS_SDK_VERSION% ARCH: %PYTHON_ARCH% 27 | SET DISTUTILS_USE_SDK=1 28 | SET MSSdk=1 29 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Setup\WindowsSdkVer.exe" -q -version:%WINDOWS_SDK_VERSION% 30 | "%WIN_SDK_ROOT%\%WINDOWS_SDK_VERSION%\Bin\SetEnv.cmd" /x64 /release 31 | ECHO Executing: %COMMAND_TO_RUN% 32 | call %COMMAND_TO_RUN% || EXIT 1 33 | ) ELSE ( 34 | ECHO SDK: %WINDOWS_SDK_VERSION% ARCH: %PYTHON_ARCH% 35 | ECHO Executing: %COMMAND_TO_RUN% 36 | call %COMMAND_TO_RUN% || EXIT 1 37 | ) 38 | -------------------------------------------------------------------------------- /ci/templates/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 2.7 3 | sudo: false 4 | env: 5 | global: 6 | LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so 7 | matrix: 8 | - TOXENV=check 9 | {% for env, config in tox_environments|dictsort %} 10 | - TOXENV={{ env }}{% if config.cover %},coveralls{% endif %} 11 | 12 | {% endfor %} 13 | before_install: 14 | - python --version 15 | - virtualenv --version 16 | - pip --version 17 | - uname -a 18 | - lsb_release -a 19 | install: 20 | - pip install tox 21 | script: 22 | - tox -v 23 | notifications: 24 | email: 25 | on_success: never 26 | on_failure: always 27 | 28 | -------------------------------------------------------------------------------- /ci/templates/appveyor.yml: 
-------------------------------------------------------------------------------- 1 | version: '{build}' 2 | build: off 3 | environment: 4 | global: 5 | WITH_COMPILER: "cmd /E:ON /V:ON /C .\\ci\\appveyor-with-compiler.cmd" 6 | matrix: 7 | - TOXENV: check 8 | PYTHON_HOME: "C:\\Python27" 9 | PYTHON_VERSION: "2.7" 10 | PYTHON_ARCH: "32" 11 | {% for env, config in tox_environments|dictsort %}{% if env.startswith('2.7') or env.startswith('3.4') or env.startswith('3.3') %} 12 | - TOXENV: "{{ env }}" 13 | TOXPYTHON: "C:\\Python{{ env[:3].replace('.', '') }}\\python.exe" 14 | WINDOWS_SDK_VERSION: "v7.{{ '1' if env[0] == '3' else '0' }}" 15 | PYTHON_HOME: "C:\\Python{{ env[:3].replace('.', '') }}" 16 | PYTHON_VERSION: "{{ env[:3] }}" 17 | PYTHON_ARCH: "32" 18 | - TOXENV: "{{ env }}" 19 | TOXPYTHON: "C:\\Python{{ env[:3].replace('.', '') }}-x64\\python.exe" 20 | WINDOWS_SDK_VERSION: "v7.{{ '1' if env[0] == '3' else '0' }}" 21 | PYTHON_HOME: "C:\\Python{{ env[:3].replace('.', '') }}-x64" 22 | PYTHON_VERSION: "{{ env[:3] }}" 23 | PYTHON_ARCH: "64" 24 | {% endif %}{% endfor %} 25 | init: 26 | - "ECHO %TOXENV%" 27 | - ps: "ls C:\\Python*" 28 | install: 29 | - "powershell ci\\appveyor-bootstrap.ps1" 30 | test_script: 31 | - "%PYTHON_HOME%\\Scripts\\tox --version" 32 | - "%PYTHON_HOME%\\Scripts\\virtualenv --version" 33 | - "%PYTHON_HOME%\\Scripts\\pip --version" 34 | - "%WITH_COMPILER% %PYTHON_HOME%\\Scripts\\tox" 35 | after_test: 36 | - "IF \"%TOXENV:~-8,8%\" == \"-nocover\" %WITH_COMPILER% %TOXPYTHON% setup.py bdist_wheel" 37 | artifacts: 38 | - path: dist\* 39 | 40 | -------------------------------------------------------------------------------- /ci/templates/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = 3 | clean, 4 | check, 5 | {% for env in tox_environments|sort %} 6 | {{ env }}, 7 | {% endfor %} 8 | report, 9 | docs 10 | 11 | [testenv] 12 | setenv = 13 | PYTHONPATH={toxinidir}/tests 14 | 
PYTHONUNBUFFERED=yes 15 | deps = 16 | pytest 17 | pytest-capturelog 18 | commands = 19 | {posargs:py.test -vv --ignore=src} 20 | 21 | [testenv:spell] 22 | setenv = 23 | SPELLCHECK = 1 24 | commands = 25 | sphinx-build -b spelling docs dist/docs 26 | usedevelop = true 27 | deps = 28 | -r{toxinidir}/docs/requirements.txt 29 | sphinxcontrib-spelling 30 | pyenchant 31 | 32 | [testenv:docs] 33 | whitelist_externals = 34 | rm 35 | commands = 36 | rm -rf dist/docs || rmdir /S /Q dist\docs 37 | sphinx-build -b html docs dist/docs 38 | sphinx-build -b linkcheck docs dist/docs 39 | usedevelop = true 40 | deps = 41 | -r{toxinidir}/docs/requirements.txt 42 | 43 | [testenv:configure] 44 | deps = 45 | jinja2 46 | matrix 47 | usedevelop = true 48 | commands = 49 | python bootstrap.py 50 | 51 | [testenv:check] 52 | basepython = python3.4 53 | deps = 54 | docutils 55 | check-manifest 56 | flake8 57 | collective.checkdocs 58 | pygments 59 | usedevelop = true 60 | commands = 61 | python setup.py checkdocs 62 | python setup.py check --strict --metadata 63 | check-manifest {toxinidir} 64 | flake8 src 65 | 66 | [testenv:coveralls] 67 | deps = 68 | coveralls 69 | usedevelop = true 70 | commands = 71 | coverage combine 72 | coverage report 73 | coveralls 74 | 75 | [testenv:report] 76 | basepython = python3.4 77 | commands = 78 | coverage combine 79 | coverage report 80 | usedevelop = true 81 | deps = coverage 82 | 83 | [testenv:clean] 84 | commands = coverage erase 85 | usedevelop = true 86 | deps = coverage 87 | 88 | {% for env, config in tox_environments|dictsort %} 89 | [testenv:{{ env }}] 90 | basepython = {{ config.python }} 91 | {% if config.cover or config.env_vars %} 92 | setenv = 93 | {[testenv]setenv} 94 | {% endif %} 95 | {% for var in config.env_vars %} 96 | {{ var }} 97 | {% endfor %} 98 | {% if config.cover %} 99 | WITH_COVERAGE=yes 100 | usedevelop = true 101 | commands = 102 | {posargs:py.test --cov=src --cov-report=term-missing -vv} 103 | {% endif %} 104 | {% if 
config.cover or config.deps %} 105 | deps = 106 | {[testenv]deps} 107 | {% endif %} 108 | {% if config.cover %} 109 | pytest-cov 110 | {% endif %} 111 | {% for dep in config.deps %} 112 | {{ dep }} 113 | {% endfor %} 114 | 115 | {% endfor %} 116 | 117 | 118 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CHANGELOG.rst 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import re 3 | import os 4 | extensions = [ 5 | 'sphinx.ext.autodoc', 6 | 'sphinx.ext.autosummary', 7 | 'sphinx.ext.todo', 8 | 'sphinx.ext.coverage', 9 | 'sphinx.ext.ifconfig', 10 | 'sphinx.ext.viewcode', 11 | 'sphinxcontrib.napoleon' 12 | ] 13 | if os.getenv('SPELLCHECK'): 14 | extensions += 'sphinxcontrib.spelling', 15 | spelling_show_suggestions = True 16 | spelling_lang = 'en_US' 17 | 18 | source_suffix = '.rst' 19 | master_doc = 'index' 20 | project = u'processor' 21 | copyright = u'2015, Alexander Artemenko' 22 | version = release = re.findall( 23 | 'version="(.*)"', 24 | open(os.path.join(os.path.dirname(__file__), '../setup.py')).read() 25 | )[0] 26 | 27 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 28 | 29 | if on_rtd: 30 | # workaround for making build on read the docs pass 31 | # see issue https://github.com/rtfd/readthedocs.org/issues/1202 32 | html_theme = 'sphinx_rtd_theme' 33 | else: 34 | import sphinx_py3doc_enhanced_theme 35 | html_theme = "sphinx_py3doc_enhanced_theme" 36 | html_theme_path = [sphinx_py3doc_enhanced_theme.get_html_theme_path()] 37 | 
html_theme_options = { 38 | 'githuburl': 'https://github.com/svetlyak40wt/python-processor/' 39 | } 40 | 41 | 42 | pygments_style = 'trac' 43 | templates_path = ['.'] 44 | html_use_smartypants = True 45 | html_last_updated_fmt = '%b %d, %Y' 46 | html_split_index = True 47 | html_sidebars = { 48 | '**': ['searchbox.html', 'globaltoc.html', 'sourcelink.html'], 49 | } 50 | html_short_title = '%s-%s' % (project, version) 51 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/images/github-private-token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/svetlyak40wt/python-processor/9126a021d603030899897803ab9973250e5b16f6/docs/images/github-private-token.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ====================================== 2 | Welcome to processor's documentation! 3 | ====================================== 4 | 5 | .. include:: overview.rst 6 | 7 | 8 | What is next? 9 | ============= 10 | 11 | Read about `sources`_, `outputs`_ and try to build your own pipeline! 12 | 13 | And please, `send your contributions`_ as pull requests. Writing new sources and outputs is easier than you think! 14 | 15 | .. _sources: sources.html 16 | .. _outputs: outputs.html 17 | .. _send your contributions: https://github.com/svetlyak40wt/python-processor 18 | 19 | .. 
toctree:: 20 | :maxdepth: 2 21 | :hidden: 22 | 23 | sources 24 | outputs 25 | contributing 26 | authors 27 | changelog 28 | -------------------------------------------------------------------------------- /docs/outputs.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Outputs 3 | ======= 4 | 5 | debug 6 | ===== 7 | 8 | This output is very useful for debugging your input. All it does right 9 | now is return the ``pprint`` function, but the interface may be extended 10 | in the future to select which fields to output or suppress, to cache, or something like 11 | that. 12 | 13 | fanout 14 | ====== 15 | 16 | The fanout output is useful when you want to feed one stream of data objects to two 17 | or more pipelines. For example, you could send some events by email and to 18 | the `slack`_ chat simultaneously:: 19 | 20 | run_pipeline(some_source(), 21 | outputs.fanout( 22 | outputs.email('vaily@pupkin.name'), 23 | outputs.slack(SLACK_URL))) 24 | 25 | Or if you need to preprocess data objects for each output, then the code will 26 | look like this:: 27 | 28 | run_pipeline(some_source(), 29 | outputs.fanout( 30 | [prepare_email, outputs.email('vaily@pupkin.name')], 31 | [prepare_slack, outputs.slack(SLACK_URL)])) 32 | 33 | Here ``prepare_email`` and ``prepare_slack`` are just functions which return 34 | data objects with fields for the `email` and `slack`_ outputs. 35 | 36 | 37 | email 38 | ===== 39 | 40 | Sends an email to a given address via a configured SMTP server. 41 | When configuring, you have to specify ``host``, ``port``, ``user`` and ``password``, 42 | as well as ``mail_to``, the email address of the recipient who should receive the message, 43 | and ``mail_from``, a tuple like ``(name, email)`` designating the 44 | sender. 
Here is an example:: 45 | 46 | run_pipeline( 47 | [{'subject': 'Hello from processor', 48 | 'body': 'The HTML body.'}], 49 | outputs.email(mail_to='somebody@gmail.com', 50 | mail_from=('Processor', 'processor@yandex.ru'), 51 | host='smtp.yandex.ru', 52 | user='processor', 53 | password='***', 54 | port=465, 55 | ssl=True, 56 | )) 57 | 58 | 59 | Each data object should contain these fields: 60 | 61 | **subject** 62 | Email's subject. 63 | **body** 64 | HTML body of the email. 65 | 66 | 67 | rss 68 | === 69 | 70 | Creates an RSS feed on the disk. Has one required parameter -- 71 | ``filename`` -- and one optional -- ``limit``, which is ``10`` by default and 72 | limits the resulting feed's length. 73 | 74 | Each data object should contain these fields: 75 | 76 | **title** 77 | Feed item's title. 78 | **id** (optional) 79 | Feed item's unique identifier. If not provided, an md5 hash of the title will be used. 80 | **body** 81 | Any text to be placed inside the RSS item's body. 82 | 83 | 84 | slack 85 | ===== 86 | 87 | Writes a message to a Slack chat. A message can be sent to a 88 | channel or directly to somebody. 89 | 90 | This output has one required parameter, ``url``. You can 91 | obtain it at Slack's integrations page. Select "Incoming WebHooks" 92 | among all available integrations, add a hook, and copy its ``url`` 93 | into the script. The other parameter is ``defaults``, a dict merged into each data object; by default it is ``{"renderer": "markdown", "username": "Processor"}``. 94 | 95 | Each data object should contain these fields: 96 | 97 | **text** 98 | Text of the message to be posted. This is the only required field; the others are optional and described on Slack's integration page. 99 | **username** (optional) 100 | A name to be displayed as the sender's name. 101 | **icon_url** (optional) 102 | A link to a PNG icon; it should be 57x57 pixels. 103 | **icon_emoji** (optional) 104 | An emoji string. Choose one at `Emoji Cheat Sheet`_. 
105 | **channel** 106 | A public channel can be specified with ``#other-channel``, and a Direct Message with ``@username``. 107 | 108 | 109 | XMPP 110 | ===== 111 | 112 | The XMPP output sends messages to a given Jabber ID (JID). It connects 113 | to a server as a Jabber client and sends messages through it. 114 | 115 | .. Note:: 116 | If you use Google's XMPP, then you will need to add the bot's JID to 117 | your roster. Otherwise, messages will not be accepted by the server. 118 | 119 | This output is configured by three parameters: ``jid``, ``password`` and ``host``. 120 | They are used to connect to a server as a Jabber client. Optionally, 121 | you can specify ``port`` (which is 5222 by default) and ``recipients`` – 122 | a list of those who need to be notified. The recipients list can be overridden 123 | if a data object contains a ``recipients`` field. 124 | 125 | Each data object should contain these fields: 126 | 127 | **text** 128 | Text of the message to be posted. 129 | **recipients** (optional) 130 | A list of JIDs to be notified. 131 | 132 | -------------------------------------------------------------------------------- /docs/overview.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | :start-after: https://scrutinizer-ci.com/g/svetlyak40wt/python-processor/ 3 | :end-before: Documentation 4 | 5 | -------------------------------------------------------------------------------- /docs/reference/index.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | ========= 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | processor* 8 | -------------------------------------------------------------------------------- /docs/reference/processor.rst: -------------------------------------------------------------------------------- 1 | processor 2 | ============================= 3 | 4 | .. 
automodule:: processor 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | sphinxcontrib-napoleon 3 | sphinx-py3doc-enhanced-theme 4 | -e . 5 | -------------------------------------------------------------------------------- /docs/sources.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Sources 3 | ======= 4 | 5 | .. _full list of sources: 6 | 7 | mix 8 | ==== 9 | 10 | This is a helper to mix data objects from two or more sources into one stream. 11 | When mixed, data objects are interleaved. For example:: 12 | 13 | >>> from processor import sources 14 | >>> source1 = [1,2,3] 15 | >>> source2 = [5,6,7,8] 16 | >>> print(list(sources.mix(source1, source2))) 17 | [1, 5, 2, 6, 3, 7, 8] 18 | 19 | 20 | The mix source iterates through each given source until it raises ``StopIteration``. 21 | That means that if you give it an infinite source like `web.hook`_, then the 22 | resulting source will also be infinite. 23 | 24 | imap 25 | ==== 26 | 27 | The imap source reads new emails from a specified folder on an IMAP server. 28 | All you need is to specify the server's address, an optional port, and user credentials: 29 | 30 | Example:: 31 | 32 | from processor import run_pipeline, sources, outputs 33 | run_pipeline( 34 | sources.imap("imap.gmail.com", 35 | "username", 36 | "****word", 37 | "Inbox"), 38 | outputs.debug()) 39 | 40 | This script will read the ``Inbox`` folder at the server ``imap.gmail.com`` 41 | and print the resulting dicts to the terminal's screen. 42 | 43 | github 44 | ====== 45 | 46 | Access to private repositories 47 | ------------------------------ 48 | 49 | To have access to private repositories, you need to generate a "personal 50 | access token" on GitHub. 
51 | 52 | All you need to do is click `on the image below`_, and it will open a page 53 | with only the scopes needed by the Processor: 54 | 55 | .. image:: images/github-private-token.png 56 | :target: `on the image below`_ 57 | 58 | .. _on the image below: https://github.com/settings/tokens/new?scopes=repo,public_repo 59 | 60 | Then copy this token into the clipboard and pass it as the ``access_token`` parameter to each 61 | ``github.****`` source. 62 | 63 | .. Note:: 64 | An access token not only lets the processor read from private repositories, 65 | but also raises the rate limits, so you can poll GitHub's API more frequently. 66 | 67 | Without a token you can make only 60 requests per hour, but with a token – 5000 requests per hour. 68 | 69 | github.releases 70 | --------------- 71 | 72 | Outputs new releases of the given repository. On the first call, it outputs the most recent 73 | releases and remembers its position; subsequent calls return only new releases, if any were found. 74 | 75 | Example:: 76 | 77 | from processor import run_pipeline, sources, outputs 78 | 79 | github_creds = dict(access_token='keep-it-in-secret') 80 | run_pipeline( 81 | sources.github.releases('https://github.com/mozilla/metrics-graphics', **github_creds), 82 | outputs.debug()) 83 | 84 | This source returns the following fields: 85 | 86 | source 87 | github.releases 88 | type 89 | github.release 90 | payload 91 | The object returned by GitHub's API. See the "Response" section in GitHub's docs on `repos/releases`_. 92 | 93 | .. _repos/releases: https://developer.github.com/v3/repos/releases/#response 94 | 95 | 96 | twitter 97 | ======= 98 | 99 | .. Note:: 100 | To use this source, you need to obtain an access token from Twitter. 101 | There are detailed instructions on how to do this in `Twitter's documentation`_. 
102 | You can encapsulate your Twitter credentials in a dict:: 103 | 104 | twitter_creds = dict(consumer_key='***', consumer_secret='***', 105 | access_token='***', access_secret='***') 106 | sources.twitter.search('Some query', **twitter_creds) 107 | sources.twitter.followers(**twitter_creds) 108 | 109 | 110 | .. _Twitter's documentation: https://dev.twitter.com/oauth/overview/application-owner-access-tokens 111 | 112 | twitter.search 113 | -------------- 114 | 115 | This source runs a search for the given query on Twitter and returns fresh 116 | results:: 117 | 118 | from processor import run_pipeline, sources, outputs 119 | run_pipeline( 120 | sources.twitter.search('iOS release notes', **twitter_creds), 121 | outputs.debug()) 122 | 123 | It returns the following fields: 124 | 125 | source 126 | twitter.search 127 | type 128 | twitter.tweet 129 | *other* 130 | The other fields are the same as those returned by the Twitter API. See the "Example Result" section in Twitter's docs on `search/tweets`_. 131 | 132 | .. _search/tweets: https://dev.twitter.com/rest/reference/get/search/tweets 133 | 134 | 135 | twitter.followers 136 | ----------------- 137 | 138 | The first invocation returns everybody you follow; each subsequent one returns only new followers:: 139 | 140 | from processor import run_pipeline, sources, outputs 141 | run_pipeline( 142 | sources.twitter.followers(**twitter_creds), 143 | outputs.debug()) 144 | 145 | 146 | It returns the following fields: 147 | 148 | source 149 | twitter.followers 150 | type 151 | twitter.user 152 | *other* 153 | The other fields are the same as those returned by the Twitter API. See the "Example Result" section in Twitter's docs on `followers/list`_. 154 | 155 | .. _followers/list: https://dev.twitter.com/rest/reference/get/followers/list 156 | 157 | 158 | web.hook 159 | ======== 160 | 161 | This source starts a webserver which listens on a given interface and port. 162 | All GET and POST requests are transformed into data objects. 
163 | 164 | Configuration example:: 165 | 166 | run_pipeline(sources.web.hook(host='0.0.0.0', port=1999), 167 | outputs.debug()) 168 | 169 | By default, it starts on ``localhost:8000``, but in this case it listens on 170 | ``0.0.0.0:1999``. 171 | 172 | Here is an example of a data object produced by this source when somebody 173 | posts JSON:: 174 | 175 | {'data': {'some-value': 0}, 176 | 'headers': {'Accept': 'application/json', 177 | 'Accept-Encoding': 'gzip, deflate', 178 | 'Connection': 'keep-alive', 179 | 'Content-Length': '17', 180 | 'Content-Type': 'application/json; charset=utf-8', 181 | 'Host': '127.0.0.1:1999', 182 | 'User-Agent': 'HTTPie/0.8.0'}, 183 | 'method': 'POST', 184 | 'path': '/the-hook', 185 | 'query': {'query': ['var']}, 186 | 'source': 'web.hook', 187 | 'type': 'http-request'} 188 | 189 | 190 | This source returns data objects with the following fields: 191 | 192 | source 193 | web.hook 194 | type 195 | http-request 196 | method 197 | GET or POST 198 | path 199 | Resource path without query arguments 200 | query 201 | Query arguments 202 | headers 203 | A headers dictionary. Please note that this is an ordinary dictionary with case-sensitive keys. 204 | data 205 | Request data if this was a POST, ``None`` for GET. If the request has an ``application/json`` content type, the 206 | data is decoded automatically into its Python representation. For other content types, if there is 207 | a charset part, the data is decoded from bytes into a string; otherwise, it remains bytes. 208 | 209 | 210 | .. Note:: 211 | This source runs in blocking mode. This means it blocks ``run_pipeline`` execution until somebody interrupts it. 212 | 213 | No other sources can be processed together with ``web.hook``. 
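The body-decoding rules described for the ``data`` field can be sketched roughly as follows. This is an illustrative model only, not the processor's actual implementation, and the function name ``decode_body`` is made up for the example:

```python
import json


def decode_body(content_type, body):
    """Rough sketch of the documented decoding rules for ``data``.

    NOT the processor's real code -- just a model of the behaviour
    described above, under the assumption that ``body`` is raw bytes.
    """
    # JSON payloads are decoded into their Python representation.
    if content_type.startswith('application/json'):
        return json.loads(body.decode('utf-8'))
    # Other content types with an explicit charset become strings.
    if 'charset=' in content_type:
        charset = content_type.split('charset=')[-1].split(';')[0].strip()
        return body.decode(charset)
    # Everything else stays as raw bytes.
    return body
```

Under this model, posting ``{"some-value": 0}`` with a ``Content-Type: application/json; charset=utf-8`` header yields the dict ``{'some-value': 0}`` in the ``data`` field, matching the example above.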
214 | -------------------------------------------------------------------------------- /docs/spelling_wordlist.txt: -------------------------------------------------------------------------------- 1 | builtin 2 | builtins 3 | classmethod 4 | staticmethod 5 | classmethods 6 | staticmethods 7 | args 8 | kwargs 9 | callstack 10 | Changelog 11 | Indices 12 | -------------------------------------------------------------------------------- /examples/do-123.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from processor import outputs, run_pipeline 4 | 5 | 6 | def create_counter(name): 7 | return ({'counter': '{0} {1}'.format(name, i)} 8 | for i in range(10)) 9 | 10 | run_pipeline([create_counter('bob'), 11 | create_counter('joe')], 12 | [(lambda item: True, outputs.debug())]) 13 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pudb 2 | html2text 3 | invoke 4 | wheel 5 | # to run tests 6 | detox 7 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [aliases] 5 | release = register clean --all sdist bdist_wheel upload 6 | 7 | [flake8] 8 | max-line-length = 140 9 | exclude = tests/*,*/migrations/*,*/south_migrations/* 10 | 11 | [pytest] 12 | norecursedirs = 13 | .git 14 | .tox 15 | dist 16 | build 17 | south_migrations 18 | migrations 19 | python_files = 20 | test_*.py 21 | *_test.py 22 | tests.py 23 | addopts = 24 | -rxEfs 25 | --strict 26 | --ignore docs/conf.py 27 | --ignore setup.py 28 | --ignore bootstrap.py 29 | --doctest-modules 30 | --doctest-glob \*.rst 31 | --tb short 32 | 33 | [isort] 34 | force_single_line=True 35 | line_length=120 36 | known_first_party=processor 37 | 
default_section=THIRDPARTY 38 | forced_separate=test_processor 39 | 40 | [matrix] 41 | # This is the configuration for the `./bootstrap.py` script. 42 | # It generates `.travis.yml`, `tox.ini` and `appveyor.yml`. 43 | # 44 | # Syntax: [alias:] value [!variable[glob]] [&variable[glob]] 45 | # 46 | # alias: 47 | # - is used to generate the tox environment 48 | # - it's optional 49 | # - if not present the alias will be computed from the `value` 50 | # value: 51 | # - a value of "-" means empty 52 | # !variable[glob]: 53 | # - exclude the combination of the current `value` with 54 | # any value matching the `glob` in `variable` 55 | # - can use as many you want 56 | # &variable[glob]: 57 | # - only include the combination of the current `value` 58 | # when there's a value matching `glob` in `variable` 59 | # - can use as many you want 60 | 61 | python_versions = 62 | 2.6 63 | 2.7 64 | 3.3 65 | 3.4 66 | pypy 67 | 68 | dependencies = 69 | # 1.4: Django==1.4.16 !python_versions[3.*] 70 | # 1.5: Django==1.5.11 71 | # 1.6: Django==1.6.8 72 | # 1.7: Django==1.7.1 !python_versions[2.6] 73 | # Deps commented above are provided as examples. That's what you would use in a Django project. 
74 | 75 | coverage_flags = 76 | : true 77 | nocover: false 78 | 79 | environment_variables = 80 | - 81 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | import io 3 | import re 4 | from glob import glob 5 | from os.path import basename 6 | from os.path import dirname 7 | from os.path import join 8 | from os.path import splitext 9 | 10 | from setuptools import find_packages 11 | from setuptools import setup 12 | 13 | 14 | def read(*names, **kwargs): 15 | return io.open( 16 | join(dirname(__file__), *names), 17 | encoding=kwargs.get("encoding", "utf8") 18 | ).read() 19 | 20 | 21 | def remove_rst_roles(text): 22 | return re.sub(r':[a-z]+:`~?(.*?)`', r'``\1``', text) 23 | 24 | 25 | def expand_includes(text, path='.'): 26 | """Recursively expands includes in given text.""" 27 | def read_and_expand(match): 28 | filename = match.group('filename') 29 | filename = join(path, filename) 30 | text = read(filename) 31 | return expand_includes( 32 | text, path=join(path, dirname(filename))) 33 | 34 | return re.sub(r'^\.\. 
include:: (?P.*)$', 35 | read_and_expand, 36 | text, 37 | flags=re.MULTILINE) 38 | 39 | 40 | setup( 41 | name="processor", 42 | version="0.10.0", 43 | license="BSD", 44 | description="A microframework to build source -> filter -> action workflows.", 45 | long_description=remove_rst_roles(expand_includes(read('README.rst'))), 46 | author="Alexander Artemenko", 47 | author_email="svetlyak.40wt@gmail.com", 48 | url="https://github.com/svetlyak40wt/python-processor", 49 | packages=find_packages("src"), 50 | package_dir={"": "src"}, 51 | py_modules=[splitext(basename(path))[0] for path in glob("src/*.py")], 52 | include_package_data=True, 53 | zip_safe=False, 54 | classifiers=[ 55 | # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers 56 | "Development Status :: 3 - Alpha", 57 | "Intended Audience :: Developers", 58 | "License :: OSI Approved :: BSD License", 59 | "Operating System :: Unix", 60 | "Operating System :: POSIX", 61 | "Programming Language :: Python", 62 | "Programming Language :: Python :: 3", 63 | "Programming Language :: Python :: 3.3", 64 | "Programming Language :: Python :: 3.4", 65 | "Programming Language :: Python :: Implementation :: CPython", 66 | "Programming Language :: Python :: Implementation :: PyPy", 67 | "Topic :: Utilities", 68 | ], 69 | keywords=[ 70 | 'processing', 'devops', 'imap', 'rss', 'twitter' 71 | ], 72 | install_requires=[ 73 | 'hy', 74 | 'twiggy-goodies>=0.7.0', 75 | ], 76 | extras_require={ 77 | 'sources.imap': ['IMAPClient'], 78 | 'sources.twitter': ['requests-oauthlib'], 79 | 'sources.github': ['requests'], 80 | 'outputs.rss': ['feedgen'], 81 | 'outputs.slack': ['requests'], 82 | 'outputs.xmpp': ['sleekxmpp'], 83 | # 'feedparser', 84 | # 'python-dateutil', 85 | }, 86 | entry_points={ 87 | "console_scripts": [ 88 | "processor = processor.__main__:main" 89 | ] 90 | }, 91 | ) 92 | -------------------------------------------------------------------------------- /src/processor/__init__.py: 
-------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | import hy 3 | from .version import * 4 | from .pipeline import * 5 | -------------------------------------------------------------------------------- /src/processor/outputs/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .debug import debug 3 | from .fanout import fanout 4 | from .rss import rss 5 | from .slack import slack 6 | from .email import email 7 | from .xmpp import xmpp 8 | -------------------------------------------------------------------------------- /src/processor/outputs/debug.hy: -------------------------------------------------------------------------------- 1 | (import [pprint [pprint]]) 2 | 3 | (defn debug [] 4 | (defn debug_processor [item] 5 | (pprint item) 6 | item)) 7 | -------------------------------------------------------------------------------- /src/processor/outputs/email.hy: -------------------------------------------------------------------------------- 1 | (require processor.utils.macro) 2 | 3 | 4 | (defn email [&optional mail_to mail_from [host "localhost"] [port 25] [ssl False] user password] 5 | (import-or-error [emails [html]] 6 | "Please, install 'emails' library to use 'email' output.") 7 | 8 | (fn [&optional item] 9 | (setv message (apply html [] {"html" (get item "body") 10 | "subject" (get item "subject") 11 | "mail_from" mail_from})) 12 | (setv response (apply message.send [] {"to" mail_to 13 | "smtp" {"host" host 14 | "port" port 15 | "ssl" ssl 16 | "user" user 17 | "password" password}})) 18 | (if-not (= response.status_code 250) 19 | (raise (RuntimeError (.format "Bad SMTP status code: {0}" response.status_code)))) 20 | None)) 21 | 22 | -------------------------------------------------------------------------------- /src/processor/outputs/fanout.hy: -------------------------------------------------------------------------------- 1 | (import 
[processor.utils.datastructures [ensure-list]]) 2 | 3 | 4 | (defn fanout [&rest pipelines] 5 | (setv pipelines (list (map ensure-list pipelines))) 6 | 7 | (fn [input] 8 | (setv results []) 9 | 10 | (for [pipeline pipelines] 11 | (setv msg input) 12 | 13 | (for [output pipeline] 14 | (setv msg (output msg)) 15 | (lif-not msg 16 | (break))) 17 | 18 | ;; at the end of each pipeline 19 | ;; we yield result if any 20 | (lif msg 21 | (yield msg))))) 22 | -------------------------------------------------------------------------------- /src/processor/outputs/rss.hy: -------------------------------------------------------------------------------- 1 | (require processor.utils.macro) 2 | 3 | (import codecs) 4 | (import [hashlib [md5]]) 5 | (import [processor.storage [get-storage]]) 6 | 7 | ;; Uses http://lkiesow.github.io/python-feedgen/ 8 | 9 | (defn create-feed [data &optional 10 | [title "Rss Feed"] 11 | [description "Without description"] 12 | [link "no link"]] 13 | (import-or-error [feedgen.feed [FeedGenerator]] 14 | "Please, install 'feedgen' library to use 'rss' output.") 15 | 16 | (setv feed (FeedGenerator)) 17 | (.title feed title) 18 | (.link feed {"rel" "alternate" 19 | "href" link}) 20 | (.description feed description) 21 | 22 | (for [item data] 23 | (setv feed-item (.add_entry feed)) 24 | (setv item-title (get item "title")) 25 | (setv item-id (or (.get item "id") 26 | (.hexdigest (md5 (.encode item-title "utf-8"))))) 27 | (setv item-body (.get item "body")) 28 | 29 | (.id feed-item item-id) 30 | (.title feed-item item-title) 31 | (if item-body 32 | (.description feed-item item-body))) 33 | 34 | (apply .rss_str [feed] {"pretty" True})) 35 | 36 | 37 | (defn rss [filename &optional [limit 10]] 38 | "Accepts dicts with fields 39 | - title 40 | - body 41 | " 42 | (with-log-fields {"filename" filename "limit" limit} 43 | (log.info "Creating rss output") 44 | (setv [get-value set-value] (get-storage "rss-target"))) 45 | 46 | (defn rss-updater [obj] 47 | 
(with-log-fields {"filename" filename 48 | "title" (get obj "title")} 49 | (log.info "Adding item to the feed") 50 | 51 | (setv data (get-value filename [])) 52 | (.append data obj) 53 | (setv data (cut data (- limit))) 54 | 55 | (set-value filename data) 56 | 57 | (log.info "Writing to the file") 58 | (with [f (open filename "wb")] 59 | (.write f (create-feed data)))))) 60 | -------------------------------------------------------------------------------- /src/processor/outputs/slack.hy: -------------------------------------------------------------------------------- 1 | (require processor.utils.macro) 2 | 3 | (import json) 4 | (import [processor.utils [merge-dicts]]) 5 | 6 | 7 | (defn slack [url &optional [defaults {"renderer" "markdown" 8 | "username" "Processor"}]] 9 | "Output to Slack. 10 | Each object should have a 'text' field and can have other 11 | optional fields which Slack supports in the payload, for example: 12 | - renderer: which engine to use to render text (by default 'markdown'); 13 | - username: username (by default 'Processor'); 14 | - icon_emoji: an icon for the posts (by default None, choose one here: http://www.emoji-cheat-sheet.com); 15 | - channel: a channel to post to, could be #something or @somebody." 
16 | 17 | (import-or-error [requests [post]] 18 | "Please, install 'requests' library to use 'slack' output.") 19 | (defn send-to-slack [obj] 20 | (setv data (merge-dicts defaults obj)) 21 | (post url (json.dumps data)))) 22 | -------------------------------------------------------------------------------- /src/processor/outputs/xmpp.hy: -------------------------------------------------------------------------------- 1 | (import processor) 2 | (import time) 3 | (require processor.utils.macro) 4 | 5 | 6 | (defn xmpp [jid password host &optional [port 5222] [recipients []]] 7 | (import-or-error [sleekxmpp [ClientXMPP]] 8 | "Please, install 'sleekxmpp' library to use 'xmpp' output.") 9 | 10 | (defclass Bot [ClientXMPP] 11 | (defn __init__ [self jid password recipients] 12 | (.__init__ (super Bot self) jid password) 13 | (setv self.recipients recipients) 14 | (self.add_event_handler "session_start" self.start)) 15 | 16 | (defn start [self event] 17 | (self.send_presence) 18 | (self.get_roster)) 19 | 20 | (defn send_to_recipients [self message recipients] 21 | (setv recipients (or recipients 22 | self.recipients)) 23 | (for [recipient recipients] 24 | (apply self.send_message [] {"mto" recipient "mbody" message})))) 25 | 26 | (setv bot (Bot jid password recipients)) 27 | (bot.register_plugin "xep_0030") ;; Service Discovery 28 | (bot.register_plugin "xep_0199") ;; XMPP Ping 29 | 30 | 31 | (bot.connect [host port]) 32 | 33 | (processor.on_close (fn [] (do (time.sleep 1) 34 | (apply bot.disconnect [] {"wait" True})))) 35 | 36 | (apply bot.process [] {"block" False}) 37 | 38 | ;; actual message sending function 39 | (fn [item] 40 | (bot.send_to_recipients (item.get "text" "Not given") 41 | (item.get "recipients")))) 42 | 43 | -------------------------------------------------------------------------------- /src/processor/pipeline.hy: -------------------------------------------------------------------------------- 1 | (import [collections.abc [Iterable]]) 2 | (import 
[collections [deque]]) 3 | 4 | 5 | ;; (defn extract_messages [sources] 6 | ;; """Returns messages, taking 7 | ;; them one by one from each source. 8 | 9 | ;; If source returns None, then it 10 | ;; should be skipped. 11 | ;; """ 12 | ;; (setv sources (list (map iter sources))) 13 | ;; (setv idx 0) 14 | 15 | ;; (while sources 16 | ;; (setv source (get sources idx)) 17 | 18 | ;; (try 19 | ;; (setv value (next source)) 20 | 21 | ;; (except [e StopIteration] 22 | ;; (del (get sources idx))) 23 | 24 | ;; (else 25 | ;; (lif value 26 | ;; (yield value)) 27 | ;; (setv idx (+ idx 1)))) 28 | 29 | ;; (if (>= idx (len sources)) 30 | ;; (setv idx 0)))) 31 | 32 | 33 | ;; (defn run-action [actions msg] 34 | ;; (if-not (isinstance actions Iterable) 35 | ;; (setv actions [actions])) 36 | 37 | ;; (for [action actions] 38 | ;; (setv msg (action msg)) 39 | ;; (if-not msg 40 | ;; (break))) 41 | ;; msg) 42 | 43 | 44 | (defn make-generator [func] 45 | "Makes a generator from a function, 46 | calling it until it returns None, yielding the returned values" 47 | (setv value (func)) 48 | (while (not (is_none value)) 49 | (yield value) 50 | (setv value (func)))) 51 | 52 | 53 | ;; (defn run_pipeline [sources rules] 54 | ;; (setv sources (list-comp 55 | ;; (if (callable s) 56 | ;; (make-generator s) 57 | ;; s) 58 | ;; [s sources])) 59 | ;; (for [msg (extract_messages sources)] 60 | ;; (for [(, trigger action) rules] 61 | ;; (if (trigger msg) 62 | ;; (run-action action msg))))) 63 | 64 | 65 | (setv _on-close-callbacks []) 66 | 67 | 68 | (defn on-close [func] 69 | "Add `func` to the list of callbacks to be called 70 | when all sources are exhausted."
71 | (.append _on-close-callbacks func)) 72 | 73 | 74 | (defn run_pipeline [source pipeline] 75 | (setv source (if (callable source) 76 | (make-generator source) 77 | source)) 78 | (setv pipeline (if (isinstance pipeline Iterable) 79 | pipeline 80 | [pipeline])) 81 | 82 | (setv queue (deque)) 83 | 84 | (for [msg source] 85 | ;; if the source returned something like a 86 | ;; list, then its items are processed separately 87 | (setv msg (if (or (isinstance msg dict) 88 | (not (isinstance msg Iterable))) 89 | [msg] 90 | msg)) 91 | 92 | (when msg 93 | (setv step (first pipeline)) 94 | 95 | (when step 96 | (for [item msg] 97 | (setv response (step item)) 98 | ;; if something other than None was returned, process it further 99 | (lif response 100 | (do 101 | (setv response (if (or (isinstance response dict) 102 | (not (isinstance response Iterable))) 103 | [response] 104 | response)) 105 | (run_pipeline response (list (rest pipeline))))))))) 106 | 107 | (for [callback _on-close-callbacks] 108 | (apply callback))) 109 | -------------------------------------------------------------------------------- /src/processor/sources/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .mix import mix 3 | from .imap import imap 4 | from . import twitter 5 | from . import web 6 | from . 
import github 7 | -------------------------------------------------------------------------------- /src/processor/sources/github.hy: -------------------------------------------------------------------------------- 1 | (require processor.utils.macro) 2 | 3 | (import re) 4 | (import [processor.storage [get-storage]]) 5 | 6 | 7 | (defn get-api-url [base-url handle] 8 | (setv match (re.match 9 | "^https?://github.com/(?P<username>[^/]+)/(?P<repo>[^/]+)" 10 | base-url)) 11 | (when match 12 | (setv data (match.groupdict)) 13 | (assoc data "handle" handle) 14 | (setv format (getattr "https://api.github.com/repos/{username}/{repo}/{handle}" "format")) 15 | (apply format [] data))) 16 | 17 | 18 | (defn releases [repo-url &optional access-token] 19 | (import-or-error [requests] 20 | "Please, install 'requests' library to use 'github.releases' source.") 21 | 22 | (setv url (get-api-url repo-url "releases")) 23 | (setv headers 24 | (if access-token 25 | {"Authorization" (+ "token " access-token)} 26 | {})) 27 | 28 | (setv response (apply requests.get [url] {"headers" headers})) 29 | 30 | (setv [get-value set-value] (get-storage "github")) 31 | (setv seen-id-key (.join ":" [url "seen-id"])) 32 | (setv seen-id (get-value seen-id-key 0)) 33 | 34 | (setv items (list-comp item [item (response.json)] 35 | (> (get item "id") seen-id))) 36 | (items.reverse) 37 | 38 | (defn format-item [item] 39 | {"source" "github.releases" 40 | "type" "github.release" 41 | "payload" item}) 42 | 43 | (for [item items] 44 | (setv item-id (get item "id")) 45 | (yield (format-item item)) 46 | (set-value seen-id-key item-id))) 47 | -------------------------------------------------------------------------------- /src/processor/sources/imap.hy: -------------------------------------------------------------------------------- 1 | (import email) 2 | (import pytz) 3 | (import imaplib) 4 | (import datetime) 5 | (require processor.utils.macro) 6 | 7 | (import [processor.storage [get-storage]]) 8 | 9 | 10 | (defn decode-header [text] 11 | 
(setv decoded (email.header.decode_header text)) 12 | (setv parts (list (genexpr (if (isinstance part str) 13 | part 14 | (.decode part (or encoding 15 | "utf-8"))) 16 | [[part encoding] decoded]))) 17 | (.join " " parts)) 18 | 19 | 20 | (defn email-headers [msg] 21 | (dict (genexpr [(.lower key) (decode-header value)] 22 | [[key value] (msg.items)]))) 23 | 24 | 25 | (defn first-not-null [items] 26 | (setv result None) 27 | (for [item items] 28 | (lif item 29 | (do (setv result item) 30 | (break)))) 31 | result) 32 | 33 | 34 | (defn email-body [message content_type] 35 | (if (= (message.get_content_type) content_type) 36 | (do (setv charset (or (message.get_content_charset) 37 | "utf-8")) 38 | (.decode (message.get_payload None True) 39 | charset 40 | "replace")) 41 | 42 | (if (message.is_multipart) 43 | (first-not-null (genexpr (email-body item content_type) 44 | [item (message.get_payload)]))))) 45 | 46 | 47 | (defn decode-message [item] 48 | (setv msg (email.message_from_bytes item)) 49 | {"type" "email" 50 | "source" "imap" 51 | "headers" (email-headers msg) 52 | "plain-body" (email-body msg "text/plain") 53 | "html-body" (email-body msg "text/html")}) 54 | 55 | 56 | (defn imap [hostname username password folder &optional [limit 10]] 57 | (import-or-error [imapclient [IMAPClient]] 58 | "Please, install 'imapclient==1.0.0' library to use 'imap' source.") 59 | 60 | (setv server (apply IMAPClient [hostname] {"use_uid" True 61 | "ssl" True 62 | "timeout" 10})) 63 | 64 | (setv [get-value set-value] (get-storage "imap-source")) 65 | (setv seen-position-key (.join ":" [server.host folder "position"])) 66 | (setv seen-position (get-value seen-position-key -1)) 67 | 68 | (with-log-fields {"seen_position" seen-position 69 | "server" hostname 70 | "username" username 71 | "folder" folder} 72 | (log.info "Checking IMAP folder") 73 | 74 | (.login server username password) 75 | (.select_folder server folder) 76 | 77 | (setv search-criterion 78 | (if (> seen-position 0) 79 | 
;; if this is not the first time we are fetching data, 80 | ;; then just fetch every new message 81 | ["UID" (.format "{0}:*" (+ seen-position 1))] 82 | ;; if we are fetching for the first time, then we have 83 | ;; to limit messages by date, because when there is a big amount 84 | ;; of data, imaplib is unable to download it all at once 85 | (do 86 | (setv since (- (datetime.datetime.utcnow) 87 | (datetime.timedelta 1))) ; we are only interested in messages from the last day 88 | (setv since (imaplib.Time2Internaldate 89 | (apply since.replace 90 | [] 91 | {"tzinfo" pytz.UTC}))) 92 | ; now keep just the date part, dropping the time part 93 | (setv since (get (.split (.strip since "\"") " " 1) 94 | 0)) 95 | (.format "NOT DELETED SINCE {0}" since)))) 96 | 97 | ;; docs for the SEARCH command http://tools.ietf.org/html/rfc3501#section-6.4.4 98 | (log.info (.format "Searching with search-criterion={0}" search-criterion)) 99 | (setv message-ids (server.search search-criterion)) 100 | 101 | (if (> seen-position 0) 102 | ;; if we already processed this folder in the past, then output only 103 | ;; unprocessed messages 104 | (setv message-ids (list-comp id [id message-ids] (> id seen-position))) 105 | ;; otherwise, just take N messages from the top 106 | (setv message-ids (list (cut message-ids (- limit))))) 107 | 108 | (setv messages (.fetch server 109 | message-ids ["RFC822"])) 110 | (setv messages (list-comp (get item (.encode "RFC822" "utf-8")) 111 | [item (.values messages)])) 112 | (setv messages (map decode-message messages)) 113 | (setv results (list messages))) 114 | 115 | (if message-ids 116 | (with-log-fields {"message_ids" message-ids} 117 | (log.info "We processed some message ids") 118 | (yield-from results) 119 | (set-value seen-position-key (max message-ids))))) 120 | -------------------------------------------------------------------------------- /src/processor/sources/mix.hy: -------------------------------------------------------------------------------- 1 | 
(defn mix [&rest sources] 2 | """Returns messages, taking 3 | them one by one from each source in turn. 4 | 5 | If a source yields None, that value 6 | is skipped. 7 | """ 8 | (setv sources (list (map iter sources))) 9 | (setv idx 0) 10 | 11 | (while sources 12 | (setv source (get sources idx)) 13 | 14 | (try 15 | (setv value (next source)) 16 | 17 | (except [e StopIteration] 18 | (del (get sources idx))) 19 | 20 | (else 21 | (lif value 22 | (yield value)) 23 | (setv idx (+ idx 1)))) 24 | 25 | (if (>= idx (len sources)) 26 | (setv idx 0)))) 27 | -------------------------------------------------------------------------------- /src/processor/sources/twitter.hy: -------------------------------------------------------------------------------- 1 | (require processor.utils.macro) 2 | (require hy.contrib.anaphoric) 3 | 4 | (import urllib) 5 | 6 | (import [processor.storage [get-storage]]) 7 | (import [processor.utils [merge-dicts]]) 8 | (import [itertools [takewhile]]) 9 | (import [twiggy_goodies.threading [log]]) 10 | 11 | 12 | (defn rate-limited [data] 13 | "Checks whether the response from Twitter contains an error because the rate limit was exceeded."
14 | (setv errors (if (isinstance data dict) 15 | (.get data "errors"))) 16 | (if errors 17 | (when (= (get (get errors 0) "code") 18 | 88) 19 | (log.warning "Rate limited") 20 | True) 21 | False)) 22 | 23 | 24 | (defn search [query &optional consumer_key consumer_secret access_token access_secret] 25 | (import-or-error [requests_oauthlib [OAuth1Session]] 26 | "Please, install 'requests-oauthlib' to use 'twitter.search' source.") 27 | 28 | (defn add-source-and-type [tweet] 29 | (merge-dicts tweet {"source" "twitter.search" 30 | "type" "twitter.tweet"})) 31 | 32 | (with-log-name-and-fields "twitter-search" {"query" query} 33 | (setv [get-value set-value] (get-storage "twitter-search")) 34 | (setv seen-id-key (.join ":" [query "seen-id"])) 35 | (setv seen-id (get-value seen-id-key 0)) 36 | 37 | (setv url (+ "https://api.twitter.com/1.1/search/tweets.json?" 38 | (urllib.parse.urlencode {"q" query}))) 39 | (setv twitter (apply OAuth1Session [] 40 | {"client_key" consumer_key 41 | "client_secret" consumer_secret 42 | "resource_owner_key" access_token 43 | "resource_owner_secret" access_secret})) 44 | (log.info "Searching in twitter") 45 | (setv response (twitter.get url)) 46 | (setv data (response.json)) 47 | 48 | 49 | (unless (rate-limited data) 50 | (setv metadata (get data "search_metadata")) 51 | (setv max-id (get metadata "max_id")) 52 | (setv statuses (get data "statuses")) 53 | (setv new-statuses (genexpr (add-source-and-type item) 54 | [item statuses] 55 | (> (get item "id") 56 | seen-id))) 57 | (yield-from new-statuses) 58 | (set-value seen-id-key max-id)))) 59 | 60 | 61 | (defn followers [&optional consumer_key consumer_secret access_token access_secret] 62 | (import-or-error [requests_oauthlib [OAuth1Session]] 63 | "Please, install 'requests-oauthlib' to use 'twitter.followers' source.") 64 | 65 | (defn add-source-and-type [tweet] 66 | (merge-dicts tweet {"source" "twitter.followers" 67 | "type" "twitter.user"})) 68 | 69 | (with-log-name "twitter-followers" 70 | 
(setv [get-value set-value] (get-storage "twitter-followers")) 71 | (setv seen-key "seen") 72 | (setv seen (set (get-value seen-key (set)))) 73 | 74 | (setv url "https://api.twitter.com/1.1/followers/list.json?count=200") 75 | (setv twitter (apply OAuth1Session [] 76 | {"client_key" consumer_key 77 | "client_secret" consumer_secret 78 | "resource_owner_key" access_token 79 | "resource_owner_secret" access_secret})) 80 | (log.info "Fetching followers from twitter") 81 | 82 | (defn fetch-data [cursor] 83 | (setv page-url (+ url (if cursor 84 | (+ "&cursor=" (str cursor)) 85 | ""))) 86 | (print "Fetching:" page-url) 87 | (setv response (twitter.get page-url)) 88 | (setv data (response.json)) 89 | (unless (rate-limited data) 90 | (setv users (get data "users")) 91 | (when users 92 | (yield-from users) 93 | (setv next-cursor (get data "next_cursor")) 94 | (print "next-cursor:" next-cursor) 95 | (if next-cursor 96 | (yield-from (fetch-data next-cursor)))))) 97 | 98 | 99 | (setv new-followers (takewhile (fn [user] (not (in (get user "id") 100 | seen))) 101 | (fetch-data 0))) 102 | (setv new-followers-ids (list-comp (get item "id") 103 | [item new-followers])) 104 | 105 | (.update seen new-followers-ids) 106 | (yield-from (map add-source-and-type 107 | new-followers)) 108 | 109 | (set-value seen-key (list seen)))) 110 | 111 | 112 | (defn mentions [&optional consumer_key consumer_secret access_token access_secret] 113 | (import-or-error [requests_oauthlib [OAuth1Session]] 114 | "Please, install 'requests-oauthlib' to use 'twitter.mentions' source.") 115 | 116 | (with-log-name "twitter-mentions" 117 | (setv [get-value set-value] (get-storage "twitter-mentions")) 118 | (setv seen-id-key "seen-id") 119 | (setv seen-id (get-value seen-id-key 0)) 120 | 121 | (setv url "https://api.twitter.com/1.1/statuses/mentions_timeline.json") 122 | 123 | (setv twitter (apply OAuth1Session [] 124 | {"client_key" consumer_key 125 | "client_secret" consumer_secret 126 | "resource_owner_key" 
access_token 127 | "resource_owner_secret" access_secret})) 128 | (log.info "Fetching mentions from twitter") 129 | 130 | (setv response (twitter.get url)) 131 | (setv posts (response.json)) 132 | 133 | (unless (rate-limited posts) 134 | (setv max-id (max (map (fn [item] (get item "id")) posts))) 135 | 136 | (setv new-posts (genexpr {"source" "twitter.mentions" 137 | "type" "twitter.tweet" 138 | "payload" item} 139 | [item posts] 140 | (> (get item "id") 141 | seen-id))) 142 | 143 | (yield-from new-posts) 144 | (set-value seen-id-key max-id)))) 145 | -------------------------------------------------------------------------------- /src/processor/sources/web.hy: -------------------------------------------------------------------------------- 1 | (import [http.server [HTTPServer BaseHTTPRequestHandler]]) 2 | (import [queue [Queue]]) 3 | (import [threading [Thread]]) 4 | (import [cgi [parse_header]]) 5 | (import [urllib.parse [parse_qs]]) 6 | (import json) 7 | 8 | 9 | (defn parse-headers [headers] 10 | (dict headers)) 11 | 12 | 13 | (defn parse-data [request] 14 | (setv ctype (.lower (request.headers.get 15 | "content-type" 16 | ""))) 17 | (setv [ctype ctype_opts] (parse_header ctype)) 18 | 19 | (setv content-length (int (request.headers.get 20 | "content-length" 21 | "0"))) 22 | (setv data (request.rfile.read content-length)) 23 | 24 | (cond [(= ctype "application/json") 25 | (json.loads (data.decode (ctype_opts.get "charset" "utf-8")))] 26 | [(> content-length 0) (if (in "charset" ctype_opts) 27 | (data.decode (get ctype_opts "charset")) 28 | data)] 29 | [True None])) 30 | 31 | 32 | (defn parse-query [request] 33 | (setv splitted (request.path.split "?" 
1)) 34 | (if (= (len splitted) 2) 35 | [(get splitted 0) (parse_qs (get splitted 1))] 36 | [(get splitted 0) None])) 37 | 38 | 39 | (defn create-request-processor [queue] 40 | (fn [request] 41 | (setv [path query] (parse-query request)) 42 | 43 | (queue.put {"type" "http-request" 44 | "source" "web.hook" 45 | "path" path 46 | "query" query 47 | "headers" (parse-headers request.headers) 48 | "method" request.command 49 | "data" (parse-data request)}) 50 | 51 | (request.send_response 200) 52 | (request.send_header "Content-type" "text/plain") 53 | (request.end_headers))) 54 | 55 | 56 | (defn hook [&optional [host "127.0.0.1"] [port "8000"]] 57 | (setv queue (Queue)) 58 | (setv process-request (create-request-processor queue)) 59 | 60 | (defclass Handler [BaseHTTPRequestHandler] 61 | [server_version "python-processor" 62 | sys_version "" 63 | do_GET process-request 64 | do_POST process-request]) 65 | 66 | (setv server (HTTPServer (tuple [host (int port)]) Handler)) 67 | (setv worker (apply Thread [] {"target" server.serve_forever})) 68 | (setv worker.daemon True) 69 | (worker.start) 70 | 71 | (while True 72 | (yield (queue.get)))) 73 | -------------------------------------------------------------------------------- /src/processor/storage.hy: -------------------------------------------------------------------------------- 1 | (import os) 2 | (import pickle) 3 | (import json) 4 | (require processor.utils.macro) 5 | 6 | (setv not-given (object)) 7 | 8 | 9 | (defn get-storage [plugin-name &optional [db-filename not-given]] 10 | 11 | (setv db-filename (if (is not-given db-filename) 12 | (os.environ.get "PROCESSOR_DB" "processor.db") 13 | db-filename)) 14 | 15 | (with-log-fields {"db_filename" db-filename} 16 | (log.info "Will use this file to store data")) 17 | 18 | (defn get-plugin-data [] 19 | (setv data (if (os.path.exists db-filename) 20 | (with [f (open db-filename "r")] 21 | (json.load f)) 22 | {})) 23 | (.setdefault data plugin-name {}) 24 | (get data plugin-name)) 
25 | 26 | (defn save-plugin-data [plugin-data] 27 | (setv data (if (os.path.exists db-filename) 28 | (with [f (open db-filename "r")] 29 | (json.load f)) 30 | {})) 31 | (assoc data plugin-name plugin-data) 32 | (with [f (open db-filename "w")] 33 | (apply json.dump [data f] {"sort_keys" True "indent" 4}))) 34 | 35 | (defn get-value [key &optional [default not-given]] 36 | 37 | (setv plugin-data (get-plugin-data)) 38 | 39 | (if (= default not-given) 40 | (.get plugin-data key) 41 | (.get plugin-data key default))) 42 | 43 | (defn set-value [key &optional value] 44 | (setv plugin-data (get-plugin-data)) 45 | (assoc plugin-data key value) 46 | (save-plugin-data plugin-data)) 47 | 48 | [get-value set-value]) 49 | -------------------------------------------------------------------------------- /src/processor/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | from .datastructures import * 3 | -------------------------------------------------------------------------------- /src/processor/utils/datastructures.hy: -------------------------------------------------------------------------------- 1 | (defn merge-dicts [first &rest others] 2 | (setv new-dict (.copy first)) 3 | (for [d others] 4 | (.update new-dict d)) 5 | new-dict) 6 | 7 | 8 | (defn ensure-list [item] 9 | (if (isinstance item list) 10 | item 11 | [item])) 12 | -------------------------------------------------------------------------------- /src/processor/utils/macro.hy: -------------------------------------------------------------------------------- 1 | (defmacro with-log-fields [fields &rest body] 2 | `(do 3 | (import [twiggy_goodies.threading [log]]) 4 | (with [(apply log.fields [] ~fields)] 5 | ~@body))) 6 | 7 | 8 | (defmacro with-log-name-and-fields [name fields &rest body] 9 | `(do 10 | (import [twiggy_goodies.threading [log]]) 11 | (with [(apply log.name_and_fields [~name] ~fields)] 12 | ~@body))) 13 
| 14 | 15 | (defmacro with-log-name [name &rest body] 16 | `(with-log-name-and-fields ~name {} ~@body)) 17 | 18 | 19 | (defmacro import-or-error [args message] 20 | `(try (import ~args) 21 | (except [e ImportError] 22 | (print ~message) 23 | (import sys) 24 | (sys.exit 1)))) 25 | -------------------------------------------------------------------------------- /src/processor/utils/twitter.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | 4 | def remove_unnecessary_text(text): 5 | text = re.sub(r'Download the official Twitter app.*', 6 | '', text) 7 | text = re.sub(r'Sent from my.*', '', text) 8 | text = re.sub(r'^\W+$', r'', text, flags=re.MULTILINE) 9 | text = re.sub(r'\n+', r'\n', text) 10 | return text 11 | 12 | 13 | def swap_twitter_subject(subject, body): 14 | """If the subject starts with 'Tweet from...' 15 | then we need to get the first meaningful line from the body.""" 16 | 17 | if subject.startswith('Tweet from'): 18 | lines = body.split('\n') 19 | for idx, line in enumerate(lines): 20 | if re.match(r'.*, ?\d{2}:\d{2}]]', line) is not None: 21 | try: 22 | subject = lines[idx + 1] 23 | except IndexError: 24 | pass 25 | break 26 | return subject, body 27 | -------------------------------------------------------------------------------- /src/processor/version.hy: -------------------------------------------------------------------------------- 1 | (setv __version__ "0.1.0") 2 | -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | from invoke import run, task 2 | 3 | @task 4 | def release(): 5 | run('git push') 6 | run('git push --tags') 7 | run('python setup.py register') 8 | run('python setup.py sdist upload') 9 | run('python setup.py bdist_wheel upload') 10 | 11 | 12 | @task 13 | def serve_docs(): 14 | from livereload import Server, shell 15 | 16 | build_command = 'sphinx-build -b html docs 
dist/docs' 17 | run(build_command) 18 | 19 | server = Server() 20 | server.watch('*.rst', shell(build_command)) 21 | server.watch('docs/', shell(build_command)) 22 | server.serve(root='dist/docs') 23 | 24 | 25 | @task 26 | def test(): 27 | run('nosetests --nocapture --verbose') 28 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import hy 2 | from .pipeline import * 3 | from .sources import * 4 | from .outputs import * 5 | -------------------------------------------------------------------------------- /tests/outputs.hy: -------------------------------------------------------------------------------- 1 | (import [nose.tools [eq_]]) 2 | (import [processor [run_pipeline sources outputs]]) 3 | 4 | 5 | (defn test_fanout_with_functions [] 6 | (setv source [1 2 3 4 5 6 7]) 7 | (setv odds []) 8 | (setv evens []) 9 | 10 | (run_pipeline source (outputs.fanout 11 | (fn [item] 12 | (if (odd? item) 13 | (odds.append item))) 14 | (fn [item] 15 | (if (even? item) 16 | (evens.append item))))) 17 | (eq_ [1 3 5 7] odds) 18 | (eq_ [2 4 6] evens)) 19 | 20 | 21 | (defn test_fanout_with_chains [] 22 | (setv source [1 2 3 4 5 6 7]) 23 | (setv odds []) 24 | (setv evens []) 25 | 26 | (run_pipeline source (outputs.fanout 27 | [(fn [item] (if (odd? item) 28 | item)) 29 | odds.append] 30 | [(fn [item] (if (even? 
item) 31 | item)) 32 | evens.append])) 33 | (eq_ [1 3 5 7] odds) 34 | (eq_ [2 4 6] evens)) 35 | 36 | 37 | (defn test_fanout_outputs_items [] 38 | (setv source [1 2]) 39 | (setv results []) 40 | 41 | ;; here we use the `identity` function to make the 42 | ;; pipeline longer and to ensure that intermediate 43 | ;; items are not yielded from `fanout` 44 | (run-pipeline source 45 | [(outputs.fanout 46 | [identity 47 | (fn [item] (+ item 10))] 48 | [identity 49 | (fn [item] (* item item))]) 50 | results.append]) 51 | 52 | (eq_ [11 1 12 4] results)) 53 | -------------------------------------------------------------------------------- /tests/pipeline.hy: -------------------------------------------------------------------------------- 1 | (import [nose.tools [eq_]]) 2 | (import [processor [run_pipeline sources outputs]]) 3 | 4 | 5 | (defn test_source_as_a_list [] 6 | (setv results []) 7 | (run_pipeline [1 2 3 4] 8 | results.append) 9 | (eq_ [1 2 3 4] results)) 10 | 11 | 12 | (defn test_source_as_a_function [] 13 | (setv items [1 2 3]) 14 | (setv results []) 15 | 16 | (defn source [] 17 | (if items 18 | (items.pop))) 19 | 20 | (run_pipeline source 21 | results.append) 22 | (eq_ [3 2 1] results)) 23 | 24 | 25 | (defn test_two_outputs [] 26 | (setv source [{"message" "blah" 27 | "level" "WARN"} 28 | {"message" "minor" 29 | "level" "INFO"}]) 30 | 31 | (defn trigger [msg] 32 | (if (= (msg.get "level") "WARN") 33 | msg)) 34 | 35 | (setv warnings []) 36 | 37 | (run_pipeline source 38 | [trigger warnings.append]) 39 | 40 | (eq_ 1 (len warnings))) 41 | 42 | 43 | (defn test_source_can_return_lists_of_items_instead_of_dicts [] 44 | "A source can return iterable objects instead of dictionaries; 45 | each item in them is then processed separately by the rest of the pipeline."
46 | (setv source [1 [2 3] 4]) 47 | (setv results []) 48 | 49 | (run_pipeline source results.append) 50 | (eq_ [1 2 3 4] results)) 51 | 52 | 53 | (defn test_any_step_can_return_list_of_items_instead_of_dict [] 54 | "If a pipeline step returns an iterable object instead of a dictionary, 55 | each item in it is processed separately by the rest of the pipeline." 56 | (setv source [1 2 4]) 57 | (setv results []) 58 | 59 | (defn list_if_two [item] 60 | (if (= item 2) 61 | [2 3] 62 | item)) 63 | 64 | (run_pipeline source [list_if_two results.append]) 65 | (eq_ [1 2 3 4] results)) 66 | 67 | -------------------------------------------------------------------------------- /tests/sources.hy: -------------------------------------------------------------------------------- 1 | (import [nose.tools [eq_]]) 2 | (import [processor [sources]]) 3 | 4 | 5 | (defn test_mix [] 6 | (setv source1 [1 2 3 4 5]) 7 | (setv source2 [6 7 None 8]) 8 | (setv desired_result [1 6 2 7 3 4 8 5]) 9 | (setv result (list (sources.mix source1 source2))) 10 | (eq_ desired_result result)) 11 | -------------------------------------------------------------------------------- /tests/test_processor.py: -------------------------------------------------------------------------------- 1 | import hy 2 | from nose.tools import eq_ 3 | 4 | from processor import run_pipeline 5 | 6 | 7 | def test_pipeline(): 8 | def producer(): 9 | return [{'message': 'blah', 10 | 'level': 'WARN'}, 11 | {'message': 'minor', 12 | 'level': 'INFO'}] 13 | 14 | def trigger(msg): 15 | if msg.get('level') == 'WARN': 16 | return True 17 | 18 | warnings = [] 19 | def action(msg): 20 | warnings.append(msg) 21 | 22 | run_pipeline(producer(), 23 | [trigger, action]) 24 | 25 | eq_(1, len(warnings)) 26 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = 3 | clean, 4 | check, 5 | 3.3, 6 | 3.3-nocover, 7 
| 3.4, 8 | 3.4-nocover, 9 | 3.5, 10 | 3.5-nocover, 11 | report, 12 | docs 13 | 14 | [testenv] 15 | setenv = 16 | PYTHONPATH={toxinidir}/tests 17 | PYTHONUNBUFFERED=yes 18 | deps = 19 | nose 20 | commands = 21 | {posargs:nosetests -v tests} 22 | 23 | [testenv:spell] 24 | setenv = 25 | SPELLCHECK = 1 26 | commands = 27 | sphinx-build -b spelling docs dist/docs 28 | usedevelop = true 29 | deps = 30 | -r{toxinidir}/docs/requirements.txt 31 | sphinxcontrib-spelling 32 | pyenchant 33 | 34 | [testenv:docs] 35 | whitelist_externals = 36 | rm 37 | commands = 38 | rm -rf dist/docs || rmdir /S /Q dist\docs 39 | sphinx-build -b html docs dist/docs 40 | sphinx-build -b linkcheck docs dist/docs 41 | usedevelop = true 42 | deps = 43 | -r{toxinidir}/docs/requirements.txt 44 | 45 | [testenv:configure] 46 | deps = 47 | jinja2 48 | matrix 49 | usedevelop = true 50 | commands = 51 | python bootstrap.py 52 | 53 | [testenv:check] 54 | basepython = python3.4 55 | deps = 56 | docutils 57 | check-manifest 58 | flake8 59 | collective.checkdocs 60 | pygments 61 | usedevelop = true 62 | commands = 63 | python setup.py checkdocs 64 | python setup.py check --strict --metadata 65 | check-manifest --ignore tasks.py,requirements-dev.txt {toxinidir} 66 | flake8 src 67 | 68 | [testenv:coveralls] 69 | deps = 70 | coveralls 71 | usedevelop = true 72 | commands = 73 | coverage combine 74 | coverage report 75 | coveralls 76 | 77 | [testenv:report] 78 | basepython = python3.4 79 | commands = 80 | coverage combine 81 | coverage report 82 | usedevelop = true 83 | deps = coverage 84 | 85 | [testenv:clean] 86 | commands = coverage erase 87 | usedevelop = true 88 | deps = coverage 89 | 90 | [testenv:3.3] 91 | basepython = python3.3 92 | setenv = 93 | {[testenv]setenv} 94 | WITH_COVERAGE=yes 95 | usedevelop = true 96 | commands = 97 | {posargs:nosetests --with-coverage --cover-package=processor --cover-html-dir=term-missing -v} 98 | 99 | [testenv:3.3-nocover] 100 | basepython = python3.3 101 | 102 | 
[testenv:3.4] 103 | basepython = python3.4 104 | setenv = 105 | {[testenv]setenv} 106 | WITH_COVERAGE=yes 107 | usedevelop = true 108 | commands = 109 | {posargs:nosetests --with-coverage --cover-package=processor --cover-html-dir=term-missing -v} 110 | 111 | [testenv:3.4-nocover] 112 | basepython = python3.4 113 | --------------------------------------------------------------------------------
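The round-robin behaviour implemented in `src/processor/sources/mix.hy` (and exercised by `tests/sources.hy`) can be sketched in plain Python. This `mix` is a hypothetical re-implementation for illustration only, not part of the package:

```python
def mix(*sources):
    """Round-robin over several iterables, a sketch of sources/mix.hy.

    Exhausted sources are dropped from the rotation, and None values
    are skipped without stalling the loop.
    """
    iterators = [iter(source) for source in sources]
    idx = 0
    while iterators:
        try:
            value = next(iterators[idx])
        except StopIteration:
            # this source is exhausted: remove it, idx now points at the next one
            del iterators[idx]
        else:
            if value is not None:
                yield value
            idx += 1
        if idx >= len(iterators):
            idx = 0

# same interleaving as the expectation in tests/sources.hy
print(list(mix([1, 2, 3, 4, 5], [6, 7, None, 8])))  # → [1, 6, 2, 7, 3, 4, 8, 5]
```

Note the `else` branch of the `try`: the index only advances when a value was actually produced, which is what keeps the rotation fair after a source is deleted.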
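The fan-out rule tested in `tests/pipeline.hy` (dicts and scalars travel as single messages, any other iterable is split item by item, and every non-None step result is fed recursively into the rest of the pipeline) can also be sketched in plain Python. This is a simplified, hypothetical model of `run_pipeline` from `src/processor/pipeline.hy`; the on-close callbacks and callable-source support are deliberately omitted:

```python
from collections.abc import Iterable

def run_pipeline(source, pipeline):
    """Simplified sketch of pipeline.hy's recursive dispatch."""
    # a bare callable is treated as a one-step pipeline
    steps = list(pipeline) if isinstance(pipeline, Iterable) else [pipeline]
    for msg in source:
        # dicts and scalars are wrapped; other iterables fan out item by item
        items = [msg] if isinstance(msg, dict) or not isinstance(msg, Iterable) else msg
        if not steps:
            continue
        step = steps[0]
        for item in items:
            response = step(item)
            if response is not None:
                # treat the response as a one-message source for the remaining steps
                run_pipeline([response], steps[1:])

# mirrors test_any_step_can_return_list_of_items_instead_of_dict:
# a step returning a list fans out into the rest of the pipeline
results = []
run_pipeline([1, 2, 4], [lambda x: [2, 3] if x == 2 else x, results.append])
print(results)  # → [1, 2, 3, 4]
```

As in the Hy original, a step that returns None stops processing of that item, which is why a simple filter function can act as a trigger in front of an output.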