├── .gitattributes
├── .gitignore
├── .pyup.yml
├── .travis.yml
├── CONTRIBUTING.rst
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.rst
├── bin
    ├── benchmark
    └── runpipe
├── dev-requirements.txt
├── docs
    ├── AUTHORS.rst
    ├── CHANGES.rst
    ├── COOKBOOK.rst
    ├── FAQ.rst
    ├── INSTALLATION.rst
    └── TODO.rst
├── examples
    ├── __init__.py
    ├── demo.py
    ├── gigs.py
    ├── kazeeki.py
    ├── simple1.py
    ├── simple2.py
    ├── split.py
    ├── usage.ipynb
    ├── usage.py
    └── wired.py
├── helpers
    ├── check-stage
    ├── clean
    ├── pippy
    ├── srcdist
    └── wheel
├── manage.py
├── optional-requirements.txt
├── pyproject.toml
├── requirements.txt
├── riko
    ├── __init__.py
    ├── autorss.py
    ├── bado
    │   ├── __init__.py
    │   ├── io.py
    │   ├── itertools.py
    │   ├── microdom.py
    │   ├── mock.py
    │   ├── requests.py
    │   ├── sux.py
    │   └── util.py
    ├── cast.py
    ├── collections.py
    ├── currencies.py
    ├── data
    │   ├── Politik.xml
    │   ├── TheEdTechie.xml
    │   ├── Topthemen.xml
    │   ├── autoblog.xml
    │   ├── bbc.html
    │   ├── bbci.co.uk.xml
    │   ├── caltrain.html
    │   ├── capnorth.xml
    │   ├── cnn.html
    │   ├── countries.csv
    │   ├── currencies.csv
    │   ├── currencies.json
    │   ├── delicious.xml
    │   ├── elance.json
    │   ├── election_results.json
    │   ├── feed.xml
    │   ├── fourtitude.xml
    │   ├── freelancer.json
    │   ├── gawker.xml
    │   ├── gigs.json
    │   ├── greenhughes.xml
    │   ├── guru.json
    │   ├── health.xml
    │   ├── lorem.txt
    │   ├── odesk.json
    │   ├── ouseful.xml
    │   ├── ouseful_feedburner.xml
    │   ├── places.xml
    │   ├── podcast.xml
    │   ├── psychemedia_delicious.xml
    │   ├── psychemedia_slideshare.xml
    │   ├── quote.json
    │   ├── schools.xml
    │   ├── sciencedaily.html
    │   ├── scotland.xml
    │   ├── spreadsheet.csv
    │   ├── status.csv
    │   ├── topstories.xml
    │   ├── users.jyu.fi.html
    │   ├── yodel.xml
    │   └── yql.xml
    ├── dates.py
    ├── dotdict.py
    ├── locations.py
    ├── modules
    │   ├── __init__.py
    │   ├── count.py
    │   ├── csv.py
    │   ├── currencyformat.py
    │   ├── dateformat.py
    │   ├── exchangerate.py
    │   ├── feedautodiscovery.py
    │   ├── fetch.py
    │   ├── fetchdata.py
    │   ├── fetchpage.py
    │   ├── fetchsitefeed.py
    │   ├── fetchtext.py
    │   ├── filter.py
    │   ├── geolocate.py
    │   ├── hash.py
    │   ├── input.py
    │   ├── itembuilder.py
    │   ├── join.py
    │   ├── refind.py
    │   ├── regex.py
    │   ├── rename.py
    │   ├── reverse.py
    │   ├── rssitembuilder.py
    │   ├── simplemath.py
    │   ├── slugify.py
    │   ├── sort.py
    │   ├── split.py
    │   ├── strconcat.py
    │   ├── strfind.py
    │   ├── strreplace.py
    │   ├── strtransform.py
    │   ├── subelement.py
    │   ├── substr.py
    │   ├── sum.py
    │   ├── tail.py
    │   ├── timeout.py
    │   ├── tokenizer.py
    │   ├── truncate.py
    │   ├── typecast.py
    │   ├── udf.py
    │   ├── union.py
    │   ├── uniq.py
    │   ├── urlbuilder.py
    │   ├── urlparse.py
    │   ├── xpathfetchpage.py
    │   └── yql.py
    ├── parsers.py
    └── utils.py
├── setup.cfg
├── setup.py
├── tests
    ├── __init__.py
    ├── pylintrc
    ├── test.py
    └── test_examples.py
└── tox.ini


/.gitattributes:
--------------------------------------------------------------------------------
1 | riko/data/* linguist-vendored
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | *.py[cod]
 3 | *.DS_Store
 4 | *.ipynb
 5 | *checkpoint.ipynb
 6 | .cookiecutter
 7 | .ipynb_checkpoints/*
 8 | .idea/
 9 | example*.log
10 | examples/.ipynb_checkpoints/*
11 | 
12 | # C extensions
13 | *.so
14 | 
15 | # Packages
16 | *.egg*
17 | *.egg-info
18 | .eggs
19 | .installed.cfg
20 | .installed.cfg
21 | *-0.*/*
22 | dist
23 | build
24 | eggs
25 | parts
26 | var
27 | sdist
28 | develop-eggs
29 | lib
30 | lib64
31 | venv
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | 
36 | # Unit test / coverage reports
37 | *.wpu
38 | *.rope
39 | *.noseids
40 | *.ropeproject
41 | .coverage
42 | .tox
43 | .noseids
44 | .cache/*
45 | .scripttest
46 | coverage.xml
47 | cover/*
48 | htmlcov/*
49 | nosetests.xml
50 | 
51 | # Translations
52 | *.mo
53 | 
54 | # Mr Developer
55 | .mr.developer.cfg
56 | .project
57 | .pydevproject
58 | 
59 | # Complexity
60 | output/*.html
61 | output/*/index.html
62 | 
63 | # Sphinx
64 | docs/_build
65 | build/*
66 | 
67 | # Misc
68 | *.pstats
69 | .vscode
70 | callgraph.svg
71 | ttyrecord
72 | 
73 | 


--------------------------------------------------------------------------------
/.pyup.yml:
--------------------------------------------------------------------------------
1 | # autogenerated pyup.io config file
2 | # see https://pyup.io/docs/configuration/ for all available options
3 | 
4 | update: security
5 | pin: False
6 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: false
 2 | language: python
 3 | python:
 4 |   - "3.7"
 5 |   - "3.8"
 6 |   - "3.9"
 7 |   - pypy3.7-7.3.2
 8 | 
 9 | cache:
10 |   directories:
11 |     - ~/.cache/pip
12 | 
13 | env:
14 |   matrix:
15 |     - OPTIONAL=true
16 |     - OPTIONAL=false
17 |   global:
18 |     - PIP_WHEEL_DIR=$HOME/.cache/pip/wheels
19 |     - PIP_FIND_LINKS=file://$HOME/.cache/pip/wheels
20 |     - PYTHONHASHSEED=94967295
21 | 
22 | matrix:
23 |   fast_finish: true
24 |   allow_failures:
25 |   - python: pypy3.7-7.3.2
26 |   exclude:
27 |   - python: pypy3.7-7.3.2
28 |     env: OPTIONAL=true
29 | 
30 | notifications:
31 |   email:
32 |     on_success: always
33 | 
34 | branches:
35 |   except:
36 |     - /^v[0-9]/
37 | 
38 | before_install:
39 |   - pip install -U pip
40 |   - pip install wheel
41 | 
42 | install:
43 |   - pip install -r dev-requirements.txt
44 |   - pip install -r requirements.txt
45 |   - |
46 |       if [[ "$OPTIONAL" == "true" ]]; then
47 |         pip wheel -r optional-requirements.txt
48 |         pip install -r optional-requirements.txt
49 |       fi
50 | 
51 | script:
52 |   - manage lint && manage test
53 | 
54 | after_success:
55 |   - if [[ "$TRAVIS_PULL_REQUEST" == "true" ]]; then exit 0; fi
56 |   - if [[ "$TRAVIS_BRANCH" != "master" ]]; then exit 0; fi
57 |   - if [[ "$TRAVIS_PYTHON_VERSION" != "3.9" ]]; then exit 0; fi
58 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.rst:
--------------------------------------------------------------------------------
  1 | ============
  2 | Contributing
  3 | ============
  4 | 
  5 | Contributions are welcome, and they are greatly appreciated! Every
  6 | little bit helps, and credit will always be given.
  7 | 
  8 | When contributing, please mimic the coding style/conventions used in this repo.
  9 | If you add new classes or functions, please add the appropriate doc blocks with
 10 | examples. Also, make sure the python linter and tests pass.
 11 | 
 12 | Ready to contribute? Here's how.
 13 | 
 14 | Types of Contributions
 15 | ----------------------
 16 | 
 17 | Feedback & Bug Reports
 18 | ~~~~~~~~~~~~~~~~~~~~~~
 19 | 
 20 | The best way to send feedback or report a bug is to file an issue at
 21 | https://github.com/nerevu/riko/issues.
 22 | 
 23 | If you are reporting a bug, please include:
 24 | 
 25 | * Your operating system name and version.
 26 | * Any details about your local setup that might be helpful in troubleshooting.
 27 | * Detailed steps to reproduce the bug.
 28 | 
 29 | Bug Fixes
 30 | ~~~~~~~~~
 31 | 
 32 | Look through the GitHub `issues`_ for anything tagged with ``bug`` and hack away.
 33 | 
 34 | Feature Implementation
 35 | ~~~~~~~~~~~~~~~~~~~~~~
 36 | 
 37 | Look through the GitHub `issues`_ for anything tagged with ``feature`` and hack away.
 38 | 
 39 | If you are *proposing* a feature:
 40 | 
 41 | * Explain in detail how it would work.
 42 | * To make it easier to implement, Keep the scope as narrow as possible.
 43 | * Remember that this is a volunteer-driven project, and that contributions
 44 |   are welcome :)
 45 | 
 46 | Documentation
 47 | ~~~~~~~~~~~~~
 48 | 
 49 | riko could always use more documentation, whether as part of the
 50 | official docs, in docstrings, or even on the web in blog posts, articles, and such.
 51 | Feel free to contribute any type of documentation.
 52 | 
 53 | Get Started!
 54 | ------------
 55 | 
 56 | Ready to contribute? Here's how to set up ``riko`` for local development.
 57 | 
 58 | 1. Fork the ``riko`` repo on GitHub and clone
 59 | 
 60 | .. code-block:: bash
 61 | 
 62 |     git clone git@github.com:<your_username>/riko.git
 63 |     cd riko
 64 | 
 65 | 2. Setup a new `virtualenv`_ with ``virtualenvwrapper``
 66 | 
 67 | .. code-block:: bash
 68 | 
 69 |     mkvirtualenv --no-site-packages riko
 70 | 
 71 | Or, if you only have ``virtualenv`` installed
 72 | 
 73 | .. code-block:: bash
 74 | 
 75 |     virtualenv --no-site-packages ~/.venvs/riko
 76 |     source ~/.venvs/riko/bin/activate
 77 | 
 78 | 3. Install required modules
 79 | 
 80 | Python3
 81 | 
 82 | .. code-block:: bash
 83 | 
 84 |     pip install -r dev-requirements.txt
 85 |     pip install -r optional-requirements.txt
 86 |     pip install -r requirements.txt
 87 | 
 88 | Python2
 89 | 
 90 | .. code-block:: bash
 91 | 
 92 |     pip install -r dev-requirements.txt
 93 |     pip install -r optional-requirements.txt
 94 |     pip install -r py2-requirements.txt
 95 | 
 96 | 4. Run setup develop script
 97 | 
 98 | .. code-block:: bash
 99 | 
100 |     python setup.py develop
101 | 
102 | 5. Create a branch for local development
103 | 
104 | .. code-block:: bash
105 | 
106 |     git checkout -b name-of-your-bugfix-or-feature
107 | 
108 | 6. Make your changes and run linter and tests
109 | 
110 | .. code-block:: bash
111 | 
112 |     manage lint
113 |     manage test
114 | 
115 |     # or to run the full integration tests
116 |     tox
117 | 
118 | 5. Commit your changes and push your branch to GitHub
119 | 
120 | .. code-block:: bash
121 | 
122 |     git add .
123 |     git commit -m "Your detailed description of your changes."
124 |     git push origin name-of-your-bugfix-or-feature
125 | 
126 | 6. Submit a pull request on the riko `repo`_.
127 | 
128 | Pull Request Guidelines
129 | -----------------------
130 | 
131 | Before you submit a pull request, check that it meets these guidelines:
132 | 
133 | 1. The pull request includes tests.
134 | 2. If the pull request adds functionality, the docs should be updated: Put
135 |    your new functionality into a function with a docstring, and add the
136 |    feature to the list in README.rst.
137 | 
138 | .. _issues: https://github.com/nerevu/riko/issues
139 | .. _repo: https://github.com/nerevu/riko
140 | .. _virtualenv: https://virtualenv.pypa.io/en/latest/index.html
141 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016, Reuben Cummings
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include riko/data *
2 | recursive-include tests *
3 | recursive-include helpers *
4 | recursive-include docs *
5 | recursive-include examples *
6 | include LICENSE
7 | include *.rst
8 | include *requirements.txt
9 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .PHONY: help clean check-stage pipme require lint test tox register upload release sdist wheel
 2 | 
 3 | help:
 4 | 	@echo "clean - remove Python file and build artifacts"
 5 | 	@echo "check-stage - check staged changes for lint errors"
 6 | 	@echo "pipme - install requirements.txt"
 7 | 	@echo "require - create requirements.txt"
 8 | 	@echo "lint - check style with flake8"
 9 | 	@echo "test - run nose and script tests"
10 | 	@echo "release - package and upload a release"
11 | 	@echo "sdist - create a source distribution package"
12 | 	@echo "wheel - create a wheel package"
13 | 	@echo "upload - upload dist files"
14 | 	@echo "register - register package with PyPI"
15 | 	@echo "tox - run tests on every Python version with tox"
16 | 
17 | clean:
18 | 	helpers/clean
19 | 
20 | check-stage:
21 | 	helpers/check-stage
22 | 
23 | pipme:
24 | 	pip install -r requirements.txt
25 | 
26 | require:
27 | 	pip freeze -l | grep -vxFf dev-requirements.txt > requirements.txt
28 | 
29 | lint:
30 | 	flake8 riko tests
31 | 
32 | test:
33 | 	nosetests -xv
34 | 	python tests/test.py
35 | 
36 | release: clean sdist wheel upload
37 | 
38 | register:
39 | 	python setup.py register
40 | 
41 | sdist:
42 | 	clean
43 | 	helpers/srcdist
44 | 
45 | wheel:
46 | 	clean
47 | 	helpers/wheel
48 | 
49 | upload:
50 | 	twine upload dist/*
51 | 
52 | tox:
53 | 	tox
54 | 


--------------------------------------------------------------------------------
/bin/benchmark:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # vim: sw=4:ts=4:expandtab
  4 | 
  5 | from __future__ import (
  6 |     absolute_import, division, print_function, unicode_literals)
  7 | 
  8 | import sys
  9 | 
 10 | from os import path as p
 11 | from functools import partial
 12 | from multiprocessing.dummy import Pool as ThreadPool
 13 | from multiprocessing import Pool
 14 | from time import time, sleep
 15 | from itertools import chain
 16 | 
 17 | from builtins import *  # noqa # pylint: disable=unused-import
 18 | 
 19 | sys.path.append('../riko')
 20 | 
 21 | from riko import get_path
 22 | from riko.bado import coroutine, return_value, react
 23 | from riko.bado.util import async_sleep
 24 | from riko.bado.itertools import async_imap
 25 | from riko.modules.fetch import pipe, async_pipe
 26 | from riko.collections import (
 27 |     SyncPipe, SyncCollection, AsyncPipe, AsyncCollection, get_chunksize,
 28 |     get_worker_cnt)
 29 | 
 30 | NUMBER = 1
 31 | LOOPS = 1
 32 | DELAY = 0.1
 33 | 
 34 | parent = p.join(p.abspath(p.dirname(p.dirname(__file__))), 'data')
 35 | files = [
 36 |     'ouseful.xml',
 37 |     'feed.xml',
 38 |     'delicious.xml',
 39 |     'psychemedia_delicious.xml',
 40 |     'ouseful_feedburner.xml',
 41 |     'TheEdTechie.xml',
 42 |     'yodel.xml',
 43 |     'gawker.xml',
 44 |     'health.xml',
 45 |     'topstories.xml',
 46 |     'autoblog.xml',
 47 |     'fourtitude.xml',
 48 |     'greenhughes.xml',
 49 |     'psychemedia_slideshare.xml']
 50 | 
 51 | urls = [get_path(f) for f in files]
 52 | confs = [{'url': url, 'sleep': DELAY} for url in urls]
 53 | sources = [{'url': url} for url in urls]
 54 | length = len(files)
 55 | iterable = [DELAY for x in files]
 56 | 
 57 | 
 58 | def baseline_sync():
 59 |     return list(map(sleep, iterable))
 60 | 
 61 | 
 62 | def baseline_threads():
 63 |     workers = get_worker_cnt(length)
 64 |     chunksize = get_chunksize(length, workers)
 65 |     pool = ThreadPool(workers)
 66 |     return list(pool.imap_unordered(sleep, iterable, chunksize=chunksize))
 67 | 
 68 | 
 69 | def baseline_procs():
 70 |     workers = get_worker_cnt(length, False)
 71 |     chunksize = get_chunksize(length, workers)
 72 |     pool = Pool(workers)
 73 |     return list(pool.imap_unordered(sleep, iterable, chunksize=chunksize))
 74 | 
 75 | 
 76 | def sync_pipeline():
 77 |     pipes = (pipe(conf=conf) for conf in confs)
 78 |     return list(chain.from_iterable(pipes))
 79 | 
 80 | 
 81 | def sync_pipe():
 82 |     streams = (SyncPipe('fetch', conf=conf).list for conf in confs)
 83 |     return list(chain.from_iterable(streams))
 84 | 
 85 | 
 86 | def sync_collection():
 87 |     return SyncCollection(sources, sleep=DELAY).list
 88 | 
 89 | 
 90 | def par_sync_collection():
 91 |     return SyncCollection(sources, parallel=True, sleep=DELAY).list
 92 | 
 93 | 
 94 | def baseline_async():
 95 |     return async_imap(async_sleep, iterable)
 96 | 
 97 | 
 98 | def async_pipeline():
 99 |     d = async_imap(lambda conf: async_pipe(conf=conf), confs)
100 |     d.addCallbacks(list, print)
101 | 
102 | def async_pipe():
103 |     asyncCallable = lambda conf: AsyncPipe('fetch', conf=conf).list
104 |     d = async_imap(asyncCallable, confs)
105 |     d.addCallbacks(list, print)
106 | 
107 | 
108 | def async_collection():
109 |     return AsyncCollection(sources, sleep=DELAY).list
110 | 
111 | 
112 | def parse_results(results):
113 |     switch = {0: 'secs', 3: 'msecs', 6: 'usecs'}
114 |     best = min(results)
115 | 
116 |     for places in [0, 3, 6]:
117 |         factor = pow(10, places)
118 |         if 1 / best // factor == 0:
119 |             break
120 | 
121 |     return round(best * factor, 2), switch[places]
122 | 
123 | 
124 | def print_time(test, max_chars, run_time, units):
125 |     padded = test.zfill(max_chars).replace('0', ' ')
126 |     msg = '%s - %i repetitions/loop, best of %i loops: %s %s'
127 |     print(msg % (padded, NUMBER, LOOPS, run_time, units))
128 | 
129 | 
130 | @coroutine
131 | def run_async(reactor, tests, max_chars):
132 |     for test in tests:
133 |         results = []
134 | 
135 |         for i in range(LOOPS):
136 |             loop = 0
137 | 
138 |             for j in range(NUMBER):
139 |                 start = time()
140 |                 yield test()
141 |                 loop += time() - start
142 | 
143 |             results.append(loop)
144 | 
145 |         run_time, units = parse_results(results)
146 |         print_time(test.__name__, max_chars, run_time, units)
147 | 
148 |     return_value(None)
149 | 
150 | if __name__ == '__main__':
151 |     from timeit import repeat
152 | 
153 |     run = partial(repeat, repeat=LOOPS, number=NUMBER)
154 |     sync_tests = [
155 |         'baseline_sync', 'baseline_threads', 'baseline_procs', 'sync_pipeline',
156 |         'sync_pipe', 'sync_collection', 'par_sync_collection']
157 | 
158 |     async_tests = [baseline_async, async_pipeline, async_pipe, async_collection]
159 |     combined_tests = sync_tests + [f.__name__ for f in async_tests]
160 |     max_chars = max(list(map(len, combined_tests)))
161 | 
162 |     for test in sync_tests:
163 |         results = run('%s()' % test, setup='from __main__ import %s' % test)
164 |         run_time, units = parse_results(results)
165 |         print_time(test, max_chars, run_time, units)
166 | 
167 |     react(run_async, [async_tests, max_chars])
168 | 


--------------------------------------------------------------------------------
/bin/runpipe:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | from __future__ import (
 5 |     absolute_import, division, print_function, unicode_literals)
 6 | 
 7 | import requests
 8 | import sys
 9 | sys.path.append('../riko')
10 | 
11 | from os import path as p
12 | from importlib import import_module
13 | 
14 | try:
15 |     # python 3.3+
16 |     from importlib.machinery import SourceFileLoader
17 | except ImportError:
18 |     try:
19 |         # python 3.4+
20 |         from importlib.util import spec_from_file_location, module_from_spec
21 |     except ImportError:
22 |         # python 2.7-
23 |         from imp import load_source as load_file
24 |         io_error = IOError
25 |     else:
26 |         io_error = FileNotFoundError
27 | 
28 |         def load_file(name, src):
29 |             location = 'examples/%s.py' % src
30 |             spec = spec_from_file_location(name, location)
31 |             module = module_from_spec(spec)
32 |             spec.loader.exec_module(module)
33 |             return module
34 | else:
35 |     io_error = FileNotFoundError
36 |     load_file = lambda name, src: SourceFileLoader(name, src).load_module()
37 | 
38 | from argparse import RawTextHelpFormatter, ArgumentParser
39 | from riko.bado import react
40 | 
41 | parser = ArgumentParser(
42 |     description='description: Runs a riko pipe', prog='runpipe',
43 |     usage='%(prog)s [pipeid]', formatter_class=RawTextHelpFormatter)
44 | 
45 | parser.add_argument(
46 |     dest='pipeid', nargs='?', default=sys.stdin,
47 |     help='The pipe to run (default: reads from stdin).')
48 | 
49 | parser.add_argument(
50 |     '-a', '--async', dest='isasync', action='store_true', default=False,
51 |     help="Load async pipe.\n\n")
52 | 
53 | parser.add_argument(
54 |     '-t', '--test', action='store_true', default=False,
55 |     help="Run in test mode (uses default inputs).\n\n")
56 | 
57 | args = parser.parse_args()
58 | 
59 | 
60 | def file2name(path):
61 |     return p.splitext(p.basename(path))[0]
62 | 
63 | 
64 | def run():
65 |     """CLI runner"""
66 |     try:
67 |         pipeid = args.pipeid.read()
68 |     except AttributeError:
69 |         pipeid = args.pipeid
70 | 
71 |     try:
72 |         name = file2name('%s.py' % pipeid)
73 |         module = load_file(name, pipeid)
74 |     except io_error:
75 |         try:
76 |             module = import_module('examples.%s' % pipeid)
77 |         except ImportError:
78 |             exit('Pipe examples.%s not found!' % pipeid)
79 | 
80 |     if args.isasync:
81 |         pipeline = getattr(module, 'async_pipe')
82 |         react(pipeline, [args.test])
83 |     else:
84 |         pipeline = getattr(module, 'pipe')
85 |         pipeline(test=args.test)
86 | 
87 | if __name__ == "__main__":
88 |     run()
89 | 


--------------------------------------------------------------------------------
/dev-requirements.txt:
--------------------------------------------------------------------------------
 1 | pip>20.0
 2 | wheel>=0.29.0
 3 | black>=19.3b0,<22.0
 4 | coverage>=4.3.4,<6.0.0
 5 | flake8>=3.7.9,<5.0.0
 6 | flake8-black>=0.1.1,<0.3.0
 7 | nose>=1.3.7,<2.0.0
 8 | manage.py>=0.2.10,<0.3.0
 9 | pkutils>=3.0.0,<4.0.0
10 | pylint>=2.5.0,<3.0.0
11 | responses>=0.9.0,<0.15.0
12 | scripttest>=1.3,<2.0
13 | setuptools>=42.0.2
14 | tox>=3.14.3,<4.0.0
15 | twine>=3.2.0,<4.0.0
16 | 


--------------------------------------------------------------------------------
/docs/AUTHORS.rst:
--------------------------------------------------------------------------------
 1 | =======
 2 | Credits
 3 | =======
 4 | 
 5 | Development Lead
 6 | ----------------
 7 | 
 8 | * Reuben Cummings <reubano@gmail.com>
 9 | 
10 | Contributors
11 | ------------
12 | 
13 | None yet. Why not be the first?
14 | 


--------------------------------------------------------------------------------
/docs/CHANGES.rst:
--------------------------------------------------------------------------------
  1 | Changelog
  2 | =========
  3 | 
  4 | %%version%% (unreleased)
  5 | ------------------------
  6 | 
  7 | Bugfixes
  8 | ~~~~~~~~
  9 | 
 10 | - Store downloaded packages in wheel dir. [Reuben Cummings]
 11 | 
 12 | - Fix prefix generation. [Reuben Cummings]
 13 | 
 14 | v0.35.1 (2016-07-22)
 15 | --------------------
 16 | 
 17 | Bugfixes
 18 | ~~~~~~~~
 19 | 
 20 | - Fix makefile lint command. [Reuben Cummings]
 21 | 
 22 | - Update pygogo requirement (fixes #2) [Reuben Cummings]
 23 | 
 24 | v0.35.0 (2016-07-19)
 25 | --------------------
 26 | 
 27 | New
 28 | ~~~
 29 | 
 30 | - Limit the number of unique items tracked. [Reuben Cummings]
 31 | 
 32 | - Add grouping ability to count pipe. [Reuben Cummings]
 33 | 
 34 | Bugfixes
 35 | ~~~~~~~~
 36 | 
 37 | - Fix processor metadata. [Reuben Cummings]
 38 | 
 39 | v0.34.0 (2016-07-19)
 40 | --------------------
 41 | 
 42 | New
 43 | ~~~
 44 | 
 45 | - Add list element searching to microdom. [Reuben Cummings]
 46 | 
 47 | - Add more operations to filter pipes. [Reuben Cummings]
 48 | 
 49 | Changes
 50 | ~~~~~~~
 51 | 
 52 | - Merge async_pmap and async_imap. [Reuben Cummings]
 53 | 
 54 | - Change deferToProcess name and arguments. [Reuben Cummings]
 55 | 
 56 | - Rename modules/functions, and update docs. [Reuben Cummings]
 57 | 
 58 | Bugfixes
 59 | ~~~~~~~~
 60 | 
 61 | - Force getElementsByTagName to return child. [Reuben Cummings]
 62 | 
 63 | - Only use FakeReactor when actually needed. [Reuben Cummings]
 64 | 
 65 | - Fix async html parsing. [Reuben Cummings]
 66 | 
 67 | - Prevent IndexError. [Reuben Cummings]
 68 | 
 69 | - Fix async opening of http files. [Reuben Cummings]
 70 | 
 71 | - Be lenient with html parsing. [Reuben Cummings]
 72 | 
 73 | - Fix empty xpath and start value bugs. [Reuben Cummings]
 74 | 
 75 | v0.33.0 (2016-07-01)
 76 | --------------------
 77 | 
 78 | Changes
 79 | ~~~~~~~
 80 | 
 81 | - Major refactor for py3 support: [Reuben Cummings]
 82 | 
 83 |   - fix py3 and open file errors
 84 |   - port missing twisted modules
 85 |   - refactor rss parsing
 86 |   - and streaming json support
 87 |   - rename request function
 88 |   - make benchmarks.py a script and add to tests
 89 | 
 90 | Bugfixes
 91 | ~~~~~~~~
 92 | 
 93 | - Fix pypy test errors. [Reuben Cummings]
 94 | 
 95 | v0.32.0 (2016-06-16)
 96 | --------------------
 97 | 
 98 | Changes
 99 | ~~~~~~~
100 | 
101 | - Refactor to remove Twisted dependency. [Reuben Cummings]
102 | 
103 | v0.31.0 (2016-06-16)
104 | --------------------
105 | 
106 | New
107 | ~~~
108 | 
109 | - Add parallel testing. [Reuben Cummings]
110 | 
111 | v0.30.2 (2016-06-16)
112 | --------------------
113 | 
114 | Bugfixes
115 | ~~~~~~~~
116 | 
117 | - Add missing optional dependency. [Reuben Cummings]
118 | 
119 | v0.30.1 (2016-06-16)
120 | --------------------
121 | 
122 | Bugfixes
123 | ~~~~~~~~
124 | 
125 | - Fix failed test runner. [Reuben Cummings]
126 | 
127 | - Fix lxml dependency errors. [Reuben Cummings]
128 | 
129 | v0.30.0 (2016-06-15)
130 | --------------------
131 | 
132 | New
133 | ~~~
134 | 
135 | - Try loading workflow from curdir first. [Reuben Cummings]
136 | 
137 | Bugfixes
138 | ~~~~~~~~
139 | 
140 | - Fix remaining pypy errors. [Reuben Cummings]
141 | 
142 | - Fix “newdict instance” error for pypy. [Reuben Cummings]
143 | 
144 | - Add detagging to `fetchpage` async parser. [Reuben Cummings]
145 | 
146 | v0.28.0 (2016-03-25)
147 | --------------------
148 | 
149 | New
150 | ~~~
151 | 
152 | - Add option to specify value if no regex match found. [Reuben Cummings]
153 | 
154 | Changes
155 | ~~~~~~~
156 | 
157 | - Make default exchange rate field ‘content’ [Reuben Cummings]
158 | 
159 | - Split now returns tier of feeds. [Reuben Cummings]
160 | 
161 | Bugfixes
162 | ~~~~~~~~
163 | 
164 | - Fix test mode for input pipe. [Reuben Cummings]
165 | 
166 | - Fix terminal parsing. [Reuben Cummings]
167 | 
168 | - Fix input pipe if no inputs given. [Reuben Cummings]
169 | 
170 | - Fix sleep config. [Reuben Cummings]
171 | 
172 | - Fix json bool parsing. [Reuben Cummings]
173 | 
174 | 
175 | 


--------------------------------------------------------------------------------
/docs/INSTALLATION.rst:
--------------------------------------------------------------------------------
 1 | Installation
 2 | ------------
 3 | 
 4 | (You are using a `virtualenv`_, right?)
 5 | 
 6 | At the command line, install riko using either ``pip`` (recommended)
 7 | 
 8 | .. code-block:: bash
 9 | 
10 |     pip install riko
11 | 
12 | or ``easy_install``
13 | 
14 | .. code-block:: bash
15 | 
16 |     easy_install riko
17 | 
18 | Detailed installation instructions
19 | ----------------------------------
20 | 
21 | If you have `virtualenvwrapper`_ installed, at the command line type:
22 | 
23 | .. code-block:: bash
24 | 
25 |     mkvirtualenv riko
26 |     pip install riko
27 | 
28 | Or, if you only have ``virtualenv`` installed:
29 | 
30 | .. code-block:: bash
31 | 
32 | 	virtualenv ~/.venvs/riko
33 | 	source ~/.venvs/riko/bin/activate
34 | 	pip install riko
35 | 
36 | Otherwise, you can install globally::
37 | 
38 |     pip install riko
39 | 
40 | .. _virtualenv: https://virtualenv.pypa.io/en/latest/index.html
41 | .. _virtualenvwrapper: https://virtualenvwrapper.readthedocs.org/en/latest/
42 | 


--------------------------------------------------------------------------------
/docs/TODO.rst:
--------------------------------------------------------------------------------
 1 | ====
 2 | TODO
 3 | ====
 4 | 
 5 | - Upgrade to Python 3
 6 | - Add more protocols (FTP, SSH, IMAP, etc.)
 7 | - Add HDFS support
 8 | 
 9 | .. todo:: vim: set filetype=rst:
10 | 


--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nerevu/riko/4d27102b605b8b4050ba566d5e0895d8d5f8b09a/examples/__init__.py


--------------------------------------------------------------------------------
/examples/demo.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # vim: sw=4:ts=4:expandtab
 3 | 
 4 | """
 5 | riko demo
 6 | ~~~~~~~~~
 7 | 
 8 | Word Count
 9 | 
10 |     >>> from riko import get_path
11 |     >>> from riko.collections import SyncPipe
12 |     >>>
13 |     >>> url = get_path('users.jyu.fi.html')
14 |     >>> fetch_conf = {
15 |     ...     'url': url, 'start': '<body>', 'end': '</body>', 'detag': True}
16 |     >>> replace_conf = {'rule': {'find': '\\n', 'replace': ' '}}
17 |     >>>
18 |     >>> counts = (SyncPipe('fetchpage', conf=fetch_conf)
19 |     ...     .strreplace(conf=replace_conf, assign='content')
20 |     ...     .tokenizer(conf={'delimiter': ' '}, emit=True)
21 |     ...     .count()
22 |     ...     .output)
23 |     >>>
24 |     >>> next(counts) == {'count': 70}
25 |     True
26 | 
27 | Fetching feeds
28 | 
29 |     >>> from riko.modules import fetch
30 |     >>>
31 |     >>> url = get_path('gawker.xml')
32 |     >>> intersection = [
33 |     ...     'author', 'author.name', 'author.uri', 'dc:creator', 'id', 'link',
34 |     ...     'pubDate', 'summary', 'title', 'y:id', 'y:published', 'y:title']
35 |     >>> feed = fetch.pipe(conf={'url': url})
36 |     >>> item = next(feed)
37 |     >>> set(item).issuperset(intersection)
38 |     True
39 |     >>> item['title'][:24] == 'This Is What A Celebrity'
40 |     True
41 |     >>> item['link'][:23] == 'http://feeds.gawker.com'
42 |     True
43 | """
44 | from riko import get_path
45 | from riko.bado import coroutine
46 | from riko.collections import SyncPipe, AsyncPipe
47 | 
48 | replace_conf = {"rule": {"find": "\n", "replace": " "}}
49 | health = get_path("health.xml")
50 | caltrain = get_path("caltrain.html")
51 | start = '<body id="thebody" class="Level2">'
52 | fetch_conf = {"url": caltrain, "start": start, "end": "</body>", "detag": True}
53 | 
54 | 
55 | def pipe(test=False):
56 |     s1 = SyncPipe("fetch", test=test, conf={"url": health}).output
57 |     s2 = (
58 |         SyncPipe("fetchpage", test=test, conf=fetch_conf)
59 |         .strreplace(conf=replace_conf, assign="content")
60 |         .tokenizer(conf={"delimiter": " "}, emit=True)
61 |         .count()
62 |         .output
63 |     )
64 | 
65 |     print(next(s1)["title"], next(s2)["count"])
66 | 
67 | 
68 | @coroutine
69 | def async_pipe(reactor, test=False):
70 |     s1 = yield AsyncPipe("fetch", test=test, conf={"url": health}).output
71 |     s2 = yield (
72 |         AsyncPipe("fetchpage", test=test, conf=fetch_conf)
73 |         .strreplace(conf=replace_conf, assign="content")
74 |         .tokenizer(conf={"delimiter": " "}, emit=True)
75 |         .count()
76 |         .output
77 |     )
78 | 
79 |     print(next(s1)["title"], next(s2)["count"])
80 | 


--------------------------------------------------------------------------------
/examples/gigs.py:
--------------------------------------------------------------------------------
 1 | from pprint import pprint
 2 | from riko import get_path
 3 | from riko.bado import coroutine
 4 | from riko.collections import SyncPipe, AsyncPipe
 5 | 
 6 | p1_conf = {"url": get_path("gigs.json"), "path": "value.items"}
 7 | p2_conf = {"uniq_key": "link"}
 8 | p3_conf = {
 9 |     "combine": "or",
10 |     "mode": "block",
11 |     "rule": [{"field": "title", "value": "php", "op": "contains"}],
12 | }
13 | 
14 | p4_conf = {"rule": [{"sort_key": "pubDate", "sort_dir": "desc"}]}
15 | 
16 | 
17 | def pipe(test=False):
18 |     stream = (
19 |         SyncPipe("fetchdata", conf=p1_conf, test=test)
20 |         .uniq(conf=p2_conf)
21 |         .filter(conf=p3_conf)
22 |         .sort(conf=p4_conf)
23 |         .list
24 |     )
25 | 
26 |     for i in stream:
27 |         pprint(i)
28 | 
29 |     return stream
30 | 
31 | 
32 | @coroutine
33 | def async_pipe(reactor, test=False):
34 |     stream = yield (
35 |         AsyncPipe("fetchdata", conf=p1_conf, test=test)
36 |         .uniq(conf=p2_conf)
37 |         .filter(conf=p3_conf)
38 |         .sort(conf=p4_conf)
39 |         .output
40 |     )
41 | 
42 |     for i in stream:
43 |         pprint(i)
44 | 


--------------------------------------------------------------------------------
/examples/simple1.py:
--------------------------------------------------------------------------------
 1 | from pprint import pprint
 2 | from riko.bado import coroutine
 3 | from riko.collections import SyncPipe, AsyncPipe
 4 | 
 5 | p1_conf = {
 6 |     "attrs": [{"value": "http://www.caltrain.com/Fares/farechart.html", "key": "url"}]
 7 | }
 8 | 
 9 | p2_conf = {"rule": {"field": "url", "match": {"subkey": "url"}, "replace": "farechart"}}
10 | 
11 | 
12 | def pipe(test=False):
13 |     stream = SyncPipe("itembuilder", conf=p1_conf, test=test).regex(conf=p2_conf).list
14 | 
15 |     for i in stream:
16 |         pprint(str(i["url"]))
17 | 
18 |     return stream
19 | 
20 | 
21 | @coroutine
22 | def async_pipe(reactor, test=False):
23 |     stream = yield (
24 |         AsyncPipe("itembuilder", conf=p1_conf, test=test).regex(conf=p2_conf).list
25 |     )
26 | 
27 |     for i in stream:
28 |         pprint(str(i["url"]))
29 | 


--------------------------------------------------------------------------------
/examples/simple2.py:
--------------------------------------------------------------------------------
 1 | from pprint import pprint
 2 | from riko.bado import coroutine
 3 | from riko.collections import SyncPipe, AsyncPipe
 4 | 
 5 | p232_conf = {
 6 |     "attrs": [
 7 |         {"value": "www.google.com", "key": "link"},
 8 |         {"value": "google", "key": "title"},
 9 |         {"value": "empty", "key": "author"},
10 |     ]
11 | }
12 | 
13 | p421_conf = {"rule": [{"find": "empty", "param": "first", "replace": "ABC"}]}
14 | 
15 | 
16 | def pipe(test=False):
17 |     stream = (
18 |         SyncPipe("itembuilder", conf=p232_conf, test=test)
19 |         .strreplace(conf=p421_conf, field="author", assign="author")
20 |         .list
21 |     )
22 | 
23 |     for i in stream:
24 |         pprint(i)
25 | 
26 |     return stream
27 | 
28 | 
29 | @coroutine
30 | def async_pipe(reactor, test=False):
31 |     stream = yield (
32 |         AsyncPipe("itembuilder", conf=p232_conf, test=test)
33 |         .strreplace(conf=p421_conf, field="author", assign="author")
34 |         .list
35 |     )
36 | 
37 |     for i in stream:
38 |         pprint(i)
39 | 


--------------------------------------------------------------------------------
/examples/split.py:
--------------------------------------------------------------------------------
 1 | from pprint import pprint
 2 | from riko.bado import coroutine
 3 | from riko.collections import SyncPipe, AsyncPipe
 4 | 
 5 | p385_conf = {"type": "date"}
 6 | p385_in = {"content": "12/2/2014"}
 7 | p405_conf = {"format": "%B %d, %Y"}
 8 | p393_conf = {
 9 |     "attrs": [
10 |         {"value": {"terminal": "date", "path": "dateformat"}, "key": "date"},
11 |         {"value": {"terminal": "year", "path": "year"}, "key": "year"},
12 |     ]
13 | }
14 | 
15 | p385_kwargs = {"conf": p385_conf, "inputs": p385_in}
16 | 
17 | 
18 | def pipe(test=False):
19 |     s1, s2 = (
20 |         SyncPipe("input", test=test, **p385_kwargs)
21 |         .dateformat(conf=p405_conf)
22 |         .split()
23 |         .output
24 |     )
25 | 
26 |     p393_kwargs = {"conf": p393_conf, "date": s1, "year": s2, "test": test}
27 |     stream = SyncPipe("itembuilder", **p393_kwargs).list
28 | 
29 |     for i in stream:
30 |         pprint(i)
31 | 
32 |     return stream
33 | 
34 | 
35 | @coroutine
36 | def async_pipe(reactor, test=False):
37 |     s1, s2 = yield (
38 |         AsyncPipe("input", test=test, **p385_kwargs)
39 |         .dateformat(conf=p405_conf)
40 |         .split()
41 |         .output
42 |     )
43 | 
44 |     p393_kwargs = {"conf": p393_conf, "date": s1, "year": s2, "test": test}
45 |     stream = yield AsyncPipe("itembuilder", **p393_kwargs).list
46 | 
47 |     for i in stream:
48 |         pprint(i)
49 | 


--------------------------------------------------------------------------------
/examples/wired.py:
--------------------------------------------------------------------------------
 1 | from pprint import pprint
 2 | from riko.bado import coroutine
 3 | from riko.collections import SyncPipe, AsyncPipe
 4 | 
 5 | p120_conf = {"type": "text"}
 6 | p120_inputs = {"format": "%B %d, %Y"}
 7 | p112_conf = {"type": "date", "default": "5/4/82", "prompt": "enter a date"}
 8 | p151_conf = {"format": {"terminal": "format", "path": "format"}}
 9 | p100_conf = {
10 |     "attrs": {"value": {"terminal": "value", "path": "dateformat"}, "key": "date"}
11 | }
12 | 
13 | p120_kwargs = {"conf": p120_conf, "inputs": p120_inputs, "assign": "format"}
14 | 
15 | 
16 | def pipe(test=False):
17 |     s1 = SyncPipe("input", test=test, **p120_kwargs).output
18 |     s2 = (
19 |         SyncPipe("input", conf=p112_conf, test=test)
20 |         .dateformat(conf=p151_conf, format=s1)
21 |         .output
22 |     )
23 | 
24 |     stream = SyncPipe("itembuilder", conf=p100_conf, value=s2, test=test).list
25 | 
26 |     for i in stream:
27 |         pprint(i)
28 | 
29 |     return stream
30 | 
31 | 
32 | @coroutine
33 | def async_pipe(reactor, test=False):
34 |     s1 = yield AsyncPipe("input", test=test, **p120_kwargs).output
35 |     s2 = yield (
36 |         AsyncPipe("input", conf=p112_conf, test=test)
37 |         .dateformat(conf=p151_conf, format=s1)
38 |         .output
39 |     )
40 | 
41 |     output_kwargs = {"conf": p100_conf, "value": s2, "test": test}
42 |     output = yield (AsyncPipe("itembuilder", **output_kwargs).list)
43 | 
44 |     for i in output:
45 |         pprint(i)
46 | 


--------------------------------------------------------------------------------
/helpers/check-stage:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # -*- coding: utf-8 -*-
 3 | #
 4 | # A script to disallow syntax errors to be committed
 5 | # by running a checker (lint, pep8, pylint...)  on them
 6 | #
 7 | # to install type ln -s check-stage .git/hooks/pre-commit
 8 | 
 9 | # Redirect output to stderr.
10 | exec 2>&1
11 | 
12 | # set path (necessary for gitx and git-gui)
13 | export PATH=$PATH:/opt/local/bin:/opt/local/sbin:/usr/local/sbin:/usr/local/bin
14 | 
15 | # necessary check for initial commit
16 | if [ git rev-parse --verify HEAD >/dev/null 2>&1 ]; then
17 |   against=HEAD
18 | else
19 |   # Initial commit: diff against an empty tree object
20 |   against=4b825dc642cb6eb9a060e54bf8d69288fbee4904
21 | fi
22 | 
23 | # set Internal Field Separator to newline (dash does not support $'\n')
24 | IFS='
25 | '
26 | 
27 | # get a list of staged files
28 | for LINE in $(git diff-index --cached --full-index $against); do
29 |   SHA=$(echo $LINE | cut -d' ' -f4)
30 |   STATUS=$(echo $LINE | cut -d' ' -f5 | cut -d' ' -f1)
31 |   FILENAME=$(echo $LINE | cut -d' ' -f5 | cut -d' ' -f2)
32 |   FILEEXT=$(echo $FILENAME | sed 's/^.*\.//')
33 | 
34 |   # do not check deleted files
35 |   if [ $STATUS == "D" ]; then
36 |     continue
37 |   fi
38 | 
39 |   # only check files with proper extension
40 |   if [ $FILEEXT == 'php' ]; then
41 |     PROGRAMS='php'
42 |     COMMANDS='php -l'
43 |   elif [ $FILEEXT == 'py' ]; then
44 |     PROGRAMS=$'pep8\npylint'
45 |     COMMANDS=$'pep8 --ignore=W191,E128'
46 |   else
47 |     continue
48 |   fi
49 | 
50 |   for PROGRAM in $PROGRAMS; do
51 |     test $(which $PROGRAM)
52 | 
53 |     if [ $? != 0 ]; then
54 |       echo "$PROGRAM binary does not exist or is not in path"
55 |       exit 1
56 |     fi
57 |   done
58 | 
59 |   # check the staged content for syntax errors
60 |   for COMMAND in $COMMANDS; do
61 |     git cat-file -p $SHA > tmp.txt
62 |     RESULT=$(eval "$COMMAND tmp.txt")
63 | 
64 |     if [ $? != 0 ]; then
65 |       echo "$COMMAND syntax check failed on $FILENAME"
66 |       for LINE in $RESULT; do echo $LINE; done
67 |       rm tmp.txt
68 |       exit 1
69 |     fi
70 |   done
71 | done
72 | 
73 | unset IFS
74 | rm tmp.txt
75 | 
76 | # If there are whitespace errors, print the offending file names and fail.
77 | # exec git diff-index --check --cached $against --
78 | 


--------------------------------------------------------------------------------
/helpers/clean:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # remove build artifacts
 5 | rm -fr build/
 6 | rm -fr dist/
 7 | rm -fr *.egg-info
 8 | 
 9 | # remove Python file artifacts
10 | find . -name '*.pyc' -exec rm -f {} +
11 | find . -name '*.pyo' -exec rm -f {} +
12 | find . -name '*~' -exec rm -f {} +
13 | 


--------------------------------------------------------------------------------
/helpers/pippy:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env sh -u
 2 | #
 3 | 
 4 | cecho(){
 5 |   GREEN="\033[0;32m"
 6 |   YELLOW="\033[1;33m"
 7 |   RED="\033[0;31m"
 8 |   NO_COLOR="\033[0m"
 9 |   printf "${!1}${2} ${NO_COLOR}\n"
10 | }
11 | 
12 | beginswith() { case $2 in "$1"*) true;; *) false;; esac; }
13 | 
14 | _install () {
15 |   package=$1
16 | 
17 |   if beginswith -e "$package"; then
18 |     cecho "GREEN" "installing $package normally…"
19 |     pip install "$package"
20 |   else
21 |     cecho "GREEN" "installing $package from ${PIP_WHEEL_DIR}…"
22 |     pip install --no-index --only-binary=:all: --find-links="$PIP_WHEEL_DIR" "$package"
23 | 
24 |     if [ $? -eq 1 ]; then
25 |       cecho "YELLOW" "$package not found in cache, downloading…"
26 | 
27 |       if beginswith wheel "$package" || beginswith pip "$package"; then
28 |         pip install -U "$package"
29 |       else
30 |         pip download --dest="$PIP_CACHE_DIR" "$package"
31 |         pip wheel --no-index --find-links="$PIP_CACHE_DIR" --wheel-dir="$PIP_WHEEL_DIR" "$package"
32 |         pip install --no-index --only-binary=:all: --find-links="$PIP_WHEEL_DIR" "$package"
33 |       fi
34 |     fi
35 | 
36 |     if [ $? -eq 1 ]; then
37 |       cecho "RED" "$package has no wheel, installing normally…"
38 |       pip install "$package"
39 |     fi
40 |   fi
41 | }
42 | 
43 | parse () {
44 |   file=$1
45 | 
46 |   if beginswith -r "$file"; then
47 |     REQUIREMENTS=`echo "$file" | sed 's/^-r\s*//'`
48 | 
49 |     while read line; do
50 |       parse $line
51 |     done < "$REQUIREMENTS"
52 |   else
53 |     echo "$file"
54 |   fi
55 | }
56 | 
57 | for package in `parse "$@"`; do
58 |   cecho "GREEN" "$package"
59 |   _install "$package"
60 | done
61 | 
62 | 


--------------------------------------------------------------------------------
/helpers/srcdist:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # -*- coding: utf-8 -*-
3 | 
4 | # create a source distribution package
5 | 
6 | python setup.py sdist
7 | gpg --detach-sign -a dist/*.tar.gz
8 | 


--------------------------------------------------------------------------------
/helpers/wheel:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # -*- coding: utf-8 -*-
3 | 
4 | # create a wheel package
5 | 
6 | python setup.py bdist_wheel
7 | gpg --detach-sign -a dist/*.whl
8 | 


--------------------------------------------------------------------------------
/manage.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # vim: sw=4:ts=4:expandtab
  4 | 
  5 | """ A script to manage development tasks """
  6 | from os import path as p
  7 | from subprocess import call, check_call, CalledProcessError
  8 | from manager import Manager
  9 | 
 10 | manager = Manager()
 11 | BASEDIR = p.dirname(__file__)
 12 | DEF_WHERE = ["riko", "tests", "examples", "setup.py", "manage.py"]
 13 | 
 14 | 
 15 | def _upload():
 16 |     """Upload distribution files"""
 17 |     _uploaddir = p.join(BASEDIR, "dist", "*")
 18 |     url = "https://upload.pypi.org/legacy/"
 19 |     check_call(["twine", "upload", "--repository-url", url, _uploaddir])
 20 | 
 21 | 
 22 | def _sdist():
 23 |     """Create a source distribution package"""
 24 |     check_call(p.join(BASEDIR, "helpers", "srcdist"))
 25 | 
 26 | 
 27 | def _wheel():
 28 |     """Create a wheel package"""
 29 |     check_call(p.join(BASEDIR, "helpers", "wheel"))
 30 | 
 31 | 
 32 | def _clean():
 33 |     """Remove Python file and build artifacts"""
 34 |     check_call(p.join(BASEDIR, "helpers", "clean"))
 35 | 
 36 | 
 37 | @manager.command
 38 | def check():
 39 |     """Check staged changes for lint errors"""
 40 |     exit(call(p.join(BASEDIR, "helpers", "check-stage")))
 41 | 
 42 | 
 43 | @manager.arg("where", "w", help="Modules to check")
 44 | @manager.arg("strict", "s", help="Check with pylint")
 45 | @manager.command
 46 | def lint(where=None, strict=False):
 47 |     """Check style with linters"""
 48 |     extra = where.split(" ") if where else DEF_WHERE
 49 |     args = ["pylint", "--rcfile=tests/pylintrc", "-rn", "-fparseable"]
 50 | 
 51 |     try:
 52 |         if strict:
 53 |             check_call(args + extra)
 54 |         else:
 55 |             check_call(["flake8"] + extra)
 56 |     except CalledProcessError as e:
 57 |         exit(e.returncode)
 58 | 
 59 | 
 60 | @manager.arg("where", "w", help="Modules to check")
 61 | @manager.command
 62 | def prettify(where=None):
 63 |     """Prettify code with black"""
 64 |     extra = where.split(" ") if where else DEF_WHERE
 65 | 
 66 |     try:
 67 |         check_call(["black"] + extra)
 68 |     except CalledProcessError as e:
 69 |         exit(e.returncode)
 70 | 
 71 | 
 72 | @manager.command
 73 | def require():
 74 |     """Create requirements.txt"""
 75 |     cmd = "pip freeze -l | grep -vxFf dev-requirements.txt > requirements.txt"
 76 |     exit(call(cmd, shell=True))
 77 | 
 78 | 
 79 | @manager.arg("where", "w", help="test path", default=None)
 80 | @manager.arg("stop", "x", help="Stop after first error", type=bool, default=False)
 81 | @manager.arg("failed", "f", help="Run failed tests", type=bool, default=False)
 82 | @manager.arg("cover", "c", help="Add coverage report", type=bool, default=False)
 83 | @manager.arg("tox", "t", help="Run tox tests", type=bool, default=False)
 84 | @manager.arg("detox", "d", help="Run detox tests", type=bool, default=False)
 85 | @manager.arg("verbose", "v", help="Use detailed errors", type=bool, default=False)
 86 | @manager.arg(
 87 |     "parallel",
 88 |     "p",
 89 |     help="Run tests in parallel in multiple processes",
 90 |     type=bool,
 91 |     default=False,
 92 | )
 93 | @manager.arg("debug", "D", help="Use nose.loader debugger", type=bool, default=False)
 94 | @manager.command
 95 | def test(where=None, stop=None, **kwargs):
 96 |     """Run nose, tox, and script tests"""
 97 |     opts = "-xv" if stop else "-v"
 98 |     opts += " --with-coverage" if kwargs.get("cover") else ""
 99 |     opts += " --failed" if kwargs.get("failed") else " --with-id"
100 |     opts += " --processes=-1" if kwargs.get("parallel") else ""
101 |     opts += " --detailed-errors" if kwargs.get("verbose") else ""
102 |     opts += " --debug=nose.loader" if kwargs.get("debug") else ""
103 |     opts += " -w %s" % where if where else ""
104 | 
105 |     try:
106 |         if kwargs.get("tox"):
107 |             check_call("tox")
108 |         elif kwargs.get("detox"):
109 |             check_call("detox")
110 |         else:
111 |             check_call(("nosetests %s" % opts).split(" "))
112 |     except CalledProcessError as e:
113 |         exit(e.returncode)
114 | 
115 | 
116 | @manager.command
117 | def register():
118 |     """Register package with PyPI"""
119 |     exit(call("python", p.join(BASEDIR, "setup.py"), "register"))
120 | 
121 | 
122 | @manager.command
123 | def release():
124 |     """Package and upload a release"""
125 |     try:
126 |         _clean()
127 |         _sdist()
128 |         _wheel()
129 |         _upload()
130 |     except CalledProcessError as e:
131 |         exit(e.returncode)
132 | 
133 | 
134 | @manager.command
135 | def build():
136 |     """Create a source distribution and wheel package"""
137 |     try:
138 |         _clean()
139 |         _sdist()
140 |         _wheel()
141 |     except CalledProcessError as e:
142 |         exit(e.returncode)
143 | 
144 | 
145 | @manager.command
146 | def upload():
147 |     """Upload distribution files"""
148 |     try:
149 |         _upload()
150 |     except CalledProcessError as e:
151 |         exit(e.returncode)
152 | 
153 | 
154 | @manager.command
155 | def sdist():
156 |     """Create a source distribution package"""
157 |     try:
158 |         _sdist()
159 |     except CalledProcessError as e:
160 |         exit(e.returncode)
161 | 
162 | 
163 | @manager.command
164 | def wheel():
165 |     """Create a wheel package"""
166 |     try:
167 |         _wheel()
168 |     except CalledProcessError as e:
169 |         exit(e.returncode)
170 | 
171 | 
172 | @manager.command
173 | def clean():
174 |     """Remove Python file and build artifacts"""
175 |     try:
176 |         _clean()
177 |     except CalledProcessError as e:
178 |         exit(e.returncode)
179 | 
180 | 
181 | if __name__ == "__main__":
182 |     manager.main()
183 | 


--------------------------------------------------------------------------------
/optional-requirements.txt:
--------------------------------------------------------------------------------
1 | lxml>=4.5.0,<5.0.0
2 | treq>=18.6.0,<22.0.0
3 | Twisted>=19.10.0,<22.0.0
4 | speedparser3~=0.3.1
5 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=42.0.2", "pkutils>=3.0.0,<4.0.0"]
3 | build-backend = "setuptools.build_meta"
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | Babel>=2.8.0,<3.0.0
 2 | chardet>=3.0.4,<4.0.0
 3 | feedparser>=5.2.1,<7.0.0
 4 | meza>=0.42.5,<1.0.0
 5 | Mezmorize>=0.27.0,<1.0.0
 6 | python-dateutil>=2.8.1,<3.0.0
 7 | pygogo>=0.12.0,<2.0.0
 8 | requests>=2.22.0,<3.0.0
 9 | html5lib>=1.0.1,<2.0.0
10 | pytz>=2019.3
11 | 


--------------------------------------------------------------------------------
/riko/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # vim: sw=4:ts=4:expandtab
 3 | """
 4 | riko
 5 | ~~~~
 6 | Provides functions for analyzing and processing streams of structured data
 7 | 
 8 | Examples:
 9 |     basic usage::
10 | 
11 |         >>> from itertools import chain
12 |         >>> from functools import partial
13 |         >>> from riko.modules import itembuilder, strreplace
14 |         >>> from riko.collections import SyncPipe
15 |         >>>
16 |         >>> ib_conf = {
17 |         ...     'attrs': [
18 |         ...         {'key': 'link', 'value': 'www.google.com', },
19 |         ...         {'key': 'title', 'value': 'google', },
20 |         ...         {'key': 'author', 'value': 'Tommy'}]}
21 |         >>>
22 |         >>> sr_conf = {
23 |         ...     'rule': [{'find': 'Tom', 'param': 'first', 'replace': 'Tim'}]}
24 |         >>>
25 |         >>> items = itembuilder.pipe(conf=ib_conf)
26 |         >>> pipe = partial(strreplace.pipe, conf=sr_conf, field='author')
27 |         >>> replaced = map(pipe, items)
28 |         >>> next(chain.from_iterable(replaced)) == {
29 |         ...     'link': 'www.google.com', 'title': 'google',
30 |         ...     'strreplace': 'Timmy', 'author': 'Tommy'}
31 |         True
32 | """
33 | from os import path as p
34 | 
35 | __version__ = "0.67.0"
36 | 
37 | __title__ = "riko"
38 | __package_name__ = "riko"
39 | __author__ = "Reuben Cummings"
40 | __description__ = "A stream processing engine modeled after Yahoo! Pipes."
41 | __email__ = "reubano@gmail.com"
42 | __license__ = "MIT"
43 | __copyright__ = "Copyright 2015 Reuben Cummings"
44 | 
45 | PARENT_DIR = p.abspath(p.dirname(__file__))
46 | ENCODING = "utf-8"
47 | 
48 | 
49 | def get_path(name):
50 |     return "file://%s" % p.join(PARENT_DIR, "data", name)
51 | 


--------------------------------------------------------------------------------
/riko/autorss.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # vim: sw=4:ts=4:expandtab
 3 | """
 4 | riko.autorss
 5 | ~~~~~~~~~~~~
 6 | Provides functions for finding RSS feeds from a site's LINK tags
 7 | """
 8 | import pygogo as gogo
 9 | 
10 | from itertools import chain
11 | from html.parser import HTMLParser
12 | 
13 | from meza.compat import decode
14 | from riko.utils import fetch
15 | from riko.bado import coroutine, return_value, microdom
16 | from riko.bado.io import async_url_open
17 | 
18 | TIMEOUT = 10
19 | logger = gogo.Gogo(__name__, monolog=True).logger
20 | 
21 | 
22 | class LinkParser(HTMLParser):
23 |     def reset(self):
24 |         HTMLParser.reset(self)
25 |         self.entry = iter([])
26 | 
27 |     def handle_starttag(self, tag, attrs):
28 |         entry = dict(attrs)
29 |         alternate = entry.get("rel") == "alternate"
30 |         rss = "rss" in entry.get("type", "")
31 | 
32 |         if (alternate or rss) and "href" in entry:
33 |             entry["link"] = entry["href"]
34 |             entry["tag"] = tag
35 |             self.entry = chain(self.entry, [entry])
36 | 
37 | 
38 | def file2entries(f, parser):
39 |     for line in f:
40 |         parser.feed(decode(line))
41 | 
42 |         for entry in parser.entry:
43 |             yield entry
44 | 
45 | 
46 | def doc2entries(document):
47 |     for node in document.childNodes:
48 |         if hasattr(node, "attributes") and node.attributes:
49 |             entry = node.attributes
50 |             alternate = entry.get("rel") == "alternate"
51 |             rss = "rss" in entry.get("type", "")
52 |         else:
53 |             alternate = rss = None
54 | 
55 |         if (alternate or rss) and "href" in entry:
56 |             entry["link"] = entry["href"]
57 |             entry["tag"] = node.nodeName
58 |             yield entry
59 | 
60 |     for node in document.childNodes:
61 |         for entry in doc2entries(node):
62 |             yield entry
63 | 
64 | 
65 | @coroutine
66 | def async_get_rss(url, convert_charrefs=False):
67 |     try:
68 |         f = yield async_url_open(url, timeout=TIMEOUT)
69 |     except ValueError:
70 |         f = filter(None, url.splitlines())
71 | 
72 |     document = microdom.parse(f, lenient=True)
73 |     return_value(doc2entries(document))
74 | 
75 | 
76 | def get_rss(url, convert_charrefs=False):
77 |     try:
78 |         parser = LinkParser(convert_charrefs=convert_charrefs)
79 |     except TypeError:
80 |         parser = LinkParser()
81 | 
82 |     try:
83 |         f = fetch(url, timeout=TIMEOUT)
84 |     except ValueError:
85 |         f = filter(None, url.splitlines())
86 | 
87 |     return file2entries(f, parser)
88 | 


--------------------------------------------------------------------------------
/riko/bado/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # vim: sw=4:ts=4:expandtab
 3 | """
 4 | riko.bado
 5 | ~~~~~~~~~
 6 | Provides functions for creating asynchronous riko pipes
 7 | 
 8 | Examples:
 9 |     basic usage::
10 | 
11 |         >>> from riko import get_path
12 |         >>> from riko.bado import react
13 | """
14 | 
15 | try:
16 |     from twisted.internet.task import react
17 | except ImportError:
18 |     react = lambda _, _reactor=None: None
19 |     inlineCallbacks = lambda _: lambda: None
20 |     returnValue = lambda _: lambda: None
21 |     backend = "empty"
22 | else:
23 |     from twisted.internet.defer import inlineCallbacks
24 |     from twisted.internet.defer import returnValue
25 | 
26 |     backend = "twisted"
27 | 
28 | 
29 | class Reactor(object):
30 |     fake = False
31 | 
32 | 
33 | reactor = Reactor()
34 | coroutine = inlineCallbacks
35 | return_value = returnValue
36 | _issync = backend == "empty"
37 | _isasync = not _issync
38 | 


--------------------------------------------------------------------------------
/riko/bado/io.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.bado.io
  5 | ~~~~~~~~~~~~
  6 | Provides functions for asynchronously reading files and urls
  7 | 
  8 | Examples:
  9 |     basic usage::
 10 | 
 11 |         >>> from riko import get_path
 12 |         >>> from riko.bado.io import async_url_open
 13 | """
 14 | import pygogo as gogo
 15 | 
 16 | from io import open
 17 | from tempfile import NamedTemporaryFile
 18 | from os import remove
 19 | 
 20 | from meza.compat import encode
 21 | 
 22 | from . import coroutine, return_value
 23 | 
 24 | try:
 25 |     from twisted.test.proto_helpers import AccumulatingProtocol
 26 | except ImportError:
 27 |     AccumulatingProtocol = object
 28 | else:
 29 |     from twisted.internet.reactor import callLater
 30 |     from twisted.protocols.basic import FileSender
 31 |     from twisted.web.client import getPage, downloadPage
 32 |     from twisted.test.proto_helpers import StringTransport
 33 | 
 34 | logger = gogo.Gogo(__name__, monolog=True).logger
 35 | 
 36 | 
 37 | # http://stackoverflow.com/q/26314586/408556
 38 | # http://stackoverflow.com/q/8157197/408556
 39 | # http://stackoverflow.com/a/33708936/408556
 40 | class FileReader(AccumulatingProtocol):
 41 |     def __init__(self, filename, transform=None, delay=0, verbose=False):
 42 |         self.f = open(filename, "rb")
 43 |         self.transform = transform
 44 |         self.delay = delay
 45 |         self.producer = FileSender()
 46 |         self.logger = gogo.Gogo(__name__, verbose=verbose).logger
 47 | 
 48 |     def cleanup(self, *args):
 49 |         self.f.close()
 50 |         self.producer.stopProducing()
 51 | 
 52 |     def resumeProducing(self):
 53 |         chunk = self.file.read(self.CHUNK_SIZE) if self.file else ""
 54 | 
 55 |         if not chunk:
 56 |             self.file = None
 57 |             self.consumer.unregisterProducer()
 58 | 
 59 |             if self.deferred and self.delay:
 60 |                 callLater(self.delay, self.deferred.callback, self.lastSent)
 61 |             elif self.deferred:
 62 |                 self.deferred.callback(self.lastSent)
 63 | 
 64 |             self.deferred = None
 65 |             return
 66 | 
 67 |     def connectionLost(self, reason):
 68 |         self.logger.debug("connectionLost: %s", reason)
 69 |         self.cleanup()
 70 | 
 71 |     def connectionMade(self):
 72 |         self.logger.debug("Connection made from %s", self.transport.getPeer())
 73 |         args = (self.f, self.transport, self.transform)
 74 |         self.d = self.closedDeferred = self.producer.beginFileTransfer(*args)
 75 | 
 76 |         while not self.d.called:
 77 |             self.producer.resumeProducing()
 78 | 
 79 |         self.d.addErrback(self.logger.error)
 80 |         self.d.addBoth(self.cleanup)
 81 | 
 82 | 
 83 | @coroutine
 84 | def async_read_file(filename, transport, protocol=FileReader, **kwargs):
 85 |     proto = protocol(filename.replace("file://", ""), **kwargs)
 86 |     proto.makeConnection(transport)
 87 |     yield proto.d
 88 |     # return_value(proto.data)
 89 |     return_value(proto.transport.value())
 90 | 
 91 | 
 92 | @coroutine
 93 | def async_get_file(filename, transport, protocol=FileReader, **kwargs):
 94 |     proto = protocol(filename.replace("file://", ""), **kwargs)
 95 |     proto.makeConnection(transport)
 96 |     yield proto.d
 97 |     proto.transport.io.seek(0)
 98 |     return_value(proto.transport.io)
 99 | 
100 | 
101 | @coroutine
102 | def async_url_open(url, timeout=0, **kwargs):
103 |     if url.startswith("http"):
104 |         page = NamedTemporaryFile(delete=False)
105 |         new_url = page.name
106 |         yield downloadPage(encode(url), page, timeout=timeout)
107 |     else:
108 |         page, new_url = None, url
109 | 
110 |     f = yield async_get_file(new_url, StringTransport(), **kwargs)
111 | 
112 |     if not hasattr(f, "name") and url.startswith("file"):
113 |         f.name = url.split("://")[1]
114 | 
115 |     if page:
116 |         page.close()
117 |         remove(page.name)
118 | 
119 |     return_value(f)
120 | 
121 | 
122 | def async_url_read(url, timeout=0, **kwargs):
123 |     if url.startswith("http"):
124 |         content = getPage(encode(url), timeout=timeout)
125 |     else:
126 |         content = async_read_file(url, StringTransport(), **kwargs)
127 | 
128 |     return content
129 | 


--------------------------------------------------------------------------------
/riko/bado/itertools.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.bado.itertools
  5 | ~~~~~~~~~~~~~~~~~~~
  6 | Provides asynchronous ports of various builtin itertools functions
  7 | 
  8 | Examples:
  9 |     basic usage::
 10 | 
 11 |         >>> from riko import get_path
 12 |         >>> from riko.bado.itertools import coop_reduce
 13 | """
 14 | from functools import partial
 15 | 
 16 | import itertools as it
 17 | 
 18 | from . import coroutine, return_value, reactor
 19 | from .mock import FakeReactor
 20 | 
 21 | try:
 22 |     from twisted.internet.task import Cooperator
 23 | except ImportError:
 24 |     pass
 25 | else:
 26 |     from twisted.internet import task as real_task
 27 |     from twisted.internet.defer import gatherResults
 28 | 
 29 | 
 30 | def get_task():
 31 |     if reactor.fake:
 32 |         task = Cooperator(
 33 |             scheduler=partial(FakeReactor().callLater, FakeReactor._DELAY)
 34 |         )
 35 |     else:
 36 |         task = real_task.Cooperator()
 37 | 
 38 |     return task
 39 | 
 40 | 
 41 | @coroutine
 42 | def coop_reduce(func, iterable, initializer=None):
 43 |     task = get_task()
 44 |     iterable = iter(iterable)
 45 |     x = initializer or next(iterable)
 46 |     result = {}
 47 | 
 48 |     def work(func, it, x):
 49 |         for y in it:
 50 |             result["value"] = x = func(x, y)
 51 |             yield
 52 | 
 53 |     _task = task.cooperate(work(func, iterable, x))
 54 |     yield _task.whenDone()
 55 |     return_value(result["value"])
 56 | 
 57 | 
 58 | def async_reduce(async_func, iterable, initializer=None):
 59 |     it = iter(iterable)
 60 |     x = initializer or next(it)
 61 | 
 62 |     @coroutine
 63 |     def work(async_func, it, x):
 64 |         for y in it:
 65 |             x = yield async_func(x, y)
 66 | 
 67 |         return_value(x)
 68 | 
 69 |     return work(async_func, it, x)
 70 | 
 71 | 
 72 | @coroutine
 73 | def async_map(async_func, iterable, connections=0):
 74 |     """parallel map for deferred callables using cooperative multitasking
 75 |     http://stackoverflow.com/a/20376166/408556
 76 |     """
 77 |     if connections and not reactor.fake:
 78 |         results = []
 79 |         work = (async_func(x).addCallback(results.append) for x in iterable)
 80 |         deferreds = [get_task().coiterate(work) for _ in range(connections)]
 81 |         yield gatherResults(deferreds, consumeErrors=True)
 82 |     else:
 83 |         deferreds = map(async_func, iterable)
 84 |         results = yield gatherResults(deferreds, consumeErrors=True)
 85 | 
 86 |     return_value(results)
 87 | 
 88 | 
 89 | def async_starmap(async_func, iterable):
 90 |     """itertools.starmap for deferred callables"""
 91 |     deferreds = it.starmap(async_func, iterable)
 92 |     return gatherResults(deferreds, consumeErrors=True)
 93 | 
 94 | 
 95 | def async_dispatch(split, *async_funcs, **kwargs):
 96 |     return async_starmap(lambda item, f: f(item), zip(split, async_funcs))
 97 | 
 98 | 
 99 | def async_broadcast(item, *async_funcs, **kwargs):
100 |     return async_dispatch(it.repeat(item), *async_funcs, **kwargs)
101 | 


--------------------------------------------------------------------------------
/riko/bado/mock.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # vim: sw=4:ts=4:expandtab
 3 | """
 4 | riko.bado.mock
 5 | ~~~~~~~~~~~~~~
 6 | Provides classes for mocking a reactor during tests
 7 | 
 8 | Examples:
 9 |     basic usage::
10 | 
11 |         >>> from riko import get_path
12 |         >>> from riko.bado.mock import FakeReactor
13 | """
14 | import pygogo as gogo
15 | 
16 | from . import reactor
17 | 
18 | try:
19 |     from twisted.test.proto_helpers import MemoryReactorClock
20 | except ImportError:
21 |     MemoryReactorClock = object
22 |     FakeReactor = lambda _: lambda: None
23 | 
24 | logger = gogo.Gogo(__name__, monolog=True).logger
25 | 
26 | 
27 | class FakeReactor(MemoryReactorClock):
28 |     """A fake reactor to be used in tests. This reactor doesn't actually do
29 |     much that's useful yet. It accepts TCP connection setup attempts, but
30 |     they will never succeed.
31 | 
32 |     Examples:
33 |         >>> import sys
34 |         >>>
35 |         >>> try:
36 |         ...     from twisted import internet
37 |         ... except ImportError:
38 |         ...     pass
39 |         ... else:
40 |         ...     from twisted.internet.fdesc import readFromFD, setNonBlocking
41 |         ...     FileDescriptor = internet.abstract.FileDescriptor
42 |         ...
43 |         ...     reactor = FakeReactor()
44 |         ...     f = FileDescriptor(reactor)
45 |         ...     f.fileno = sys.__stdout__.fileno
46 |         ...     fd = f.fileno()
47 |         ...     setNonBlocking(fd)
48 |         ...     readFromFD(fd, print)
49 |     """
50 | 
51 |     _DELAY = 1
52 | 
53 |     def __init__(self):
54 |         super(FakeReactor, self).__init__()
55 |         reactor.fake = True
56 |         msg = "Attention! Running fake reactor."
57 |         logger.debug(f"{msg} Some deferreds may not work as intended.")
58 | 
59 |     def callLater(self, when, what, *args, **kwargs):
60 |         """Schedule a unit of work to be done later."""
61 |         delayed = super(FakeReactor, self).callLater(when, what, *args, **kwargs)
62 |         self.pump()
63 |         return delayed
64 | 
65 |     def pump(self):
66 |         """Perform scheduled work"""
67 |         self.advance(self._DELAY)
68 | 


--------------------------------------------------------------------------------
/riko/bado/requests.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # vim: sw=4:ts=4:expandtab
 3 | """
 4 | riko.bado.requests
 5 | ~~~~~~~~~~~~~~~~~~
 6 | Provides functions for asynchronously fetching web pages
 7 | 
 8 | Examples:
 9 |     basic usage::
10 | 
11 |         >>> from riko import get_path
12 |         >>> from riko.bado import requests as treq
13 | """
14 | 
15 | try:
16 |     import treq
17 | except ImportError:
18 |     get = lambda _: lambda: None
19 |     json_content = lambda _: lambda: None
20 | else:
21 |     get = treq.get
22 |     json = treq.json_content
23 | 


--------------------------------------------------------------------------------
/riko/bado/util.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # vim: sw=4:ts=4:expandtab
 3 | """
 4 | riko.bado.util
 5 | ~~~~~~~~~~~~~~
 6 | Provides functions for creating asynchronous riko pipes
 7 | 
 8 | Examples:
 9 |     basic usage::
10 | 
11 |         >>> from riko import get_path
12 |         >>> from riko.bado.util import xml2etree
13 | """
14 | from os import environ
15 | from sys import executable
16 | from functools import partial
17 | 
18 | from riko.parsers import _make_content, entity2text
19 | 
20 | try:
21 |     from twisted.internet.defer import maybeDeferred, Deferred
22 | except ImportError:
23 |     maybeDeferred = lambda *args: None
24 | else:
25 |     from twisted.internet import defer
26 |     from twisted.internet.utils import getProcessOutput
27 |     from twisted.internet.reactor import callLater
28 | 
29 |     from . import microdom
30 |     from .microdom import EntityReference
31 | 
32 |     async_none = defer.succeed(None)
33 |     async_return = partial(defer.succeed)
34 |     async_partial = lambda f, **kwargs: partial(maybeDeferred, f, **kwargs)
35 | 
36 | 
37 | def async_sleep(seconds):
38 |     d = Deferred()
39 |     callLater(seconds, d.callback, None)
40 |     return d
41 | 
42 | 
43 | def defer_to_process(command):
44 |     return getProcessOutput(executable, ["-c", command], environ)
45 | 
46 | 
47 | def xml2etree(f, xml=True):
48 |     readable = hasattr(f, "read")
49 | 
50 |     if xml and readable:
51 |         parse = microdom.parseXML
52 |     elif readable:
53 |         parse = partial(microdom.parse, lenient=True)
54 |     elif xml:
55 |         parse = microdom.parseXMLString
56 |     else:
57 |         parse = partial(microdom.parseString, lenient=True)
58 | 
59 |     return parse(f)
60 | 
61 | 
62 | def etree2dict(element, tag="content"):
63 |     """Convert a microdom element tree into a dict imitating how Yahoo Pipes
64 |     does it.
65 | 
66 |     TODO: checkout twisted.words.xish
67 |     """
68 |     i = dict(element.attributes) if hasattr(element, "attributes") else {}
69 |     value = element.nodeValue if hasattr(element, "nodeValue") else None
70 | 
71 |     if isinstance(element, EntityReference):
72 |         value = entity2text(value)
73 | 
74 |     i.update(_make_content(i, value, tag))
75 | 
76 |     for child in element.childNodes:
77 |         tag = child.tagName if hasattr(child, "tagName") else "content"
78 |         value = etree2dict(child, tag)
79 | 
80 |         # try to join the content first since microdom likes to split up
81 |         # elements that contain a mix of text and entity reference
82 |         try:
83 |             i.update(_make_content(i, value, tag, append=False))
84 |         except TypeError:
85 |             i.update(_make_content(i, value, tag))
86 | 
87 |     if ("content" in i) and not set(i).difference(["content"]):
88 |         # element is leaf node and doesn't have attributes
89 |         i = i["content"]
90 | 
91 |     return i
92 | 


--------------------------------------------------------------------------------
/riko/data/capnorth.xml:
--------------------------------------------------------------------------------
 1 | <alert xmlns="urn:oasis:names:tc:emergency:cap:1.1"> 
 2 |   <identifier>PAAQ3703390999</identifier> 
 3 |   <sender>PAAQ@nwws.oes.ca.gov</sender> 
 4 |   <sent>2007-04-01T20:32:26-07:00</sent> 
 5 |   <status>Actual</status> 
 6 |   <msgType>Alert</msgType> 
 7 |   <scope>Public</scope> 
 8 |   <code>WEPA41 PAAQ 020332</code> 
 9 |   <code>TSUWCA</code> 
10 |   <info> 
11 |     <category>Geo</category> 
12 |     <event></event> 
13 |     <urgency>Future</urgency> 
14 |     <severity>Severe</severity> 
15 |     <certainty>Possible</certainty> 
16 |     <expires>2007-04-02T00:32:26-07:00</expires> 
17 |     <senderName></senderName> 
18 |     <headline> THIS TSUNAMI ADVISORY IS FOR ALASKA/ BRITISH COLUMBIA/ WASHINGTON/ OREGON
19 |             AND CALIFORNIA ONLY
20 |             </headline> 
21 |     <description>NO - REPEAT NO - WATCH OR WARNING IS IN EFFECT FOR THE STATES
22 |             AND PROVINCES LISTED ABOVE.
23 |             A TSUNAMI HAS BEEN OBSERVED AT THE FOLLOWING SITES
24 |             LOCATION                   LAT    LON     TIME        AMPL
25 |             ------------------------  -----  ------  -------     -----------
26 |             HONIARA  SOLOMAN ISLAND    9.4S  159.9E  2252UTC     0.21M/0.7FT
27 |             PORT VILA  VANUATU        17.8S  168.3E  2327UTC     0.12M/0.4FT
28 |             MANUS  PAPUA NEW GUINEA    2.0S  147.4E  0017UTC     0.09M/0.3FT
29 |             CAPE FERGUSON AUST.       19.3S  147.1E  0135UTC     0.11M/0.4FT
30 |             THE TSUNAMI HAS NOT BEEN OBSERVED ON TIDE GAGES TO THE NORTH
31 |             OF THE SOLOMON ISLANDS. FORECAST MODELS INDICATE THE TSUNAMI
32 |             ENERGY WILL BE MAINLY CONTAINED TO THE SOUTH OF THE SOLOMON
33 |              ISLANDS. THE PACIFIC TSUNAMI WARNING CENTER HAS RECEIVED REPORTS
34 |             OF TSUNAMI-RELATED FATALITIES IN SE PAPUA NG AND THE SOLOMON IS.
35 |             TIME - TIME OF MEASUREMENT
36 |             AMPL - TSUNAMI AMPLITUDES ARE MEASURED RELATIVE TO NORMAL SEA
37 |             LEVEL. THESE ARE NOT CREST-TO-TROUGH HEIGHTS.
38 |             EVALUATION
39 |             BASED ON THE LOCATION - MAGNITUDE AND HISTORIC TSUNAMI RECORDS
40 |             THE EARTHQUAKE WAS NOT SUFFICIENT TO GENERATE A TSUNAMI DAMAGING
41 |             TO CALIFORNIA/ OREGON/ WASHINGTON/ BRITISH COLUMBIA OR ALASKA.
42 |              SOME OF THESE AREAS MAY EXPERIENCE NON-DAMAGING SEA LEVEL CHANGES.
43 |             PRELIMINARY EARTHQUAKE PARAMETERS
44 |             MAGNITUDE - 8.1
45 |             TIME      - 1240 AKDT APR 01 2007
46 |             1340  PDT APR 01 2007
47 |             2040  UTC APR 01 2007
48 |             LOCATION  - 8.6 SOUTH 157.2 EAST
49 |             - SOLOMON ISLANDS
50 |             DEPTH     - 17 MILES/28 KM
51 |             THE PACIFIC TSUNAMI WARNING CENTER IN EWA BEACH HAWAII HAS
52 |             ISSUED A TSUNAMI WARNING FOR AREAS OF THE PACIFIC OUTSIDE OF
53 |             CALIFORNIA/ OREGON/ WASHINGTON/ BRITISH COLUMBIA AND ALASKA.
54 |             TSUNAMI ADVISORIES ARE ISSUED TO REGIONS NOT PRESENTLY IN A
55 |             TSUNAMI WARNING OR WATCH WHEN A TSUNAMI WARNING HAS BEEN ISSUED
56 |             FOR OTHER AREAS OF THE PACIFIC.  NO TSUNAMI WARNING OR WATCH
57 |             IS IN EFFECT FOR CALIFORNIA/ OREGON/ WASHINGTON/ BRITISH
58 |             COLUMBIA AND ALASKA.
59 |             </description> 
60 |     <parameter> 
61 |       <valueName>route</valueName> 
62 |       <value>NCS-NORTH.localhost.localdomain;capserverNorth</value> 
63 |     </parameter> 
64 |     <area> 
65 |       <areaDesc> TSUNAMI MESSAGE NUMBER 7 NWS WEST COAST/ALASKA TSUNAMI WARNING CENTER PALMER AK</areaDesc> 
66 |       <polygon>41.857864,-124.45808 41.99844,-124.16689 41.9884,-123.98616 41.797623,-123.98616 41.4964,-123.88574 41.024487,-123.84558 41.064648,-123.88574 40.422043,-124.15685 39.79952,-123.654816 39.036423,-123.52428 38.49423,-122.982086 38.18296,-122.67082 38.29341,-122.38969 38.223125,-122.11858 37.881744,-122.178825 37.881744,-122.178825 37.40983,-121.87762 37.40983,-121.87762 37.23914,-122.05834 36.978077,-121.606514 36.43588,-121.646675 35.200867,-120.48195 34.64863,-120.40163 34.5181,-119.50799 34.176716,-118.45373 33.915657,-118.29307 33.75501,-117.80107 33.092316,-117.1384 32.540077,-117.00786 32.509956,-118.16254 33.1124,-119.62849 33.98594,-120.55223 33.98594,-120.55223 37.751213,-123.34355 40.341717,-124.59864 41.857864,-124.45808</polygon> 
67 |       <geocode> 
68 |         <valueName>UGC</valueName> 
69 |         <value>PKZ176-175-170&gt;172-155-150-132-136&gt;138-141-140-120-121-125&gt;130- 051&gt;053-041&gt;043-011&gt;013-021-022-031&gt;036-PZZ130&gt;135-150-153-156- 110-250-210-255-350-353-356-450-455-550-530-535-555-670-673-650- 655-750-AKZ191-185-181-171-145-111-101-121-125-131-135-017&gt;029- WAZ001-503-506&gt;511-514&gt;517-021-ORZ001-002-021-022-CAZ001-002- 505-506-006-508-509-514-515-009-034-035-039&gt;046-087-020502-</value> 
70 |       </geocode> 
71 |     </area> 
72 |   </info> 
73 | </alert> 
74 | 


--------------------------------------------------------------------------------
/riko/data/currencies.csv:
--------------------------------------------------------------------------------
  1 | code,location
  2 | AED,United Arab Emirates
  3 | AFN,Afghanistan
  4 | ALL,Albania
  5 | AMD,Armenia
  6 | ANG,Curaçao
  7 | AOA,Angola
  8 | ARS,Argentina
  9 | AUD,Australia
 10 | AWG,Aruba
 11 | AZN,Azerbaijan
 12 | BAM,Bosnia and Herzegovina
 13 | BBD,Barbados
 14 | BDT,Bangladesh
 15 | BGN,Bulgaria
 16 | BHD,Bahrain
 17 | BIF,Burundi
 18 | BMD,Bermuda
 19 | BND,Brunei
 20 | BOB,Bolivia
 21 | BOV,Bolivia
 22 | BRL,Brazil
 23 | BSD,Bahamas
 24 | BTN,Bhutan
 25 | BWP,Botswana
 26 | BYN,Belarus
 27 | BYR,Belarus
 28 | BZD,Belize
 29 | CAD,Canada
 30 | CDF,Democratic Republic of the Congo
 31 | CHE,Switzerland
 32 | CHF,Switzerland
 33 | CHW,Switzerland
 34 | CLF,Chile
 35 | CLP,Chile
 36 | CNY,China
 37 | COP,Colombia
 38 | COU,Colombia
 39 | CRC,Costa Rica
 40 | CUC,Cuba
 41 | CUP,Cuba
 42 | CVE,Cape Verde
 43 | CZK,Czech Republic
 44 | DJF,Djibouti
 45 | DKK,Denmark
 46 | DOP,Dominican Republic
 47 | DZD,Algeria
 48 | EGP,Egypt
 49 | ERN,Eritrea
 50 | ETB,Ethiopia
 51 | EUR,European Union
 52 | FJD,Fiji
 53 | FKP,Falkland Islands
 54 | GBP,United Kingdom
 55 | GEL,Georgia
 56 | GHS,Ghana
 57 | GIP,Gibraltar
 58 | GMD,Gambia
 59 | GNF,Guinea
 60 | GTQ,Guatemala
 61 | GYD,Guyana
 62 | HKD,Hong Kong
 63 | HNL,Honduras
 64 | HRK,Croatia
 65 | HTG,Haiti
 66 | HUF,Hungary
 67 | IDR,Indonesia
 68 | ILS,Israel
 69 | INR,India
 70 | IQD,Iraq
 71 | IRR,Iran
 72 | ISK,Iceland
 73 | JMD,Jamaica
 74 | JOD,Jordan
 75 | JPY,Japan
 76 | KES,Kenya
 77 | KGS,Kyrgyzstan
 78 | KHR,Cambodia
 79 | KMF,Comoros
 80 | KPW,North Korea
 81 | KRW,South Korea
 82 | KWD,Kuwait
 83 | KYD,Cayman Islands
 84 | KZT,Kazakhstan
 85 | LAK,Laos
 86 | LBP,Lebanon
 87 | LKR,Sri Lanka
 88 | LRD,Liberia
 89 | LSL,Lesotho
 90 | LYD,Libya
 91 | MAD,Morocco
 92 | MDL,Moldova
 93 | MGA,Madagascar
 94 | MKD,Macedonia
 95 | MMK,Myanmar
 96 | MNT,Mongolia
 97 | MOP,Macao
 98 | MRO,Mauritania
 99 | MUR,Mauritius
100 | MVR,Maldives
101 | MWK,Malawi
102 | MXN,Mexico
103 | MXV,Mexico
104 | MYR,Malaysia
105 | MZN,Mozambique
106 | NAD,Namibia
107 | NGN,Nigeria
108 | NIO,Nicaragua
109 | NOK,Norway
110 | NPR,Nepal
111 | NZD,New Zealand
112 | OMR,Oman
113 | PAB,Panama
114 | PEN,Peru
115 | PGK,Papua New Guinea
116 | PHP,Philippines
117 | PKR,Pakistan
118 | PLN,Poland
119 | PYG,Paraguay
120 | QAR,Qatar
121 | RON,Romania
122 | RSD,Serbia
123 | RUB,Russia
124 | RWF,Rwanda
125 | SAR,Saudi Arabia
126 | SBD,Solomon Islands
127 | SCR,Seychelles
128 | SDG,Sudan
129 | SEK,Sweden
130 | SGD,Singapore
131 | SHP,Saint Helena
132 | SLL,Sierra Leone
133 | SOS,Somalia
134 | SRD,Suriname
135 | SSP,South Sudan
136 | STD,São Tomé and Príncipe
137 | SVC,El Salvador
138 | SYP,Syria
139 | SZL,Swaziland
140 | THB,Thailand
141 | TJS,Tajikistan
142 | TMT,Turkmenistan
143 | TND,Tunisia
144 | TOP,Tonga
145 | TRY,Turkey
146 | TTD,Trinidad and Tobago
147 | TWD,Taiwan
148 | TZS,Tanzania
149 | UAH,Ukraine
150 | UGX,Uganda
151 | USD,United States
152 | USN,United States
153 | UYI,Uruguay
154 | UYU,Uruguay
155 | UZS,Uzbekistan
156 | VEF,Venezuela
157 | VND,Vietnam
158 | VUV,Vanuatu
159 | WST,Samoa
160 | XAF,Cameroon
161 | XCD,Anguilla
162 | XOF,Benin
163 | XPF,French Polynesia
164 | YER,Yemen
165 | ZAR,South Africa
166 | ZMW,Zambia
167 | ZWL,Zimbabwe
168 | 


--------------------------------------------------------------------------------
/riko/data/fourtitude.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="iso-8859-1"?>
  2 | <rss version="0.91">
  3 |   <channel>
  4 |     <title>Fourtitude.com</title>
  5 |     <link>http://www.fourtitude.com</link>
  6 |     <description>The site for the Audi Enthusiast</description>
  7 |     <language>en-us</language>
  8 |     <copyright>(C) 2005 Vortex Media Group Inc.</copyright>
  9 |     <managingEditor>info@fourtitude.com</managingEditor>
 10 |     <webMaster>info@fourtitude.com</webMaster>
 11 |     <ttl>5</ttl>
 12 |     <image>
 13 |       <title>Fourtitude.com</title>
 14 |       <width>144</width>
 15 |       <height>31</height>
 16 |       <link>http://www.fourtitde.com</link>
 17 |       <url>http://www.fourtitude.com/logorss.gif</url>
 18 |     </image>
 19 |     <item>
 20 |       <title>Audi Sport Travels to British DTM Round with Fondest Memories</title>
 21 |       <link>http://www.fourtitude.com/news/publish/Motorsport_News/article_7760.shtml</link>
 22 |       <pubDate>May 11, 2012 10:01:00 EST</pubDate>
 23 |       <description>
 24 |         <![CDATA[]]>
 25 |       </description>
 26 |       <category>Motorsport News</category>
 27 |       <guid isPermaLink="false">http://www.fourtitude.com/news/publish/Motorsport_News/article_7760.shtml</guid>
 28 |     </item>
 29 |     <item>
 30 |       <title>Audi Hungaria Celebrates Roofing Ceremony for New A3 Plant</title>
 31 |       <link>http://www.fourtitude.com/news/publish/Audi_News/article_7759.shtml</link>
 32 |       <pubDate>May 11, 2012 09:53:00 EST</pubDate>
 33 |       <description>
 34 |         <![CDATA[]]>
 35 |       </description>
 36 |       <category>Audi News</category>
 37 |       <guid isPermaLink="false">http://www.fourtitude.com/news/publish/Audi_News/article_7759.shtml</guid>
 38 |     </item>
 39 |     <item>
 40 |       <title>Preview: Audi at the Worthersee Tour</title>
 41 |       <link>http://www.fourtitude.com/news/publish/Audi_News/article_7758.shtml</link>
 42 |       <pubDate>May 11, 2012 09:33:00 EST</pubDate>
 43 |       <description>
 44 |         <![CDATA[]]>
 45 |       </description>
 46 |       <category>Audi News</category>
 47 |       <guid isPermaLink="false">http://www.fourtitude.com/news/publish/Audi_News/article_7758.shtml</guid>
 48 |     </item>
 49 |     <item>
 50 |       <title>Audi e-bike Worthersee: High-End Sports Machine</title>
 51 |       <link>http://www.fourtitude.com/news/publish/Audi_News/article_7757.shtml</link>
 52 |       <pubDate>May 11, 2012 09:05:00 EST</pubDate>
 53 |       <description>
 54 |         <![CDATA[]]>
 55 |       </description>
 56 |       <category>Audi News</category>
 57 |       <guid isPermaLink="false">http://www.fourtitude.com/news/publish/Audi_News/article_7757.shtml</guid>
 58 |     </item>
 59 |     <item>
 60 |       <title>Pattern of Growth Continues for Audi Group: Record Highs in Deliveries and Earnings</title>
 61 |       <link>http://www.fourtitude.com/news/publish/Audi_News/article_7756.shtml</link>
 62 |       <pubDate>May 11, 2012 08:26:00 EST</pubDate>
 63 |       <description>
 64 |         <![CDATA[]]>
 65 |       </description>
 66 |       <category>Audi News</category>
 67 |       <guid isPermaLink="false">http://www.fourtitude.com/news/publish/Audi_News/article_7756.shtml</guid>
 68 |     </item>
 69 |     <item>
 70 |       <title>The New Audi Environmental Magazine: Step by Step to a CO2-Neutral Site</title>
 71 |       <link>http://www.fourtitude.com/news/publish/Audi_News/article_7755.shtml</link>
 72 |       <pubDate>May 11, 2012 08:19:00 EST</pubDate>
 73 |       <description>
 74 |         <![CDATA[]]>
 75 |       </description>
 76 |       <category>Audi News</category>
 77 |       <guid isPermaLink="false">http://www.fourtitude.com/news/publish/Audi_News/article_7755.shtml</guid>
 78 |     </item>
 79 |     <item>
 80 |       <title>Audi ultra: Toward Le Mans Victory with Lightweight Design</title>
 81 |       <link>http://www.fourtitude.com/news/publish/Motorsport_News/article_7754.shtml</link>
 82 |       <pubDate>May 11, 2012 08:07:00 EST</pubDate>
 83 |       <description>
 84 |         <![CDATA[]]>
 85 |       </description>
 86 |       <category>Motorsport News</category>
 87 |       <guid isPermaLink="false">http://www.fourtitude.com/news/publish/Motorsport_News/article_7754.shtml</guid>
 88 |     </item>
 89 |     <item>
 90 |       <title>High-Voltage Battery Technology at Audi: Core Competence in Ingolstadt</title>
 91 |       <link>http://www.fourtitude.com/news/publish/Audi_News/article_7753.shtml</link>
 92 |       <pubDate>May 11, 2012 07:57:00 EST</pubDate>
 93 |       <description>
 94 |         <![CDATA[]]>
 95 |       </description>
 96 |       <category>Audi News</category>
 97 |       <guid isPermaLink="false">http://www.fourtitude.com/news/publish/Audi_News/article_7753.shtml</guid>
 98 |     </item>
 99 |     <item>
100 |       <title>Audi AG: North American Growth Region with Significantly Increased Sales</title>
101 |       <link>http://www.fourtitude.com/news/publish/Audi_News/article_7752.shtml</link>
102 |       <pubDate>May 11, 2012 07:47:00 EST</pubDate>
103 |       <description>
104 |         <![CDATA[]]>
105 |       </description>
106 |       <category>Audi News</category>
107 |       <guid isPermaLink="false">http://www.fourtitude.com/news/publish/Audi_News/article_7752.shtml</guid>
108 |     </item>
109 |     <item>
110 |       <title>Audi Continues Progress on High-Voltage Battery Project House at Ingolstadt/Gaimersheim Site</title>
111 |       <link>http://www.fourtitude.com/news/publish/Audi_News/article_7751.shtml</link>
112 |       <pubDate>May 11, 2012 07:37:00 EST</pubDate>
113 |       <description>
114 |         <![CDATA[]]>
115 |       </description>
116 |       <category>Audi News</category>
117 |       <guid isPermaLink="false">http://www.fourtitude.com/news/publish/Audi_News/article_7751.shtml</guid>
118 |     </item>
119 |   </channel>
120 | </rss>
121 | 


--------------------------------------------------------------------------------
/riko/data/lorem.txt:
--------------------------------------------------------------------------------
 1 | What is Lorem Ipsum?
 2 | Lorem Ipsum is simply dummy text of the printing and typesetting industry.
 3 | Lorem Ipsum has been the industry's standard dummy text ever since the 1500s,
 4 | when an unknown printer took a galley of type and scrambled it to make a type
 5 | specimen book. It has survived not only five centuries, but also the leap
 6 | into electronic typesetting, remaining essentially unchanged. It was
 7 | popularised in the 1960s with the release of Letraset sheets containing Lorem
 8 | Ipsum passages, and more recently with desktop publishing software like Aldus
 9 | PageMaker including versions of Lorem Ipsum.
10 | 
11 | Why do we use it?
12 | It is a long established fact that a reader will be distracted by the readable
13 | content of a page when looking at its layout. The point of using Lorem Ipsum
14 | is that it has a more-or-less normal distribution of letters, as opposed to
15 | using 'Content here, content here', making it look like readable English. Many
16 | desktop publishing packages and web page editors now use Lorem Ipsum as their
17 | default model text, and a search for 'lorem ipsum' will uncover many web sites
18 | still in their infancy. Various versions have evolved over the years, sometimes
19 | by accident, sometimes on purpose (injected humour and the like).
20 | 
21 | 
22 | Where does it come from?
23 | Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots
24 | in a piece of classical Latin literature from 45 BC, making it over 2000 years
25 | old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia,
26 | looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum
27 | passage, and going through the cites of the word in classical literature,
28 | discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and
29 | 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by
30 | Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very
31 | popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor
32 | sit amet..", comes from a line in section 1.10.32.


--------------------------------------------------------------------------------
/riko/data/places.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" ?>
 2 | <zAppointments reminder="15">
 3 |   <appointment>
 4 |     <begin>1181251680</begin>
 5 |     <uid>040000008200E000</uid>
 6 |     <alarmTime>1181572063</alarmTime>
 7 |     <places>
 8 |       <placekey>West Virginia</placekey>
 9 |       <placekey>Wisconsin</placekey>
10 |       <placekey>Wyoming</placekey>
11 |       <placekey>Puerto Rico</placekey>
12 |       <placekey>U.S. Virgin Islands</placekey>
13 |     </places>
14 |     <duration>1800</duration>
15 |     <subject>Bring pizza home</subject>
16 |   </appointment>
17 |   <appointment>
18 |     <begin>1234360800</begin>
19 |     <uid>604f4792-eb89-478b-a14f-dd34d3cc6c21-1234360800</uid>
20 |     <alarmTime>1181572063</alarmTime>
21 |     <places>
22 |       <placekey>Arusha</placekey>
23 |       <placekey>Nairobi</placekey>
24 |       <placekey>Joburg</placekey>
25 |       <placekey>Moshi</placekey>
26 |       <placekey>Kampala</placekey>
27 |     </places>
28 |     <duration>1800</duration>
29 |     <subject>Check MS Office website for updates</subject>
30 |   </appointment>
31 | </zAppointments>
32 | 


--------------------------------------------------------------------------------
/riko/data/quote.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "disclaimer": "Usage subject to terms: https://openexchangerates.org/terms",
  3 |   "license": "https://openexchangerates.org/license",
  4 |   "timestamp": 1534633200,
  5 |   "base": "USD",
  6 |   "rates": {
  7 |     "AED": 3.673181,
  8 |     "AFN": 72.755844,
  9 |     "ALL": 110,
 10 |     "AMD": 482.128618,
 11 |     "ANG": 1.844499,
 12 |     "AOA": 268.6005,
 13 |     "ARS": 29.75,
 14 |     "AUD": 1.3668,
 15 |     "AWG": 1.793003,
 16 |     "AZN": 1.7025,
 17 |     "BAM": 1.717049,
 18 |     "BBD": 2,
 19 |     "BDT": 84.493,
 20 |     "BGN": 1.70985,
 21 |     "BHD": 0.37717,
 22 |     "BIF": 1785,
 23 |     "BMD": 1,
 24 |     "BND": 1.510506,
 25 |     "BOB": 6.909307,
 26 |     "BRL": 3.910602,
 27 |     "BSD": 1,
 28 |     "BTC": 0.000156174096,
 29 |     "BTN": 70.07086,
 30 |     "BWP": 10.877472,
 31 |     "BYN": 2.049958,
 32 |     "BZD": 2.009488,
 33 |     "CAD": 1.305995,
 34 |     "CDF": 1615,
 35 |     "CHF": 0.99561,
 36 |     "CLF": 0.02338,
 37 |     "CLP": 667.7,
 38 |     "CNH": 6.834585,
 39 |     "CNY": 6.87455,
 40 |     "COP": 3005.577576,
 41 |     "CRC": 567.209185,
 42 |     "CUC": 1,
 43 |     "CUP": 25.5,
 44 |     "CVE": 97.1815,
 45 |     "CZK": 22.4652,
 46 |     "DJF": 178,
 47 |     "DKK": 6.5196,
 48 |     "DOP": 49.755,
 49 |     "DZD": 118.820391,
 50 |     "EGP": 17.879,
 51 |     "ERN": 14.994033,
 52 |     "ETB": 27.695,
 53 |     "EUR": 0.873325,
 54 |     "FJD": 2.125348,
 55 |     "FKP": 0.78419,
 56 |     "GBP": 0.78419,
 57 |     "GEL": 2.477417,
 58 |     "GGP": 0.78419,
 59 |     "GHS": 4.875,
 60 |     "GIP": 0.78419,
 61 |     "GMD": 48.175,
 62 |     "GNF": 8935,
 63 |     "GTQ": 7.48659,
 64 |     "GYD": 208.499583,
 65 |     "HKD": 7.850834,
 66 |     "HNL": 24.030079,
 67 |     "HRK": 6.484862,
 68 |     "HTG": 67.361924,
 69 |     "HUF": 282.19,
 70 |     "IDR": 14336.481583,
 71 |     "ILS": 3.660335,
 72 |     "IMP": 0.78419,
 73 |     "INR": 69.795,
 74 |     "IQD": 1190,
 75 |     "IRR": 43156.422013,
 76 |     "ISK": 107.349979,
 77 |     "JEP": 0.78419,
 78 |     "JMD": 134.95,
 79 |     "JOD": 0.710507,
 80 |     "JPY": 110.50517651,
 81 |     "KES": 100.81,
 82 |     "KGS": 68.137481,
 83 |     "KHR": 4071,
 84 |     "KMF": 431.70233,
 85 |     "KPW": 900,
 86 |     "KRW": 1119.5,
 87 |     "KWD": 0.303548,
 88 |     "KYD": 0.833077,
 89 |     "KZT": 359.95,
 90 |     "LAK": 8520,
 91 |     "LBP": 1514.809961,
 92 |     "LKR": 160.430887,
 93 |     "LRD": 154.249852,
 94 |     "LSL": 13.421367,
 95 |     "LYD": 1.39,
 96 |     "MAD": 9.5275,
 97 |     "MDL": 16.650107,
 98 |     "MGA": 3315,
 99 |     "MKD": 53.934737,
100 |     "MMK": 1489.657202,
101 |     "MNT": 2442.166667,
102 |     "MOP": 8.08326,
103 |     "MRO": 357.5,
104 |     "MRU": 35.97,
105 |     "MUR": 34.649,
106 |     "MVR": 15.450044,
107 |     "MWK": 727.061323,
108 |     "MXN": 18.8935,
109 |     "MYR": 4.102481,
110 |     "MZN": 59.041109,
111 |     "NAD": 14.534635,
112 |     "NGN": 361.01,
113 |     "NIO": 31.86,
114 |     "NOK": 8.450943,
115 |     "NPR": 112.109955,
116 |     "NZD": 1.506932,
117 |     "OMR": 0.385058,
118 |     "PAB": 1,
119 |     "PEN": 3.323069,
120 |     "PGK": 3.3172,
121 |     "PHP": 53.294962,
122 |     "PKR": 123.683333,
123 |     "PLN": 3.753402,
124 |     "PYG": 5756.00401,
125 |     "QAR": 3.641,
126 |     "RON": 4.068601,
127 |     "RSD": 103.227962,
128 |     "RUB": 67.0075,
129 |     "RWF": 865.75,
130 |     "SAR": 3.75055,
131 |     "SBD": 7.88911,
132 |     "SCR": 13.588915,
133 |     "SDG": 18.02,
134 |     "SEK": 9.1571,
135 |     "SGD": 1.3713,
136 |     "SHP": 0.78419,
137 |     "SLL": 6542.71,
138 |     "SOS": 578.5,
139 |     "SRD": 7.458,
140 |     "SSP": 130.2634,
141 |     "STD": 21050.59961,
142 |     "STN": 21.55,
143 |     "SVC": 8.748373,
144 |     "SYP": 514.85499,
145 |     "SZL": 14.534541,
146 |     "THB": 33.158,
147 |     "TJS": 9.413073,
148 |     "TMT": 3.499986,
149 |     "TND": 2.755293,
150 |     "TOP": 2.310538,
151 |     "TRY": 6.013976,
152 |     "TTD": 6.73925,
153 |     "TWD": 30.725347,
154 |     "TZS": 2282.465888,
155 |     "UAH": 27.689519,
156 |     "UGX": 3747.579147,
157 |     "USD": 1,
158 |     "UYU": 31.568695,
159 |     "UZS": 7800,
160 |     "VEF": 141572.666667,
161 |     "VND": 23106.485172,
162 |     "VUV": 108.499605,
163 |     "WST": 2.588533,
164 |     "XAF": 572.863647,
165 |     "XAG": 0.06756834,
166 |     "XAU": 0.00078,
167 |     "XCD": 2.70255,
168 |     "XDR": 0.71658,
169 |     "XOF": 572.863647,
170 |     "XPD": 0.00109045,
171 |     "XPF": 104.215394,
172 |     "XPT": 0.00126562,
173 |     "YER": 250.350747,
174 |     "ZAR": 14.56358,
175 |     "ZMW": 10.247,
176 |     "ZWL": 322.355011
177 |   }
178 | }
179 | 


--------------------------------------------------------------------------------
/riko/data/schools.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <response>
 3 |   <data>
 4 |     <row>
 5 |       <district_name>Turkana</district_name>
 6 |       <poverty_rate_2005_06_>94.3</poverty_rate_2005_06_>
 7 |       <number_of_poor_2005_06_>481442</number_of_poor_2005_06_>
 8 |     </row>
 9 |     <row>
10 |       <district_name>Marsabit</district_name>
11 |       <poverty_rate_2005_06_>91.7</poverty_rate_2005_06_>
12 |       <number_of_poor_2005_06_>118786</number_of_poor_2005_06_>
13 |     </row>
14 |     <row>
15 |       <district_name>Mandera</district_name>
16 |       <poverty_rate_2005_06_>87.8</poverty_rate_2005_06_>
17 |       <number_of_poor_2005_06_>225812</number_of_poor_2005_06_>
18 |     </row>
19 |   </data>
20 | </response>
21 | 


--------------------------------------------------------------------------------
/riko/data/users.jyu.fi.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 2 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US">
 3 | 
 4 | <head>
 5 |   <title>HTML tidy service</title>
 6 |   <link rel="stylesheet" href="http://www.w3.org/StyleSheets/base" />
 7 |   <meta name="robots" content="noindex, nofollow" />
 8 | </head>
 9 | 
10 | <body>
11 |   <p>
12 |     <a href="http://www.w3.org/"><img src="http://www.w3.org/Icons/w3c_home" alt="W3C" /></a>
13 |   </p>
14 |   <h1>Tidy your HTML</h1>
15 |   <form method="GET">
16 |     <p>Address of document to tidy:
17 |       <input name="docAddr" value="" />
18 |     </p>
19 |     <p>
20 |       <label>
21 |         <input type="checkbox" name="indent" /> indent</label>
22 |     </p>
23 |     <p>
24 |       <label>
25 |         <input type="checkbox" name="forceXML" /> enforce XML well-formedness of the results</label> (may lead to loss of parts of the originating document if too ill-formed)</p>
26 |     <p>
27 |       <input type="submit" value="get tidy results" />
28 |     </p>
29 |   </form>
30 |   <hr />
31 |   <h2>Stuff used to build this service</h2>
32 |   <ul>
33 |     <li><a href="http://tidy.sourceforge.net/">tidy</a></li>
34 |     <li><a href="http://xmlsoft.org/xmllint.html">xmllint</a> (for enforcing XML well-formedness)</li>
35 |     <li><a href="http://www.python.org/">python</a>, apache, etc.</li>
36 |   </ul>
37 |   <p>See also the <a href="http://dev.w3.org/cvsweb/2000/tidy-svc/">underlying Python script</a>.</p>
38 |   <address>
39 |     script $Revision: 1.15 $ of $Date: 2010/11/22 16:44:06 $
40 |     <br /> by <a href="http://www.w3.org/People/Connolly/">Dan Connolly</a>
41 |     <br /> Further developed and maintained by <a href="http://www.w3.org/People/Dom/">Dominique Hazael-Massieux</a>
42 |   </address>
43 | </body>
44 | 
45 | </html>
46 | 


--------------------------------------------------------------------------------
/riko/data/yql.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" ?>
 2 | <root>
 3 |   <results>
 4 |     <item>
 5 |       <begin>1181251680</begin>
 6 |       <uid>040000008200E000</uid>
 7 |       <alarmTime>1181572063</alarmTime>
 8 |       <place>Wisconsin</place>
 9 |       <duration>1800</duration>
10 |       <title>Bring pizza home</title>
11 |     </item>
12 |     <item>
13 |       <begin>1234360800</begin>
14 |       <uid>604f4792-eb89-478b-a14f-dd34d3cc6c21-1234360800</uid>
15 |       <alarmTime>1181572063</alarmTime>
16 |       <place>Nairobi</place>
17 |       <duration>1800</duration>
18 |       <title>Check MS Office website for updates</title>
19 |     </item>
20 |   </results>
21 | </root>
22 | 


--------------------------------------------------------------------------------
/riko/dates.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # vim: sw=4:ts=4:expandtab
 3 | """
 4 | riko.dates
 5 | ~~~~~~~~~~
 6 | Provides date and time helpers
 7 | """
 8 | from datetime import timedelta, datetime as dt
 9 | from time import strptime
10 | 
11 | import pytz
12 | 
13 | from pytz import utc
14 | from dateutil.tz import gettz, tzoffset
15 | 
16 | DATE_FORMAT = "%m/%d/%Y"
17 | DATETIME_FORMAT = "{0} %H:%M:%S".format(DATE_FORMAT)
18 | TIMEOUT = 60 * 60 * 1
19 | HALF_DAY = 60 * 60 * 12
20 | TODAY = dt.utcnow()
21 | 
22 | 
23 | def gen_tzinfos():
24 |     for zone in pytz.common_timezones:
25 |         try:
26 |             tzdate = pytz.timezone(zone).localize(dt.utcnow(), is_dst=None)
27 |         except pytz.NonExistentTimeError:
28 |             pass
29 |         else:
30 |             tzinfo = gettz(zone)
31 | 
32 |             if tzinfo:
33 |                 yield tzdate.tzname(), tzinfo
34 | 
35 | 
36 | def get_date(unit, count, op):
37 |     new_month = op(TODAY.month, count) % 12 or 12
38 | 
39 |     DATES = {
40 |         "seconds": op(TODAY, timedelta(seconds=count)),
41 |         "minutes": op(TODAY, timedelta(minutes=count)),
42 |         "hours": op(TODAY, timedelta(hours=count)),
43 |         "days": op(TODAY, timedelta(days=count)),
44 |         "weeks": op(TODAY, timedelta(weeks=count)),
45 |         "months": TODAY.replace(month=new_month),
46 |         "years": TODAY.replace(year=op(TODAY.year, count)),
47 |     }
48 | 
49 |     return DATES[unit]
50 | 
51 | 
52 | def normalize_date(date):
53 |     try:
54 |         # See if date is a `time.struct_time`
55 |         # if so, convert it and account for leapseconds
56 |         tt, date = date, dt(*date[:5] + (min(date[5], 59),))
57 |     except TypeError:
58 |         pass
59 |     else:
60 |         is_dst = None if tt[8] == -1 else tt[8]
61 | 
62 |         try:
63 |             tm_zone = tt.tm_zone
64 |         except AttributeError:
65 |             tm_zone = None
66 |             tm_gmtoff = None
67 |         else:
68 |             tm_gmtoff = tt.tm_gmtoff
69 | 
70 |         if tm_zone:
71 |             date = pytz.timezone(tm_zone).localize(date, is_dst=is_dst)
72 |         elif tm_gmtoff:
73 |             offset = tzoffset(None, tm_gmtoff)
74 |             date.replace(tzinfo=offset)
75 | 
76 |     # Set timezone to UTC
77 |     try:
78 |         tzdate = date.astimezone(utc) if date.tzinfo else utc.localize(date)
79 |     except AttributeError:
80 |         tzdate = date
81 | 
82 |     return tzdate
83 | 
84 | 
85 | def get_tt(date):
86 |     formatted = "".join(date.isoformat().rsplit(":", 1))
87 |     sformat = "%Y-%m-%d" if len(formatted) == 10 else "%Y-%m-%dT%H:%M:%S%z"
88 | 
89 |     try:
90 |         tt = strptime(formatted, sformat)
91 |     except ValueError:
92 |         tt = strptime(formatted[:19], "%Y-%m-%dT%H:%M:%S")
93 | 
94 |     return tt
95 | 


--------------------------------------------------------------------------------
/riko/dotdict.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.dotdict
  5 | ~~~~~~~~~~~~
  6 | Provides a class for creating dicts with dot notation access
  7 | """
  8 | import pygogo as gogo
  9 | 
 10 | from functools import reduce
 11 | 
 12 | logger = gogo.Gogo(__name__, monolog=True).logger
 13 | 
 14 | 
 15 | class DotDict(dict):
 16 |     """A dictionary whose keys can be accessed using dot notation
 17 |     >>> r = DotDict({'a': {'content': 'value'}})
 18 |     >>> r.get('a.content') == 'value'
 19 |     True
 20 |     >>> r['a.content'] == 'value'
 21 |     True
 22 |     """
 23 | 
 24 |     def __init__(self, data=None, **kwargs):
 25 |         self.update(data)
 26 | 
 27 |     def _parse_key(self, key=None):
 28 |         try:
 29 |             keys = key.rstrip(".").split(".") if key else []
 30 |         except AttributeError:
 31 |             keys = [key["subkey"]] if key else []
 32 | 
 33 |         return keys
 34 | 
 35 |     def _parse_value(self, value, key, default=None):
 36 |         try:
 37 |             parsed = value[key]
 38 |         except KeyError:
 39 |             try:
 40 |                 parsed = value["value"]
 41 |             except KeyError:
 42 |                 parsed = default
 43 |         except (TypeError, IndexError):
 44 |             if hasattr(value, "append"):
 45 |                 parsed = [v[key] for v in value]
 46 |             else:
 47 |                 parsed = value
 48 | 
 49 |         return default if parsed is None else parsed
 50 | 
 51 |     def __getitem__(self, key):
 52 |         keys = self._parse_key(key)
 53 |         value = super(DotDict, self).__getitem__(keys[0])
 54 | 
 55 |         if len(keys) > 1:
 56 |             return value[".".join(keys[1:])]
 57 |         elif hasattr(value, "keys") and "value" in value:
 58 |             value = value["value"]
 59 | 
 60 |         return DotDict(value) if hasattr(value, "keys") else value
 61 | 
 62 |     def get(self, key=None, default=None, **kwargs):
 63 |         keys = self._parse_key(key)
 64 |         value = DotDict(self.copy())
 65 | 
 66 |         for key in keys:
 67 |             try:
 68 |                 key = int(key)
 69 |             except ValueError:
 70 |                 pass
 71 | 
 72 |             value = self._parse_value(value, key, default)
 73 | 
 74 |         if hasattr(value, "keys") and "terminal" in value:
 75 |             # value fed in from another module
 76 |             stream = kwargs[value["terminal"]]
 77 |             value = next(stream)[value.get("path", "content")]
 78 |         elif hasattr(value, "keys") and "value" in value:
 79 |             value = value["value"]
 80 | 
 81 |         return DotDict(value) if hasattr(value, "keys") else value
 82 | 
 83 |     def delete(self, key):
 84 |         keys = self._parse_key(key)
 85 |         last = keys[-1]
 86 | 
 87 |         try:
 88 |             del reduce(lambda i, k: DotDict(i).get(k), [self] + keys[:-1])[last]
 89 |         except KeyError:
 90 |             pass
 91 | 
 92 |     def set(self, key, value):
 93 |         keys = self._parse_key(key)
 94 |         first = keys[:-1]
 95 |         last = keys[-1]
 96 |         item = self.copy()
 97 |         reduce(lambda i, k: i.setdefault(k, {}), first, item)[last] = value
 98 |         super(DotDict, self).update(item)
 99 | 
100 |     def update(self, data=None):
101 |         if not data:
102 |             return
103 | 
104 |         _dict = dict(data)
105 |         dot_keys = [d for d in _dict if "." in d]
106 | 
107 |         if dot_keys:
108 |             # skip key if a subkey redefines it
109 |             # i.e., 'author.name' has precedence over 'author'
110 |             keys = [".".join(self._parse_key(dk)[:-1]) for dk in dot_keys]
111 |             items = ((k, v) for k, v in _dict.items() if k not in keys)
112 |         else:
113 |             items = _dict.items()
114 | 
115 |         [self.set(key, value) for key, value in items]
116 | 


--------------------------------------------------------------------------------
/riko/modules/count.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.count
  5 | ~~~~~~~~~~~~~~~~~~
  6 | Provides functions for counting the number of items in a stream.
  7 | 
  8 | Examples:
  9 |     basic usage::
 10 | 
 11 |         >>> from riko.modules.count import pipe
 12 |         >>>
 13 |         >>> next(pipe({'x': x} for x in range(5))) == {'count': 5}
 14 |         True
 15 | 
 16 | Attributes:
 17 |     OPTS (dict): The default pipe options
 18 |     DEFAULTS (dict): The default parser options
 19 | """
 20 | import itertools as it
 21 | import pygogo as gogo
 22 | 
 23 | from operator import itemgetter
 24 | 
 25 | from . import operator
 26 | 
 27 | OPTS = {"extract": "count_key"}
 28 | DEFAULTS = {"count_key": None}
 29 | logger = gogo.Gogo(__name__, monolog=True).logger
 30 | 
 31 | 
 32 | def parser(stream, key, tuples, **kwargs):
 33 |     """Parses the pipe content
 34 | 
 35 |     Args:
 36 |         stream (Iter[dict]): The source. Note: this shares the `tuples`
 37 |             iterator, so consuming it will consume `tuples` as well.
 38 | 
 39 |         key (str): the field to group by.
 40 | 
 41 |         tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf)
 42 |             `item` is an element in the source stream and `objconf` is the item
 43 |             configuration (an Objectify instance). Note: this shares the
 44 |             `stream` iterator, so consuming it will consume `stream` as well.
 45 | 
 46 |         kwargs (dict): Keyword arguments.
 47 | 
 48 |     Kwargs:
 49 |         conf (dict): The pipe configuration.
 50 | 
 51 |     Returns:
 52 |         mixed: The output either a dict or iterable of dicts
 53 | 
 54 |     Examples:
 55 |         >>> from itertools import repeat
 56 |         >>>
 57 |         >>> stream = ({'x': x} for x in range(5))
 58 |         >>> tuples = zip(stream, repeat(None))
 59 |         >>> parser(stream, None, tuples, assign='content') == {'content': 5}
 60 |         True
 61 |         >>> conf = {'count_key': 'word'}
 62 |         >>> kwargs = {'conf': conf}
 63 |         >>> stream = [{'word': 'two'}, {'word': 'one'}, {'word': 'two'}]
 64 |         >>> tuples = zip(stream, repeat(conf['count_key']))
 65 |         >>> counted = parser(stream, conf['count_key'], tuples, **kwargs)
 66 |         >>> next(counted) == {'one': 1}
 67 |         True
 68 |         >>> next(counted) == {'two': 2}
 69 |         True
 70 |     """
 71 |     if key:
 72 |         keyfunc = itemgetter(key)
 73 |         sorted_stream = sorted(stream, key=keyfunc)
 74 |         grouped = it.groupby(sorted_stream, keyfunc)
 75 |         counted = ({key: len(list(group))} for key, group in grouped)
 76 |     else:
 77 |         counted = {kwargs["assign"]: len(list(stream))}
 78 | 
 79 |     return counted
 80 | 
 81 | 
 82 | @operator(DEFAULTS, isasync=True, **OPTS)
 83 | def async_pipe(*args, **kwargs):
 84 |     """An operator that asynchronously and eagerly counts the number of items
 85 |     in a stream. Note that this pipe is not lazy.
 86 | 
 87 |     Args:
 88 |         items (Iter[dict]): The source.
 89 |         kwargs (dict): The keyword arguments passed to the wrapper
 90 | 
 91 |     Kwargs:
 92 |         conf (dict): The pipe configuration. May contain the key 'count_key'.
 93 | 
 94 |             count_key (str): Item attribute to count by. This will group items
 95 |                 in the stream by the given key and report a count for each
 96 |                 group (default: None).
 97 | 
 98 |         assign (str): Attribute to assign parsed content. If `count_key` is set,
 99 |             this is ignored and the group keys are used instead. (default:
100 |             content)
101 | 
102 |     Returns:
103 |         Deferred: twisted.internet.defer.Deferred iterator of the number of
104 |             counted items
105 | 
106 |     Examples:
107 |         >>> from riko.bado import react
108 |         >>> from riko.bado.mock import FakeReactor
109 |         >>>
110 |         >>> def run(reactor):
111 |         ...     callback = lambda x: print(next(x) == {'count': 5})
112 |         ...     items = ({'x': x} for x in range(5))
113 |         ...     d = async_pipe(items)
114 |         ...     return d.addCallbacks(callback, logger.error)
115 |         >>>
116 |         >>> try:
117 |         ...     react(run, _reactor=FakeReactor())
118 |         ... except SystemExit:
119 |         ...     pass
120 |         ...
121 |         True
122 |     """
123 |     return parser(*args, **kwargs)
124 | 
125 | 
126 | @operator(DEFAULTS, **OPTS)
127 | def pipe(*args, **kwargs):
128 |     """An operator that eagerly counts the number of items in a stream.
129 |     Note that this pipe is not lazy.
130 | 
131 |     Args:
132 |         items (Iter[dict]): The source.
133 |         kwargs (dict): The keyword arguments passed to the wrapper
134 | 
135 |     Kwargs:
136 |         conf (dict): The pipe configuration. May contain the key 'count_key'.
137 | 
138 |             count_key (str): Item attribute to count by. This will group items
139 |                 in the stream by the given key and report a count for each
140 |                 group (default: None).
141 | 
142 |         assign (str): Attribute to assign parsed content. If `count_key` is set,
143 |             this is ignored and the group keys are used instead. (default:
144 |             content)
145 | 
146 |     Yields:
147 |         dict: the number of counted items
148 | 
149 |     Examples:
150 |         >>> stream = ({'x': x} for x in range(5))
151 |         >>> next(pipe(stream)) == {'count': 5}
152 |         True
153 |         >>> stream = [{'word': 'two'}, {'word': 'one'}, {'word': 'two'}]
154 |         >>> counted = pipe(stream, conf={'count_key': 'word'})
155 |         >>> next(counted) == {'one': 1}
156 |         True
157 |         >>> next(counted) == {'two': 2}
158 |         True
159 |     """
160 |     return parser(*args, **kwargs)
161 | 


--------------------------------------------------------------------------------
/riko/modules/currencyformat.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.currencyformat
  5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for formatting numbers to currency strings.
  7 | 
  8 | Examples:
  9 |     basic usage::
 10 | 
 11 |         >>> from riko.modules.currencyformat import pipe
 12 |         >>>
 13 |         >>> next(pipe({'content': '100'}))['currencyformat'] == '$100.00'
 14 |         True
 15 | 
 16 | Attributes:
 17 |     OPTS (dict): The default pipe options
 18 |     DEFAULTS (dict): The default parser options
 19 | """
 20 | from decimal import Decimal
 21 | from babel.numbers import format_currency
 22 | 
 23 | from . import processor
 24 | import pygogo as gogo
 25 | 
 26 | OPTS = {"ftype": "decimal", "field": "content"}
 27 | DEFAULTS = {"currency": "USD"}
 28 | NaN = Decimal("NaN")
 29 | 
 30 | logger = gogo.Gogo(__name__, monolog=True).logger
 31 | 
 32 | 
 33 | def parser(amount, objconf, skip=False, **kwargs):
 34 |     """Parsers the pipe content
 35 | 
 36 |     Args:
 37 |         amount (Decimal): The amount to format
 38 |         objconf (obj): The pipe configuration (an Objectify instance)
 39 |         skip (bool): Don't parse the content
 40 | 
 41 |     Returns:
 42 |         dict: The formatted item
 43 | 
 44 |     Examples:
 45 |         >>> from decimal import Decimal
 46 |         >>> from meza.fntools import Objectify
 47 |         >>>
 48 |         >>> objconf = Objectify({'currency': 'USD'})
 49 |         >>> parser(Decimal('10.33'), objconf) == '$10.33'
 50 |         True
 51 |     """
 52 |     if skip:
 53 |         parsed = kwargs["stream"]
 54 |     elif amount is not None:
 55 |         try:
 56 |             parsed = format_currency(amount, objconf.currency)
 57 |         except ValueError:
 58 |             parsed = NaN
 59 |     else:
 60 |         parsed = NaN
 61 | 
 62 |     return parsed
 63 | 
 64 | 
 65 | @processor(DEFAULTS, isasync=True, **OPTS)
 66 | def async_pipe(*args, **kwargs):
 67 |     """A processor module that asynchronously formats a number to a given
 68 |     currency string.
 69 | 
 70 |     Args:
 71 |         item (dict): The entry to process
 72 |         kwargs (dict): The keyword arguments passed to the wrapper
 73 | 
 74 |     Kwargs:
 75 |         conf (dict): The pipe configuration. May contain the key 'currency'.
 76 | 
 77 |             currency (str): The currency ISO abbreviation (default: USD).
 78 | 
 79 |         assign (str): Attribute to assign parsed content (default:
 80 |             currencyformat)
 81 | 
 82 |         field (str): Item attribute from which to obtain the string to be
 83 |             formatted (default: 'content')
 84 | 
 85 |     Returns:
 86 |         Deferred: twisted.internet.defer.Deferred item with formatted currency
 87 | 
 88 |     Examples:
 89 |         >>> from datetime import date
 90 |         >>> from riko.bado import react
 91 |         >>> from riko.bado.mock import FakeReactor
 92 |         >>>
 93 |         >>> def run(reactor):
 94 |         ...     callback = lambda x: print(next(x)['currencyformat'])
 95 |         ...     d = async_pipe({'content': '10.33'})
 96 |         ...     return d.addCallbacks(callback, logger.error)
 97 |         >>>
 98 |         >>> try:
 99 |         ...     react(run, _reactor=FakeReactor())
100 |         ... except SystemExit:
101 |         ...     pass
102 |         ...
103 |         $10.33
104 |     """
105 |     return parser(*args, **kwargs)
106 | 
107 | 
108 | @processor(DEFAULTS, **OPTS)
109 | def pipe(*args, **kwargs):
110 |     """A processor module that formats a number to a given currency string.
111 | 
112 |     Args:
113 |         item (dict): The entry to process
114 |         kwargs (dict): The keyword arguments passed to the wrapper
115 | 
116 |     Kwargs:
117 |         conf (dict): The pipe configuration. May contain the key 'currency'.
118 | 
119 |             currency (str): The currency ISO abbreviation (default: USD).
120 | 
121 |         assign (str): Attribute to assign parsed content (default:
122 |             currencyformat)
123 | 
124 |         field (str): Item attribute from which to obtain the string to be
125 |             formatted (default: 'content')
126 | 
127 |     Returns:
128 |         dict: an item with formatted date string
129 | 
130 |     Examples:
131 |         >>> next(pipe({'content': '10.33'}))['currencyformat'] == '$10.33'
132 |         True
133 |         >>> conf = {'currency': 'GBP'}
134 |         >>> result = next(pipe({'content': '100'}, conf=conf))
135 |         >>> result['currencyformat'] == '£100.00'
136 |         True
137 |     """
138 |     return parser(*args, **kwargs)
139 | 


--------------------------------------------------------------------------------
/riko/modules/dateformat.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.dateformat
  5 | ~~~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for formatting dates.
  7 | 
  8 | A wide range of format specifiers can be used to create the output text string.
  9 | The specifiers all begin with a percent sign followed by a single character.
 10 | 
 11 | Here are a few specifiers and how they each format the date/time February 12th,
 12 | 2008 at 8:45 P.M.
 13 | 
 14 |     Specifier                   Formatted Date
 15 |     -------------------------   -------------------------------
 16 |     %m-%d-%Y                    02-12-2008
 17 |     %A, %b %d, %y at %I:%M %p   Tuesday, Feb 12, 08 at 08:45 PM
 18 |     %D 	                        02/12/08
 19 |     %R 	                        20:45
 20 |     %B 	                        February
 21 | 
 22 | Examples:
 23 |     basic usage::
 24 | 
 25 |         >>> from riko.modules.dateformat import pipe
 26 |         >>> from datetime import date
 27 |         >>>
 28 |         >>> next(pipe({'date': date(2015, 5, 4)}))['dateformat']
 29 |         '05/04/2015 00:00:00'
 30 | 
 31 | Attributes:
 32 |     OPTS (dict): The default pipe options
 33 |     DEFAULTS (dict): The default parser options
 34 | """
 35 | from time import strftime
 36 | 
 37 | from . import processor
 38 | import pygogo as gogo
 39 | 
 40 | OPTS = {"field": "date", "ftype": "date"}
 41 | DEFAULTS = {"format": "%m/%d/%Y %H:%M:%S"}
 42 | logger = gogo.Gogo(__name__, monolog=True).logger
 43 | 
 44 | 
 45 | def parser(date, objconf, skip=False, **kwargs):
 46 |     """Obtains the user input
 47 | 
 48 |     Args:
 49 |         date (dict): Must have key 'date' with a date-like object value
 50 |         objconf (obj): The pipe configuration (an Objectify instance)
 51 |         skip (bool): Don't parse the content
 52 | 
 53 |     Returns:
 54 |         dict: The formatted date
 55 | 
 56 |     Examples:
 57 |         >>> from datetime import date
 58 |         >>> from meza.fntools import Objectify
 59 |         >>>
 60 |         >>> objconf = Objectify({'format': '%m/%d/%Y'})
 61 |         >>> parser({'date': date(2015, 5, 4)}, objconf)
 62 |         '05/04/2015'
 63 |     """
 64 |     timetuple = date["date"].timetuple()
 65 |     return kwargs["stream"] if skip else strftime(objconf.format, timetuple)
 66 | 
 67 | 
 68 | @processor(DEFAULTS, isasync=True, **OPTS)
 69 | def async_pipe(*args, **kwargs):
 70 |     """A processor module that asynchronously formats a date.
 71 | 
 72 |     Args:
 73 |         item (dict): The entry to process
 74 |         kwargs (dict): The keyword arguments passed to the wrapper
 75 | 
 76 |     Kwargs:
 77 |         conf (dict): The pipe configuration. May contain the keys 'format' or
 78 |             'field'.
 79 | 
 80 |             format (str): Format string passed to time.strftime (default:
 81 |                 '%m/%d/%Y %H:%M:%S', i.e., '02/12/2008 20:45:00')
 82 | 
 83 |             assign (str): Attribute to assign parsed content (default:
 84 |                 dateformat)
 85 | 
 86 |             field (str): Item attribute from which to obtain the string to be
 87 |                 formatted (default: 'date')
 88 | 
 89 |     Returns:
 90 |         Deferred: twisted.internet.defer.Deferred item with formatted date
 91 | 
 92 |     Examples:
 93 |         >>> from datetime import date
 94 |         >>> from riko.bado import react
 95 |         >>> from riko.bado.mock import FakeReactor
 96 |         >>>
 97 |         >>> def run(reactor):
 98 |         ...     callback = lambda x: print(next(x)['dateformat'])
 99 |         ...     d = async_pipe({'date': date(2015, 5, 4)})
100 |         ...     return d.addCallbacks(callback, logger.error)
101 |         >>>
102 |         >>> try:
103 |         ...     react(run, _reactor=FakeReactor())
104 |         ... except SystemExit:
105 |         ...     pass
106 |         ...
107 |         05/04/2015 00:00:00
108 |     """
109 |     return parser(*args, **kwargs)
110 | 
111 | 
112 | @processor(DEFAULTS, **OPTS)
113 | def pipe(*args, **kwargs):
114 |     """A processor module that formats a date.
115 | 
116 |     Args:
117 |         item (dict): The entry to process
118 |         kwargs (dict): The keyword arguments passed to the wrapper
119 | 
120 |     Kwargs:
121 |         conf (dict): The pipe configuration. May contain the keys 'format' or
122 |             'field'.
123 | 
124 |             format (str): Format string passed to time.strftime (default:
125 |                 '%m/%d/%Y %H:%M:%S', i.e., '02/12/2008 20:45:00')
126 | 
127 |         assign (str): Attribute to assign parsed content (default:
128 |             dateformat)
129 | 
130 |         field (str): Item attribute from which to obtain the string to be
131 |             formatted (default: 'date')
132 | 
133 |     Returns:
134 |         dict: an item with formatted date string
135 | 
136 |     Examples:
137 |         >>> from datetime import date
138 |         >>> item = {'date': date(2015, 5, 4)}
139 |         >>> next(pipe(item))['dateformat']
140 |         '05/04/2015 00:00:00'
141 |         >>> next(pipe(item, conf={'format': '%Y'}))['dateformat']
142 |         '2015'
143 |         >>> next(pipe({'date': '05/04/2015'}))['dateformat']
144 |         '05/04/2015 00:00:00'
145 |     """
146 |     return parser(*args, **kwargs)
147 | 


--------------------------------------------------------------------------------
/riko/modules/feedautodiscovery.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.feedautodiscovery
  5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for finding the all available RSS and Atom feeds in a web
  7 | site.
  8 | 
  9 | Lets you enter a url and then examines those pages for information (like link
 10 | rel tags) about available feeds. If information about more than one feed is
 11 | found, then multiple items are returned. Because more than one feed can be
 12 | returned, the output from this module is often piped into a Fetch Feed module.
 13 | 
 14 | Also note that not all sites provide auto-discovery links on their web site's
 15 | home page. For a simpler alternative, try the Fetch Site Feed Module. It
 16 | returns the content from the first discovered feed.
 17 | 
 18 | Examples:
 19 |     basic usage::
 20 | 
 21 |         >>> from riko import get_path
 22 |         >>> from riko.modules.feedautodiscovery import pipe
 23 |         >>>
 24 |         >>> entry = next(pipe(conf={'url': get_path('bbc.html')}))
 25 |         >>> sorted(entry) == ['href', 'hreflang', 'link', 'rel', 'tag']
 26 |         True
 27 |         >>> entry['link'] == 'file://riko/data/greenhughes.xml'
 28 |         True
 29 | 
 30 | Attributes:
 31 |     OPTS (dict): The default pipe options
 32 |     DEFAULTS (dict): The default parser options
 33 | """
 34 | import pygogo as gogo
 35 | 
 36 | from . import processor
 37 | from riko import autorss
 38 | from riko.utils import get_abspath
 39 | from riko.bado import coroutine, return_value
 40 | 
 41 | 
 42 | OPTS = {"ftype": "none"}
 43 | logger = gogo.Gogo(__name__, monolog=True).logger
 44 | 
 45 | 
 46 | @coroutine
 47 | def async_parser(_, objconf, skip=False, **kwargs):
 48 |     """Asynchronously parses the pipe content
 49 | 
 50 |     Args:
 51 |         _ (None): Ignored
 52 |         objconf (obj): The pipe configuration (an Objectify instance)
 53 |         skip (bool): Don't parse the content
 54 |         kwargs (dict): Keyword arguments
 55 | 
 56 |     Kwargs:
 57 |         stream (dict): The original item
 58 | 
 59 |     Returns:
 60 |         Iter[dict]: Deferred stream
 61 | 
 62 |     Examples:
 63 |         >>> from riko import get_path
 64 |         >>> from riko.bado import react
 65 |         >>> from riko.bado.mock import FakeReactor
 66 |         >>> from meza.fntools import Objectify
 67 |         >>>
 68 |         >>> def run(reactor):
 69 |         ...     callback = lambda x: print(next(x)['link'])
 70 |         ...     objconf = Objectify({'url': get_path('bbc.html')})
 71 |         ...     d = async_parser(None, objconf, stream={})
 72 |         ...     return d.addCallbacks(callback, logger.error)
 73 |         >>>
 74 |         >>> try:
 75 |         ...     react(run, _reactor=FakeReactor())
 76 |         ... except SystemExit:
 77 |         ...     pass
 78 |         ...
 79 |         file://riko/data/greenhughes.xml
 80 |     """
 81 |     if skip:
 82 |         stream = kwargs["stream"]
 83 |     else:
 84 |         url = get_abspath(objconf.url)
 85 |         stream = yield autorss.async_get_rss(url)
 86 | 
 87 |     return_value(stream)
 88 | 
 89 | 
 90 | def parser(_, objconf, skip=False, **kwargs):
 91 |     """Parses the pipe content
 92 | 
 93 |     Args:
 94 |         _ (None): Ignored
 95 |         objconf (obj): The pipe configuration (an Objectify instance)
 96 |         skip (bool): Don't parse the content
 97 |         kwargs (dict): Keyword arguments
 98 | 
 99 |     Kwargs:
100 |         stream (dict): The original item
101 | 
102 |     Returns:
103 |         Iter[dict]: The stream of items
104 | 
105 |     Examples:
106 |         >>> from riko import get_path
107 |         >>> from meza.fntools import Objectify
108 |         >>>
109 |         >>> objconf = Objectify({'url': get_path('bbc.html')})
110 |         >>> result = parser(None, objconf, stream={})
111 |         >>> next(result)['link'] == 'file://riko/data/greenhughes.xml'
112 |         True
113 |     """
114 |     if skip:
115 |         stream = kwargs["stream"]
116 |     else:
117 |         url = get_abspath(objconf.url)
118 |         stream = autorss.get_rss(url)
119 | 
120 |     return stream
121 | 
122 | 
123 | @processor(isasync=True, **OPTS)
124 | def async_pipe(*args, **kwargs):
125 |     """A source that fetches and parses the first feed found on a site.
126 | 
127 |     Args:
128 |         item (dict): The entry to process (not used)
129 |         kwargs (dict): The keyword arguments passed to the wrapper.
130 | 
131 |     Kwargs:
132 |         conf (dict): The pipe configuration. Must contain the key 'url'.
133 | 
134 |             url (str): The web site to fetch
135 | 
136 |     Returns:
137 |         dict: twisted.internet.defer.Deferred an iterator of items
138 | 
139 |     Examples:
140 |         >>> from riko import get_path
141 |         >>> from riko.bado import react
142 |         >>> from riko.bado.mock import FakeReactor
143 |         >>>
144 |         >>> def run(reactor):
145 |         ...     callback = lambda x: print(next(x)['link'])
146 |         ...     d = async_pipe(conf={'url': get_path('bbc.html')})
147 |         ...     return d.addCallbacks(callback, logger.error)
148 |         >>>
149 |         >>> try:
150 |         ...     react(run, _reactor=FakeReactor())
151 |         ...     pass
152 |         ... except SystemExit:
153 |         ...     pass
154 |         ...
155 |         file://riko/data/greenhughes.xml
156 |     """
157 |     return async_parser(*args, **kwargs)
158 | 
159 | 
160 | @processor(**OPTS)
161 | def pipe(*args, **kwargs):
162 |     """A source that fetches and parses the first feed found on a site.
163 | 
164 |     Args:
165 |         item (dict): The entry to process (not used)
166 |         kwargs (dict): The keyword arguments passed to the wrapper
167 | 
168 |     Kwargs:
169 |         conf (dict): The pipe configuration. Must contain the key 'url'.
170 | 
171 |             url (str): The web site to fetch
172 | 
173 |     Yields:
174 |         dict: item
175 | 
176 |     Examples:
177 |         >>> from riko import get_path
178 |         >>> conf = {'url': get_path('bbc.html')}
179 |         >>> next(pipe(conf=conf))['link'] == 'file://riko/data/greenhughes.xml'
180 |         True
181 |     """
182 |     return parser(*args, **kwargs)
183 | 


--------------------------------------------------------------------------------
/riko/modules/fetchsitefeed.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.fetchsitefeed
  5 | ~~~~~~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for fetching the first RSS or Atom feed discovered in a web
  7 | site.
  8 | 
  9 | Uses a web site's auto-discovery information to find an RSS or Atom feed. If
 10 | multiple feeds are discovered, only the first one is fetched. If a site changes
 11 | their feed URL in the future, this module can discover the new URL for you (as
 12 | long as the site updates their auto-discovery links). For sites with only one
 13 | stream, this module provides a good alternative to the Fetch Feed module.
 14 | 
 15 | Also note that not all sites provide auto-discovery links on their web site's
 16 | home page.
 17 | 
 18 | This module provides a simpler alternative to the Feed Auto-Discovery Module.
 19 | The latter returns a list of information about all the feeds discovered in a
 20 | site, but (unlike this module) doesn't fetch the feed data itself.
 21 | 
 22 | Examples:
 23 |     basic usage::
 24 | 
 25 |         >>> from riko import get_path
 26 |         >>> from riko.modules.fetchsitefeed import pipe
 27 |         >>>
 28 |         >>> title = 'Using NFC tags in the car'
 29 |         >>> next(pipe(conf={'url': get_path('bbc.html')}))['title'] == title
 30 |         True
 31 | 
 32 | Attributes:
 33 |     OPTS (dict): The default pipe options
 34 |     DEFAULTS (dict): The default parser options
 35 | """
 36 | import pygogo as gogo
 37 | 
 38 | from . import processor
 39 | 
 40 | from riko import autorss
 41 | from riko.utils import gen_entries, get_abspath
 42 | from riko.parsers import parse_rss
 43 | from riko.bado import coroutine, return_value, io
 44 | 
 45 | OPTS = {"ftype": "none"}
 46 | logger = gogo.Gogo(__name__, monolog=True).logger
 47 | 
 48 | 
 49 | @coroutine
 50 | def async_parser(_, objconf, skip=False, **kwargs):
 51 |     """Asynchronously parses the pipe content
 52 | 
 53 |     Args:
 54 |         _ (None): Ignored
 55 |         objconf (obj): The pipe configuration (an Objectify instance)
 56 |         skip (bool): Don't parse the content
 57 |         kwargs (dict): Keyword arguments
 58 | 
 59 |     Kwargs:
 60 |         stream (dict): The original item
 61 | 
 62 |     Returns:
 63 |         Iter[dict]: The stream of items
 64 | 
 65 |     Examples:
 66 |         >>> from riko import get_path
 67 |         >>> from riko.bado import react
 68 |         >>> from riko.bado.mock import FakeReactor
 69 |         >>> from meza.fntools import Objectify
 70 |         >>>
 71 |         >>> def run(reactor):
 72 |         ...     callback = lambda x: print(next(x)['title'])
 73 |         ...     objconf = Objectify({'url': get_path('bbc.html')})
 74 |         ...     d = async_parser(None, objconf, stream={})
 75 |         ...     return d.addCallbacks(callback, logger.error)
 76 |         >>>
 77 |         >>> try:
 78 |         ...     react(run, _reactor=FakeReactor())
 79 |         ... except SystemExit:
 80 |         ...     pass
 81 |         ...
 82 |         Using NFC tags in the car
 83 |     """
 84 |     if skip:
 85 |         stream = kwargs["stream"]
 86 |     else:
 87 |         url = get_abspath(objconf.url)
 88 |         rss = yield autorss.async_get_rss(url)
 89 |         link = get_abspath(next(rss)["link"])
 90 |         content = yield io.async_url_read(link)
 91 |         parsed = parse_rss(content)
 92 |         stream = gen_entries(parsed)
 93 | 
 94 |     return_value(stream)
 95 | 
 96 | 
 97 | def parser(_, objconf, skip=False, **kwargs):
 98 |     """Parses the pipe content
 99 | 
100 |     Args:
101 |         _ (None): Ignored
102 |         objconf (obj): The pipe configuration (an Objectify instance)
103 |         skip (bool): Don't parse the content
104 |         kwargs (dict): Keyword arguments
105 | 
106 |     Kwargs:
107 |         stream (dict): The original item
108 | 
109 |     Returns:
110 |         Iter[dict]: The stream of items
111 | 
112 |     Examples:
113 |         >>> from riko import get_path
114 |         >>> from meza.fntools import Objectify
115 |         >>>
116 |         >>> objconf = Objectify({'url': get_path('bbc.html')})
117 |         >>> result = parser(None, objconf, stream={})
118 |         >>> next(result)['title'] == 'Using NFC tags in the car'
119 |         True
120 |     """
121 |     if skip:
122 |         stream = kwargs["stream"]
123 |     else:
124 |         url = get_abspath(objconf.url)
125 |         rss = autorss.get_rss(url)
126 |         objconf.url = get_abspath(next(rss)["link"])
127 | 
128 |         parsed = parse_rss(**objconf)
129 |         stream = gen_entries(parsed)
130 | 
131 |     return stream
132 | 
133 | 
134 | @processor(isasync=True, **OPTS)
135 | def async_pipe(*args, **kwargs):
136 |     """A source that fetches and parses the first feed found on a site.
137 | 
138 |     Args:
139 |         item (dict): The entry to process (not used)
140 |         kwargs (dict): The keyword arguments passed to the wrapper.
141 | 
142 |     Kwargs:
143 |         conf (dict): The pipe configuration. Must contain the key 'url'.
144 | 
145 |             url (str): The web site to fetch
146 | 
147 |     Returns:
148 |         dict: twisted.internet.defer.Deferred an iterator of items
149 | 
150 |     Examples:
151 |         >>> from riko import get_path
152 |         >>> from riko.bado import react
153 |         >>> from riko.bado.mock import FakeReactor
154 |         >>>
155 |         >>> def run(reactor):
156 |         ...     callback = lambda x: print(next(x)['title'])
157 |         ...     d = async_pipe(conf={'url': get_path('bbc.html')})
158 |         ...     return d.addCallbacks(callback, logger.error)
159 |         >>>
160 |         >>> try:
161 |         ...     react(run, _reactor=FakeReactor())
162 |         ...     pass
163 |         ... except SystemExit:
164 |         ...     pass
165 |         ...
166 |         Using NFC tags in the car
167 |     """
168 |     return async_parser(*args, **kwargs)
169 | 
170 | 
171 | @processor(**OPTS)
172 | def pipe(*args, **kwargs):
173 |     """A source that fetches and parses the first feed found on a site.
174 | 
175 |     Args:
176 |         item (dict): The entry to process (not used)
177 |         kwargs (dict): The keyword arguments passed to the wrapper
178 | 
179 |     Kwargs:
180 |         conf (dict): The pipe configuration. Must contain the key 'url'.
181 | 
182 |             url (str): The web site to fetch
183 | 
184 |     Yields:
185 |         dict: item
186 | 
187 |     Examples:
188 |         >>> from riko import get_path
189 |         >>> title = 'Using NFC tags in the car'
190 |         >>> next(pipe(conf={'url': get_path('bbc.html')}))['title'] == title
191 |         True
192 |     """
193 |     return parser(*args, **kwargs)
194 | 


--------------------------------------------------------------------------------
/riko/modules/fetchtext.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.fetchtext
  5 | ~~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for fetching text data sources.
  7 | 
  8 | Accesses and extracts data from text sources on the web. This data can then be
  9 | merged with other data in your Pipe.
 10 | 
 11 | Examples:
 12 |     basic usage::
 13 | 
 14 |         >>> from riko import get_path
 15 |         >>> from riko.modules.fetchtext import pipe
 16 |         >>>
 17 |         >>> conf = {'url': get_path('lorem.txt')}
 18 |         >>> next(pipe(conf=conf))['content'] == 'What is Lorem Ipsum?'
 19 |         True
 20 | 
 21 | Attributes:
 22 |     OPTS (dict): The default pipe options
 23 |     DEFAULTS (dict): The default parser options
 24 | """
 25 | import pygogo as gogo
 26 | 
 27 | from . import processor
 28 | from riko import ENCODING
 29 | from riko.utils import fetch, auto_close, get_abspath
 30 | from riko.bado import coroutine, return_value, io
 31 | 
 32 | OPTS = {"ftype": "none", "assign": "content"}
 33 | DEFAULTS = {"encoding": ENCODING}
 34 | logger = gogo.Gogo(__name__, monolog=True).logger
 35 | 
 36 | 
 37 | @coroutine
 38 | def async_parser(_, objconf, skip=False, **kwargs):
 39 |     """Asynchronously parses the pipe content
 40 | 
 41 |     Args:
 42 |         _ (None): Ignored
 43 |         objconf (obj): The pipe configuration (an Objectify instance)
 44 |         skip (bool): Don't parse the content
 45 |         kwargs (dict): Keyword arguments
 46 | 
 47 |     Kwargs:
 48 |         stream (dict): The original item
 49 | 
 50 |     Returns:
 51 |         Iter[dict]: The stream of items
 52 | 
 53 |     Examples:
 54 |         >>> from riko import get_path
 55 |         >>> from riko.bado import react
 56 |         >>> from riko.bado.mock import FakeReactor
 57 |         >>> from meza.fntools import Objectify
 58 |         >>>
 59 |         >>> def run(reactor):
 60 |         ...     callback = lambda x: print(next(x)['content'])
 61 |         ...     url = get_path('lorem.txt')
 62 |         ...     objconf = Objectify({'url': url, 'encoding': ENCODING})
 63 |         ...     d = async_parser(None, objconf, assign='content')
 64 |         ...     return d.addCallbacks(callback, logger.error)
 65 |         >>>
 66 |         >>> try:
 67 |         ...     react(run, _reactor=FakeReactor())
 68 |         ... except SystemExit:
 69 |         ...     pass
 70 |         ...
 71 |         What is Lorem Ipsum?
 72 |     """
 73 |     if skip:
 74 |         stream = kwargs["stream"]
 75 |     else:
 76 |         url = get_abspath(objconf.url)
 77 |         f = yield io.async_url_open(url)
 78 |         assign = kwargs["assign"]
 79 |         encoding = objconf.encoding
 80 |         _stream = ({assign: line.strip().decode(encoding)} for line in f)
 81 |         stream = auto_close(_stream, f)
 82 | 
 83 |     return_value(stream)
 84 | 
 85 | 
 86 | def parser(_, objconf, skip=False, **kwargs):
 87 |     """Parses the pipe content
 88 | 
 89 |     Args:
 90 |         _ (None): Ignored
 91 |         objconf (obj): The pipe configuration (an Objectify instance)
 92 |         skip (bool): Don't parse the content
 93 |         kwargs (dict): Keyword arguments
 94 | 
 95 |     Kwargs:
 96 |         stream (dict): The original item
 97 | 
 98 |     Returns:
 99 |         Iter[dict]: The stream of items
100 | 
101 |     Examples:
102 |         >>> from riko import get_path
103 |         >>> from meza.fntools import Objectify
104 |         >>>
105 |         >>> url = get_path('lorem.txt')
106 |         >>> objconf = Objectify({'url': url, 'encoding': ENCODING})
107 |         >>> result = parser(None, objconf, assign='content')
108 |         >>> next(result)['content'] == 'What is Lorem Ipsum?'
109 |         True
110 |     """
111 |     if skip:
112 |         stream = kwargs["stream"]
113 |     else:
114 |         f = fetch(decode=True, **objconf)
115 |         _stream = ({kwargs["assign"]: line.strip()} for line in f)
116 |         stream = auto_close(_stream, f)
117 | 
118 |     return stream
119 | 
120 | 
121 | @processor(DEFAULTS, isasync=True, **OPTS)
122 | def async_pipe(*args, **kwargs):
123 |     """A source that asynchronously fetches and parses an XML or JSON file to
124 |     return the entries.
125 | 
126 |     Args:
127 |         item (dict): The entry to process
128 |         kwargs (dict): The keyword arguments passed to the wrapper
129 | 
130 |     Kwargs:
131 |         conf (dict): The pipe configuration. Must contain the key 'url'. May
132 |             contain the key 'encoding'.
133 | 
134 |             url (str): The web site to fetch.
135 |             encoding (str): The file encoding (default: utf-8).
136 | 
137 |         assign (str): Attribute to assign parsed content (default: content)
138 | 
139 | 
140 |     Returns:
141 |         Deferred: twisted.internet.defer.Deferred stream of items
142 | 
143 |     Examples:
144 |         >>> from riko import get_path
145 |         >>> from riko.bado import react
146 |         >>> from riko.bado.mock import FakeReactor
147 |         >>>
148 |         >>> def run(reactor):
149 |         ...     callback = lambda x: print(next(x)['content'])
150 |         ...     conf = {'url': get_path('lorem.txt')}
151 |         ...     d = async_pipe(conf=conf)
152 |         ...     return d.addCallbacks(callback, logger.error)
153 |         >>>
154 |         >>> try:
155 |         ...     react(run, _reactor=FakeReactor())
156 |         ... except SystemExit:
157 |         ...     pass
158 |         ...
159 |         What is Lorem Ipsum?
160 |     """
161 |     return async_parser(*args, **kwargs)
162 | 
163 | 
164 | @processor(DEFAULTS, **OPTS)
165 | def pipe(*args, **kwargs):
166 |     """A source that fetches and parses an XML or JSON file to
167 |     return the entries.
168 | 
169 |     Args:
170 |         item (dict): The entry to process
171 |         kwargs (dict): The keyword arguments passed to the wrapper
172 | 
173 |     Kwargs:
174 |         conf (dict): The pipe configuration. Must contain the key 'url'. May
175 |             contain the key 'encoding'.
176 | 
177 |             url (str): The web site to fetch
178 |             encoding (str): The file encoding (default: utf-8).
179 | 
180 |         assign (str): Attribute to assign parsed content (default: content)
181 | 
182 |     Returns:
183 |         dict: an iterator of items
184 | 
185 |     Examples:
186 |         >>> from riko import get_path
187 |         >>>
188 |         >>> conf = {'url': get_path('lorem.txt')}
189 |         >>> next(pipe(conf=conf))['content'] == 'What is Lorem Ipsum?'
190 |         True
191 |     """
192 |     return parser(*args, **kwargs)
193 | 


--------------------------------------------------------------------------------
/riko/modules/geolocate.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.geolocate
  5 | ~~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for obtaining the geo location of an ip address, street
  7 | address, currency code, or lat/lon coordinates.
  8 | 
  9 | Examples:
 10 |     basic usage::
 11 | 
 12 |         >>> from riko.modules.geolocate import pipe
 13 |         >>>
 14 |         >>> address = '123 Bakersville St., London'
 15 |         >>> geolocate = next(pipe({'content': address}))['geolocate']
 16 |         >>> geolocate['country'] == 'United States'
 17 |         True
 18 | 
 19 | 
 20 | Attributes:
 21 |     OPTS (dict): The default pipe options
 22 |     DEFAULTS (dict): The default parser options
 23 | """
 24 | import pygogo as gogo
 25 | 
 26 | from . import processor
 27 | from riko.utils import cast
 28 | 
 29 | 
 30 | OPTS = {"ftype": "text", "field": "content"}
 31 | DEFAULTS = {"type": "street_address"}
 32 | logger = gogo.Gogo(__name__, monolog=True).logger
 33 | 
 34 | 
 35 | def parser(address, objconf, skip=False, **kwargs):
 36 |     """Parses the pipe content
 37 | 
 38 |     Args:
 39 |         address (str): The address to lookup
 40 |         objconf (obj): The pipe configuration (an Objectify instance)
 41 |         skip (bool): Don't parse the content
 42 |         kwargs (dict): Keyword arguments
 43 | 
 44 |     Kwargs:
 45 |         assign (str): Attribute to assign parsed content (default: geolocate)
 46 |         stream (dict): The original item
 47 | 
 48 |     Returns:
 49 |         Tuple(dict, bool): Tuple of (item, skip)
 50 | 
 51 |     Examples:
 52 |         >>> from riko import get_path
 53 |         >>> from meza.fntools import Objectify
 54 |         >>>
 55 |         >>> item = {'content': 'GBP'}
 56 |         >>> objconf = Objectify({'type': 'currency'})
 57 |         >>> kwargs = {'stream': item, 'assign': 'content'}
 58 |         >>> country = 'United Kingdom'
 59 |         >>> parser(item['content'], objconf, **kwargs)['country'] == country
 60 |         True
 61 |     """
 62 |     if skip:
 63 |         location = kwargs["stream"]
 64 |     else:
 65 |         location = cast(address, "location", loc_type=objconf.type)
 66 | 
 67 |     return location
 68 | 
 69 | 
 70 | # @processor(DEFAULTS, isasync=True, **OPTS)
 71 | # def async_pipe(*args, **kwargs):
 72 | #     """A processor module that asynchronously performs basic arithmetic, such
 73 | #     as addition and subtraction.
 74 | 
 75 | #     Args:
 76 | #         item (dict): The entry to process
 77 | #         kwargs (dict): The keyword arguments passed to the wrapper
 78 | 
 79 | #     Kwargs:
 80 | #         conf (dict): The pipe configuration. May contain the key 'type'.
 81 | 
 82 | #             type (str): The type of geolocation to perform. Must be one of
 83 | #                 'coordinates', 'street_address', 'ip_address', or 'currency'
 84 | #                 (default: 'street_address').
 85 | 
 86 | #         assign (str): Attribute to assign parsed content (default: geolocate)
 87 | #         field (str): Item attribute from which to obtain the first address to
 88 | #             operate on (default: 'content')
 89 | 
 90 | #     Returns:
 91 | #         Deferred: twisted.internet.defer.Deferred item with formatted currency
 92 | 
 93 | #     Examples:
 94 | #         >>> from riko.bado import react
 95 | #         >>> from riko.bado.mock import FakeReactor
 96 | #         >>>
 97 | #         >>> def run(reactor):
 98 | #         ...     callback = lambda x: print(next(x)['geolocate']['country'])
 99 | #         ...     conf = {'type': 'currency'}
100 | #         ...     d = async_pipe({'content': 'GBP'}, conf=conf)
101 | #         ...     return d.addCallbacks(callback, logger.error)
102 | #         >>>
103 | #         >>> try:
104 | #         ...     react(run, _reactor=FakeReactor())
105 | #         ... except SystemExit:
106 | #         ...     pass
107 | #         ...
108 | #         United Kingdom
109 | #     """
110 | #     return parser(*args, **kwargs)
111 | 
112 | 
113 | @processor(DEFAULTS, **OPTS)
114 | def pipe(*args, **kwargs):
115 |     """A processor module that performs basic arithmetic, such as addition and
116 |     subtraction.
117 | 
118 |     Args:
119 |         item (dict): The entry to process
120 |         kwargs (dict): The keyword arguments passed to the wrapper
121 | 
122 |     Kwargs:
123 |         conf (dict): The pipe configuration. May contain the key 'type'.
124 | 
125 |             type (str): The type of geolocation to perform. Must be one of
126 |                 'coordinates', 'street_address', 'ip_address', or 'currency'
127 |                 (default: 'street_address').
128 | 
129 |         assign (str): Attribute to assign parsed content (default: geolocate)
130 |         field (str): Item attribute from which to obtain the first address to
131 |             operate on (default: 'content')
132 | 
133 |     Returns:
134 |         dict: an item with math result
135 | 
136 |     Examples:
137 |         >>> conf = {'type': 'currency'}
138 |         >>> geolocate = next(pipe({'content': 'INR'}, conf=conf))['geolocate']
139 |         >>> geolocate['country'] == 'India'
140 |         True
141 |         >>> address = '123 Bakersville St., London'
142 |         >>> kwargs = {'field': 'address', 'assign': 'result'}
143 |         >>> geolocate = next(pipe({'address': address}, **kwargs))['result']
144 |         >>> geolocate['country'] == 'United States'
145 |         True
146 |     """
147 |     return parser(*args, **kwargs)
148 | 


--------------------------------------------------------------------------------
/riko/modules/hash.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.hash
  5 | ~~~~~~~~~~~~~~~~~
  6 | Provides functions for hashing text.
  7 | 
  8 | Note: If the PYTHONHASHSEED environment variable is set to an integer value,
  9 | it is used as a fixed seed for generating the hash. Its purpose is to allow
 10 | repeatable hashing across python processes and versions. The integer must be a
 11 | decimal number in the range [0, 4294967295].
 12 | 
 13 | Specifying the value 0 will disable hash randomization. If this variable is set
 14 | to `random`, a random value is used to seed the hashes. Hash randomization is
 15 | is enabled by default for Python 3.2.3+, and disabled otherwise.
 16 | 
 17 | Examples:
 18 |     basic usage::
 19 | 
 20 |         >>> from riko.modules.hash import pipe
 21 |         >>>
 22 |         >>> _hash = ctypes.c_uint(hash('hello world')).value
 23 |         >>> next(pipe({'content': 'hello world'}))['hash'] == _hash
 24 |         True
 25 | 
 26 | Attributes:
 27 |     OPTS (dict): The default pipe options
 28 |     DEFAULTS (dict): The default parser options
 29 | """
 30 | import ctypes
 31 | 
 32 | from . import processor
 33 | import pygogo as gogo
 34 | 
 35 | OPTS = {"ftype": "text", "ptype": "none", "field": "content"}
 36 | DEFAULTS = {}
 37 | logger = gogo.Gogo(__name__, monolog=True).logger
 38 | 
 39 | 
 40 | def parser(word, _, skip=False, **kwargs):
 41 |     """Parsers the pipe content
 42 | 
 43 |     Args:
 44 |         item (obj): The entry to process (a DotDict instance)
 45 |         _ (None): Ignored.
 46 |         skip (bool): Don't parse the content
 47 |         kwargs (dict): Keyword arguments
 48 | 
 49 |     Kwargs:
 50 |         assign (str): Attribute to assign parsed content (default: hash)
 51 |         stream (dict): The original item
 52 | 
 53 |     Returns:
 54 |         dict: The item
 55 | 
 56 |     Examples:
 57 |         >>> from meza.fntools import Objectify
 58 |         >>>
 59 |         >>> _hash = ctypes.c_uint(hash('hello world')).value
 60 |         >>> item = {'content': 'hello world'}
 61 |         >>> kwargs = {'stream': item}
 62 |         >>> parser(item['content'], None, **kwargs) == _hash
 63 |         True
 64 |     """
 65 |     return kwargs["stream"] if skip else ctypes.c_uint(hash(word)).value
 66 | 
 67 | 
 68 | @processor(DEFAULTS, isasync=True, **OPTS)
 69 | def async_pipe(*args, **kwargs):
 70 |     """A processor module that asynchronously hashes the field of an item.
 71 | 
 72 |     Args:
 73 |         item (dict): The entry to process
 74 |         kwargs (dict): The keyword arguments passed to the wrapper
 75 | 
 76 |     Kwargs:
 77 |         assign (str): Attribute to assign parsed content (default: hash)
 78 |         field (str): Item attribute to operate on (default: 'content')
 79 | 
 80 |     Returns:
 81 |        Deferred: twisted.internet.defer.Deferred item with hashed content
 82 | 
 83 |     Examples:
 84 |         >>> from riko.bado import react
 85 |         >>> from riko.bado.mock import FakeReactor
 86 |         >>>
 87 |         >>> _hash = ctypes.c_uint(hash('hello world')).value
 88 |         >>>
 89 |         >>> def run(reactor):
 90 |         ...     callback = lambda x: print(next(x)['hash'] == _hash)
 91 |         ...     d = async_pipe({'content': 'hello world'})
 92 |         ...     return d.addCallbacks(callback, logger.error)
 93 |         >>>
 94 |         >>> try:
 95 |         ...     react(run, _reactor=FakeReactor())
 96 |         ... except SystemExit:
 97 |         ...     pass
 98 |         ...
 99 |         True
100 |     """
101 |     return parser(*args, **kwargs)
102 | 
103 | 
104 | @processor(**OPTS)
105 | def pipe(*args, **kwargs):
106 |     """A processor that hashes the field of an item.
107 | 
108 |     Args:
109 |         item (dict): The entry to process
110 |         kwargs (dict): The keyword arguments passed to the wrapper
111 | 
112 |     Kwargs:
113 |         assign (str): Attribute to assign parsed content (default: hash)
114 |         field (str): Item attribute to operate on (default: 'content')
115 | 
116 |     Yields:
117 |         dict: an item with hashed content
118 | 
119 |     Examples:
120 |         >>> _hash = ctypes.c_uint(hash('hello world')).value
121 |         >>> next(pipe({'content': 'hello world'}))['hash'] == _hash
122 |         True
123 |         >>> _hash = ctypes.c_uint(hash('greeting')).value
124 |         >>> kwargs = {'field': 'title', 'assign': 'result'}
125 |         >>> next(pipe({'title': 'greeting'}, **kwargs))['result'] == _hash
126 |         True
127 |     """
128 |     return parser(*args, **kwargs)
129 | 


--------------------------------------------------------------------------------
/riko/modules/itembuilder.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.itembuilder
  5 | ~~~~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for creating a single-item data source
  7 | 
  8 | With the Item Builder module, you can create a single-item data source by
  9 | assigning values to one or more item attributes. The module lets you assign
 10 | a value to an attribute.
 11 | 
 12 | Item Builder's strength is its ability to restructure and rename multiple
 13 | elements in a stream. When Item Builder is fed an input stream, the assigned
 14 | values can be existing attributes of the stream. These attributes can be
 15 | reassigned or used to create entirely new attributes.
 16 | 
 17 | Examples:
 18 |     basic usage::
 19 | 
 20 |         >>> from riko.modules.itembuilder import pipe
 21 |         >>>
 22 |         >>> attrs = {'key': 'title', 'value': 'the title'}
 23 |         >>> next(pipe(conf={'attrs': attrs}))['title'] == 'the title'
 24 |         True
 25 | 
 26 | Attributes:
 27 |     OPTS (dict): The default pipe options
 28 |     DEFAULTS (dict): The default parser options
 29 | """
 30 | 
 31 | from . import processor
 32 | import pygogo as gogo
 33 | from riko.dotdict import DotDict
 34 | 
 35 | OPTS = {"listize": True, "extract": "attrs", "ftype": "none"}
 36 | logger = gogo.Gogo(__name__, monolog=True).logger
 37 | 
 38 | 
 39 | def parser(_, attrs, skip=False, **kwargs):
 40 |     """Parses the pipe content
 41 | 
 42 |     Args:
 43 |         _ (None): Ignored
 44 |         attrs (List[dict]): Attributes
 45 |         skip (bool): Don't parse the content
 46 |         kwargs (dict): Keyword arguments
 47 | 
 48 |     Kwargs:
 49 |         stream (dict): The original item
 50 | 
 51 |     Returns:
 52 |         Iter(dict): The stream of items
 53 | 
 54 |     Examples:
 55 |         >>> from meza.fntools import Objectify
 56 |         >>> attrs = [
 57 |         ...     {'key': 'title', 'value': 'the title'},
 58 |         ...     {'key': 'desc', 'value': 'the desc'}]
 59 |         >>> result = parser(None, map(Objectify, attrs))
 60 |         >>> result == {'title': 'the title', 'desc': 'the desc'}
 61 |         True
 62 |     """
 63 |     items = ((a.key, a.value) for a in attrs)
 64 |     return kwargs["stream"] if skip else DotDict(items)
 65 | 
 66 | 
 67 | @processor(isasync=True, **OPTS)
 68 | def async_pipe(*args, **kwargs):
 69 |     """A source that asynchronously builds an item.
 70 | 
 71 |     Args:
 72 |         item (dict): The entry to process
 73 |         kwargs (dict): The keyword arguments passed to the wrapper
 74 | 
 75 |     Kwargs:
 76 |         conf (dict): The pipe configuration. Must contain the key 'attrs'.
 77 | 
 78 |             attrs (dict): can be either a dict or list of dicts. Must contain
 79 |                 the keys 'key' and 'value'.
 80 | 
 81 |                 key (str): the attribute name
 82 |                 value (str): the attribute value
 83 | 
 84 |     Returns:
 85 |         dict: twisted.internet.defer.Deferred an iterator of items
 86 | 
 87 |     Examples:
 88 |         >>> from riko.bado import react
 89 |         >>> from riko.bado.mock import FakeReactor
 90 |         >>>
 91 |         >>> def run(reactor):
 92 |         ...     callback = lambda x: print(next(x)['title'])
 93 |         ...     attrs = [
 94 |         ...         {'key': 'title', 'value': 'the title'},
 95 |         ...         {'key': 'desc.content', 'value': 'the desc'}]
 96 |         ...
 97 |         ...     d = async_pipe(conf={'attrs': attrs})
 98 |         ...     return d.addCallbacks(callback, logger.error)
 99 |         >>>
100 |         >>> try:
101 |         ...     react(run, _reactor=FakeReactor())
102 |         ...     pass
103 |         ... except SystemExit:
104 |         ...     pass
105 |         ...
106 |         the title
107 |     """
108 |     return parser(*args, **kwargs)
109 | 
110 | 
111 | @processor(**OPTS)
112 | def pipe(*args, **kwargs):
113 |     """A source that builds an item.
114 | 
115 |     Args:
116 |         item (dict): The entry to process
117 |         kwargs (dict): The keyword arguments passed to the wrapper
118 | 
119 |     Kwargs:
120 |         conf (dict): The pipe configuration. Must contain the key 'attrs'.
121 | 
122 |             attrs (dict): can be either a dict or list of dicts. Must contain
123 |                 the keys 'key' and 'value'.
124 | 
125 |                 key (str): the attribute name
126 |                 value (str): the attribute value
127 | 
128 |     Yields:
129 |         dict: an item
130 | 
131 |     Examples:
132 |         >>> attrs = [
133 |         ...     {'key': 'title', 'value': 'the title'},
134 |         ...     {'key': 'desc.content', 'value': 'the desc'}]
135 |         >>> next(pipe(conf={'attrs': attrs})) == {
136 |         ...     'title': 'the title', 'desc': {'content': 'the desc'}}
137 |         True
138 |     """
139 |     return parser(*args, **kwargs)
140 | 


--------------------------------------------------------------------------------
/riko/modules/reverse.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.reverse
  5 | ~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for flipping the order of all items in a stream.
  7 | 
  8 | Examples:
  9 |     basic usage::
 10 | 
 11 |         >>> from riko.modules.reverse import pipe
 12 |         >>>
 13 |         >>> next(pipe({'x': x} for x in range(5))) == {'x': 4}
 14 |         True
 15 | 
 16 | Attributes:
 17 |     OPTS (dict): The default pipe options
 18 |     DEFAULTS (dict): The default parser options
 19 | """
 20 | 
 21 | from . import operator
 22 | import pygogo as gogo
 23 | 
 24 | # disable `dictize` since we do not need to access the configuration
 25 | OPTS = {"dictize": False}
 26 | logger = gogo.Gogo(__name__, monolog=True).logger
 27 | 
 28 | 
 29 | def parser(stream, objconf, tuples, **kwargs):
 30 |     """Parses the pipe content
 31 | 
 32 |     Args:
 33 |         stream (Iter[dict]): The source. Note: this shares the `tuples`
 34 |             iterator, so consuming it will consume `tuples` as well.
 35 | 
 36 |         objconf (obj): the item independent configuration (an Objectify
 37 |             instance).
 38 | 
 39 |         tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf)
 40 |             `item` is an element in the source stream and `objconf` is the item
 41 |             configuration (an Objectify instance). Note: this shares the
 42 |             `stream` iterator, so consuming it will consume `stream` as well.
 43 | 
 44 |         kwargs (dict): Keyword arguments.
 45 | 
 46 |     Returns:
 47 |         Iter(dict): The output stream
 48 | 
 49 |     Examples:
 50 |         >>> from itertools import repeat
 51 |         >>>
 52 |         >>> kwargs = {}
 53 |         >>> stream = ({'x': x} for x in range(5))
 54 |         >>> tuples = zip(stream, repeat(None))
 55 |         >>> next(parser(stream, None, tuples, **kwargs)) == {'x': 4}
 56 |         True
 57 |     """
 58 |     return reversed(list(stream))
 59 | 
 60 | 
 61 | @operator(isasync=True, **OPTS)
 62 | def async_pipe(*args, **kwargs):
 63 |     """An operator that asynchronously reverses the order of source items in
 64 |     a stream. Note that this pipe is not lazy.
 65 | 
 66 |     Args:
 67 |         items (Iter[dict]): The source.
 68 |         kwargs (dict): The keyword arguments passed to the wrapper
 69 | 
 70 |     Returns:
 71 |         Deferred: twisted.internet.defer.Deferred iterator of the number of
 72 |             counted items
 73 | 
 74 |     Examples:
 75 |         >>> from riko.bado import react
 76 |         >>> from riko.bado.mock import FakeReactor
 77 |         >>>
 78 |         >>> def run(reactor):
 79 |         ...     callback = lambda x: print(next(x) == {'x': 4})
 80 |         ...     items = ({'x': x} for x in range(5))
 81 |         ...     d = async_pipe(items)
 82 |         ...     return d.addCallbacks(callback, logger.error)
 83 |         >>>
 84 |         >>> try:
 85 |         ...     react(run, _reactor=FakeReactor())
 86 |         ... except SystemExit:
 87 |         ...     pass
 88 |         ...
 89 |         True
 90 |     """
 91 |     return parser(*args, **kwargs)
 92 | 
 93 | 
 94 | @operator(**OPTS)
 95 | def pipe(*args, **kwargs):
 96 |     """An operator that eagerly reverses the order of source items in a stream.
 97 | 
 98 |     Args:
 99 |         items (Iter[dict]): The source.
100 |         kwargs (dict): The keyword arguments passed to the wrapper
101 | 
102 |     Yields:
103 |         dict: an item
104 | 
105 |     Examples:
106 |         >>> items = ({'x': x} for x in range(5))
107 |         >>> next(pipe(items)) == {'x': 4}
108 |         True
109 |     """
110 |     return parser(*args, **kwargs)
111 | 


--------------------------------------------------------------------------------
/riko/modules/simplemath.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.simplemath
  5 | ~~~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for performing simple mathematical operations, e.g.,
  7 | addition, subtraction, multiplication, division, modulo, averages, etc.
  8 | 
  9 | Examples:
 10 |     basic usage::
 11 | 
 12 |         >>> from decimal import Decimal
 13 |         >>> from riko.modules.simplemath import pipe
 14 |         >>>
 15 |         >>> conf = {'op': 'divide', 'other': '5'}
 16 |         >>> next(pipe({'content': '10'}, conf=conf))['simplemath']
 17 |         Decimal('2')
 18 | 
 19 | Attributes:
 20 |     OPTS (dict): The default pipe options
 21 |     DEFAULTS (dict): The default parser options
 22 | """
 23 | import operator
 24 | 
 25 | from . import processor
 26 | import pygogo as gogo
 27 | 
 28 | OPTS = {"ftype": "decimal", "ptype": "decimal", "field": "content"}
 29 | DEFAULTS = {}
 30 | logger = gogo.Gogo(__name__, monolog=True).logger
 31 | 
 32 | 
 33 | def mean(*nums):
 34 |     try:
 35 |         return sum(nums) / len(nums)
 36 |     except ZeroDivisionError:
 37 |         return float("inf")
 38 | 
 39 | 
 40 | OPS = {
 41 |     "add": operator.add,
 42 |     "subtract": operator.sub,
 43 |     "multiply": operator.mul,
 44 |     "mean": mean,
 45 |     "divide": operator.truediv,
 46 |     "floor": operator.floordiv,
 47 |     "modulo": operator.mod,
 48 |     "power": operator.pow,
 49 | }
 50 | 
 51 | 
 52 | def parser(num, objconf, skip=False, **kwargs):
 53 |     """Parsers the pipe content
 54 | 
 55 |     Args:
 56 |         num (Decimal): The first number to operate on
 57 |         objconf (obj): The pipe configuration (an Objectify instance)
 58 |         skip (bool): Don't parse the content
 59 | 
 60 |     Returns:
 61 |         dict: The formatted item
 62 | 
 63 |     Examples:
 64 |         >>> from meza.fntools import Objectify
 65 |         >>> conf = {'op': 'divide', 'other': 4}
 66 |         >>> objconf = Objectify(conf)
 67 |         >>> parser(10, objconf, conf=conf)
 68 |         2.5
 69 |     """
 70 |     operation = OPS[kwargs["conf"]["op"]]
 71 |     return kwargs["stream"] if skip else operation(num, objconf.other)
 72 | 
 73 | 
 74 | @processor(DEFAULTS, isasync=True, **OPTS)
 75 | def async_pipe(*args, **kwargs):
 76 |     """A processor module that asynchronously performs basic arithmetic, such
 77 |     as addition and subtraction.
 78 | 
 79 |     Args:
 80 |         item (dict): The entry to process
 81 |         kwargs (dict): The keyword arguments passed to the wrapper
 82 | 
 83 |     Kwargs:
 84 |         conf (dict): The pipe configuration. Must contain the keys 'other'
 85 |             and 'op'.
 86 | 
 87 |             other (number): The second number to operate on.
 88 |             op (str): The math operation. Must be one of 'addition',
 89 |                 'subtraction', 'multiplication', 'division', 'modulo',
 90 |                 'floor', 'power', or 'mean'.
 91 | 
 92 |         assign (str): Attribute to assign parsed content (default: simplemath)
 93 |         field (str): Item attribute from which to obtain the first number to
 94 |             operate on (default: 'content')
 95 | 
 96 |     Returns:
 97 |         Deferred: twisted.internet.defer.Deferred item with formatted currency
 98 | 
 99 |     Examples:
100 |         >>> from riko.bado import react
101 |         >>> from riko.bado.mock import FakeReactor
102 |         >>>
103 |         >>> def run(reactor):
104 |         ...     callback = lambda x: print(next(x)['simplemath'])
105 |         ...     conf = {'op': 'divide', 'other': '5'}
106 |         ...     d = async_pipe({'content': '10'}, conf=conf)
107 |         ...     return d.addCallbacks(callback, logger.error)
108 |         >>>
109 |         >>> try:
110 |         ...     react(run, _reactor=FakeReactor())
111 |         ... except SystemExit:
112 |         ...     pass
113 |         ...
114 |         2
115 |     """
116 |     return parser(*args, **kwargs)
117 | 
118 | 
119 | @processor(DEFAULTS, **OPTS)
120 | def pipe(*args, **kwargs):
121 |     """A processor module that performs basic arithmetic, such as addition and
122 |     subtraction.
123 | 
124 |     Args:
125 |         item (dict): The entry to process
126 |         kwargs (dict): The keyword arguments passed to the wrapper
127 | 
128 |     Kwargs:
129 |         conf (dict): The pipe configuration. Must contain the keys 'other'
130 |             and 'op'.
131 | 
132 |             other (number): The second number to operate on.
133 |             op (str): The math operation. Must be one of 'addition',
134 |                 'subtraction', 'multiplication', 'division', 'modulo',
135 |                 'floor', 'power', or 'mean'.
136 | 
137 |         assign (str): Attribute to assign parsed content (default: simplemath)
138 |         field (str): Item attribute from which to obtain the first number to
139 |             operate on (default: 'content')
140 | 
141 |     Returns:
142 |         dict: an item with math result
143 | 
144 |     Examples:
145 |         >>> from decimal import Decimal
146 |         >>> conf = {'op': 'divide', 'other': '5'}
147 |         >>> next(pipe({'content': '10'}, conf=conf))['simplemath']
148 |         Decimal('2')
149 |         >>> kwargs = {'conf': conf, 'field': 'num', 'assign': 'result'}
150 |         >>> next(pipe({'num': '10'}, **kwargs))['result']
151 |         Decimal('2')
152 |     """
153 |     return parser(*args, **kwargs)
154 | 


--------------------------------------------------------------------------------
/riko/modules/slugify.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.slugify
  5 | ~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for slugifying text.
  7 | 
  8 | Examples:
  9 |     basic usage::
 10 | 
 11 |         >>> from riko.modules.slugify import pipe
 12 |         >>>
 13 |         >>> next(pipe({'content': 'hello world'}))['slugify'] == 'hello-world'
 14 |         True
 15 | 
 16 | Attributes:
 17 |     OPTS (dict): The default pipe options
 18 |     DEFAULTS (dict): The default parser options
 19 | """
 20 | import pygogo as gogo
 21 | 
 22 | from slugify import slugify
 23 | from . import processor
 24 | 
 25 | OPTS = {"ftype": "text", "extract": "separator", "field": "content"}
 26 | DEFAULTS = {"separator": "-"}
 27 | logger = gogo.Gogo(__name__, monolog=True).logger
 28 | 
 29 | 
 30 | def parser(word, separator, skip=False, **kwargs):
 31 |     """Parsers the pipe content
 32 | 
 33 |     Args:
 34 |         word (str): The string to transform
 35 |         separator (str): The slug separator.
 36 |         skip (bool): Don't parse the content
 37 |         kwargs (dict): Keyword arguments
 38 | 
 39 |     Kwargs:
 40 |         assign (str): Attribute to assign parsed content (default: slugify)
 41 |         stream (dict): The original item
 42 | 
 43 |     Returns:
 44 |         dict: The item
 45 | 
 46 |     Examples:
 47 |         >>> from meza.fntools import Objectify
 48 |         >>>
 49 |         >>> item = {'content': 'hello world'}
 50 |         >>> kwargs = {'stream': item}
 51 |         >>> parser(item['content'], '-', **kwargs) == 'hello-world'
 52 |         True
 53 |     """
 54 |     if skip:
 55 |         parsed = kwargs["stream"]
 56 |     else:
 57 |         parsed = slugify(word.strip(), separator=separator)
 58 | 
 59 |     return parsed
 60 | 
 61 | 
 62 | @processor(DEFAULTS, isasync=True, **OPTS)
 63 | def async_pipe(*args, **kwargs):
 64 |     """A processor module that asynchronously slugifies the field of an item.
 65 | 
 66 |     Args:
 67 |         item (dict): The entry to process
 68 |         kwargs (dict): The keyword arguments passed to the wrapper
 69 | 
 70 |     Kwargs:
 71 |         assign (str): Attribute to assign parsed content (default: slugify)
 72 |         field (str): Item attribute to operate on (default: 'content')
 73 | 
 74 |     Returns:
 75 |        Deferred: twisted.internet.defer.Deferred item with slugified content
 76 | 
 77 |     Examples:
 78 |         >>> from riko.bado import react
 79 |         >>> from riko.bado.mock import FakeReactor
 80 |         >>>
 81 |         >>> def run(reactor):
 82 |         ...     callback = lambda x: print(next(x)['slugify'] == 'hello-world')
 83 |         ...     d = async_pipe({'content': 'hello world'})
 84 |         ...     return d.addCallbacks(callback, logger.error)
 85 |         >>>
 86 |         >>> try:
 87 |         ...     react(run, _reactor=FakeReactor())
 88 |         ... except SystemExit:
 89 |         ...     pass
 90 |         ...
 91 |         True
 92 |     """
 93 |     return parser(*args, **kwargs)
 94 | 
 95 | 
 96 | @processor(DEFAULTS, **OPTS)
 97 | def pipe(*args, **kwargs):
 98 |     """A processor that slugifies the field of an item.
 99 | 
100 |     Args:
101 |         item (dict): The entry to process
102 |         kwargs (dict): The keyword arguments passed to the wrapper
103 | 
104 |     Kwargs:
105 |         conf (dict): The pipe configuration. May contain the key 'separator'.
106 |             separator (str): The slug separator (default: '-')
107 | 
108 |         assign (str): Attribute to assign parsed content (default: slugify)
109 |         field (str): Item attribute to operate on (default: 'content')
110 | 
111 |     Yields:
112 |         dict: an item with slugified content
113 | 
114 |     Examples:
115 |         >>> next(pipe({'content': 'hello world'}))['slugify'] == 'hello-world'
116 |         True
117 |         >>> slugified = 'hello_world'
118 |         >>> conf = {'separator': '_'}
119 |         >>> item = {'title': 'hello world'}
120 |         >>> kwargs = {'conf': conf, 'field': 'title', 'assign': 'result'}
121 |         >>> next(pipe(item, **kwargs))['result'] == slugified
122 |         True
123 |     """
124 |     return parser(*args, **kwargs)
125 | 


--------------------------------------------------------------------------------
/riko/modules/split.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.split
  5 | ~~~~~~~~~~~~~~~~~~
  6 | Provides functions for splitting a stream into identical copies
  7 | 
  8 | Use split when you want to perform different operations on data from the same
  9 | stream. The Union module is the reverse of Split, it merges multiple input
 10 | streams into a single combined stream.
 11 | 
 12 | Examples:
 13 |     basic usage::
 14 | 
 15 |         >>> from riko.modules.split import pipe
 16 |         >>>
 17 |         >>> stream1, stream2 = pipe({'x': x} for x in range(5))
 18 |         >>> next(stream1) == {'x': 0}
 19 |         True
 20 | 
 21 | Attributes:
 22 |     OPTS (dict): The default pipe options
 23 |     DEFAULTS (dict): The default parser options
 24 | """
 25 | 
 26 | from copy import deepcopy
 27 | 
 28 | from . import operator
 29 | import pygogo as gogo
 30 | 
 31 | OPTS = {"extract": "splits", "ptype": "int", "objectify": False}
 32 | DEFAULTS = {"splits": 2}
 33 | logger = gogo.Gogo(__name__, monolog=True).logger
 34 | 
 35 | 
 36 | def parser(stream, splits, tuples, **kwargs):
 37 |     """Parses the pipe content
 38 | 
 39 |     Args:
 40 |         stream (Iter[dict]): The source stream. Note: this shares the `tuples`
 41 |             iterator, so consuming it will consume `tuples` as well.
 42 | 
 43 |         splits (int): the number of copies to create.
 44 | 
 45 |         tuples (Iter[(dict, obj)]): Iterable of tuples of (item, splits)
 46 |             `item` is an element in the source stream (a DotDict instance)
 47 |             and `splits` is an int. Note: this shares the `stream` iterator,
 48 |             so consuming it will consume `stream` as well.
 49 | 
 50 |         kwargs (dict): Keyword arguments.
 51 | 
 52 |     Yields:
 53 |         Iter(dict): a stream of items
 54 | 
 55 |     Examples:
 56 |         >>> from itertools import repeat
 57 |         >>>
 58 |         >>> conf = {'splits': 3}
 59 |         >>> kwargs = {'conf': conf}
 60 |         >>> stream = (({'x': x}) for x in range(5))
 61 |         >>> tuples = zip(stream, repeat(conf['splits']))
 62 |         >>> streams = parser(stream, conf['splits'], tuples, **kwargs)
 63 |         >>> next(next(streams)) == {'x': 0}
 64 |         True
 65 |     """
 66 |     source = list(stream)
 67 | 
 68 |     # deepcopy each item so that each split is independent
 69 |     for num in range(splits):
 70 |         yield map(deepcopy, source)
 71 | 
 72 | 
 73 | @operator(DEFAULTS, isasync=True, **OPTS)
 74 | def async_pipe(*args, **kwargs):
 75 |     """An operator that asynchronously and eagerly splits a stream into identical
 76 |     copies. Note that this pipe is not lazy.
 77 | 
 78 |     Args:
 79 |         items (Iter[dict]): The source stream.
 80 |         kwargs (dict): The keyword arguments passed to the wrapper
 81 | 
 82 |     Kwargs:
 83 |         conf (dict): The pipe configuration. May contain the key 'splits'.
 84 | 
 85 |             splits (int): the number of copies to create (default: 2).
 86 | 
 87 |     Returns:
 88 |         Deferred: twisted.internet.defer.Deferred iterable of streams
 89 | 
 90 |     Examples:
 91 |         >>> from riko.bado import react
 92 |         >>> from riko.bado.mock import FakeReactor
 93 |         >>>
 94 |         >>> def run(reactor):
 95 |         ...     callback = lambda x: print(next(next(x)) == {'x': 0})
 96 |         ...     d = async_pipe({'x': x} for x in range(5))
 97 |         ...     return d.addCallbacks(callback, logger.error)
 98 |         >>>
 99 |         >>> try:
100 |         ...     react(run, _reactor=FakeReactor())
101 |         ... except SystemExit:
102 |         ...     pass
103 |         ...
104 |         True
105 |     """
106 |     return parser(*args, **kwargs)
107 | 
108 | 
109 | @operator(DEFAULTS, **OPTS)
110 | def pipe(*args, **kwargs):
111 |     """An operator that eagerly splits a stream into identical copies.
112 |     Note that this pipe is not lazy.
113 | 
114 |     Args:
115 |         items (Iter[dict]): The source stream.
116 |         kwargs (dict): The keyword arguments passed to the wrapper
117 | 
118 |     Kwargs:
119 |         conf (dict): The pipe configuration. May contain the key 'splits'.
120 | 
121 |             splits (int): the number of copies to create (default: 2).
122 | 
123 |     Yields:
124 |         Iter(dict): a stream of items
125 | 
126 |     Examples:
127 |         >>> items = [{'x': x} for x in range(5)]
128 |         >>> stream1, stream2 = pipe(items)
129 |         >>> next(stream1) == {'x': 0}
130 |         True
131 |         >>> len(list(pipe(items, conf={'splits': '3'})))
132 |         3
133 |     """
134 |     return parser(*args, **kwargs)
135 | 


--------------------------------------------------------------------------------
/riko/modules/strconcat.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.strconcat
  5 | ~~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for concatenating strings (aka stringbuilder).
  7 | 
  8 | Useful when you need to build a string from multiple substrings, some coded
  9 | into the pipe, other parts supplied when the pipe is run.
 10 | 
 11 | Examples:
 12 |     basic usage::
 13 | 
 14 |         >>> from riko.modules.strconcat import pipe
 15 |         >>>
 16 |         >>> item = {'word': 'hello'}
 17 |         >>> part = [{'subkey': 'word'}, {'value': ' world'}]
 18 |         >>> next(pipe(item, conf={'part': part}))['strconcat'] == 'hello world'
 19 |         True
 20 | 
 21 | Attributes:
 22 |     OPTS (dict): The default pipe options
 23 |     DEFAULTS (dict): The default parser options
 24 | """
 25 | import pygogo as gogo
 26 | 
 27 | from . import processor
 28 | 
 29 | OPTS = {"listize": True, "extract": "part"}
 30 | logger = gogo.Gogo(__name__, monolog=True).logger
 31 | 
 32 | 
 33 | def parser(_, parts, skip=False, **kwargs):
 34 |     """Parses the pipe content
 35 | 
 36 |     Args:
 37 |         _ (dict): The item (ignored)
 38 |         parts (List[str]): The content to concatenate
 39 |         skip (bool): Don't parse the content
 40 |         kwargs (dict): Keyword arguments
 41 | 
 42 |     Kwargs:
 43 |         stream (dict): The original item
 44 | 
 45 |     Returns:
 46 |         str: The concatenated string
 47 | 
 48 |     Examples:
 49 |         >>> parser(None, ['one', 'two']) == 'onetwo'
 50 |         True
 51 |     """
 52 |     if skip:
 53 |         parsed = kwargs["stream"]
 54 |     else:
 55 |         parsed = "".join(str(p) for p in parts if p)
 56 | 
 57 |     return parsed
 58 | 
 59 | 
 60 | @processor(isasync=True, **OPTS)
 61 | def async_pipe(*args, **kwargs):
 62 |     """A processor module that asynchronously concatenates strings.
 63 | 
 64 |     Args:
 65 |         item (dict): The entry to process
 66 |         kwargs (dict): The keyword arguments passed to the wrapper
 67 | 
 68 |     Kwargs:
 69 |         conf (dict): The pipe configuration. Must contain the key 'part'.
 70 | 
 71 |             part (dict): can be either a dict or list of dicts. Must contain
 72 |                 one of the following keys: 'value', 'subkey', or 'terminal'.
 73 | 
 74 |                 value (str): The substring value
 75 |                 subkey (str): The item attribute from which to obtain a
 76 |                     substring
 77 | 
 78 |                 terminal (str): The id of a pipe from which to obtain a
 79 |                     substring
 80 | 
 81 |         assign (str): Attribute to assign parsed content (default: strconcat)
 82 | 
 83 |     Returns:
 84 |        Deferred: twisted.internet.defer.Deferred item with concatenated content
 85 | 
 86 |     Examples:
 87 |         >>> from riko.bado import react
 88 |         >>> from riko.bado.mock import FakeReactor
 89 |         >>>
 90 |         >>> def run(reactor):
 91 |         ...     callback = lambda x: print(next(x)['strconcat'])
 92 |         ...     item = {'title': 'Hello world'}
 93 |         ...     part = [{'subkey': 'title'}, {'value': 's'}]
 94 |         ...     d = async_pipe(item, conf={'part': part})
 95 |         ...     return d.addCallbacks(callback, logger.error)
 96 |         >>>
 97 |         >>> try:
 98 |         ...     react(run, _reactor=FakeReactor())
 99 |         ... except SystemExit:
100 |         ...     pass
101 |         ...
102 |         Hello worlds
103 |     """
104 |     return parser(*args, **kwargs)
105 | 
106 | 
107 | @processor(**OPTS)
108 | def pipe(*args, **kwargs):
109 |     """A processor that concatenates strings.
110 | 
111 |     Args:
112 |         item (dict): The entry to process
113 |         kwargs (dict): The keyword arguments passed to the wrapper
114 | 
115 |     Kwargs:
116 |         conf (dict): The pipe configuration. Must contain the key 'part'.
117 | 
118 |             part (dict): can be either a dict or list of dicts. Must contain
119 |                 one of the following keys: 'value', 'subkey', or 'terminal'.
120 | 
121 |                 value (str): The substring value
122 |                 subkey (str): The item attribute from which to obtain a
123 |                     substring
124 | 
125 |                 terminal (str): The id of a pipe from which to obtain a
126 |                     substring
127 | 
128 |         assign (str): Attribute to assign parsed content (default: strconcat)
129 | 
130 |     Yields:
131 |         dict: an item with concatenated content
132 | 
133 |     Examples:
134 |         >>> item = {'img': {'src': 'http://www.site.com'}}
135 |         >>> part = [
136 |         ...     {'value': '<img src="'}, {'subkey': 'img.src'}, {'value': '">'}
137 |         ... ]
138 |         >>> conf = {'part': part}
139 |         >>> resp = '<img src="http://www.site.com">'
140 |         >>> next(pipe(item, conf=conf))['strconcat'] == resp
141 |         True
142 |         >>> next(pipe(item, conf=conf, assign='result'))['result'] == resp
143 |         True
144 |     """
145 |     return parser(*args, **kwargs)
146 | 


--------------------------------------------------------------------------------
/riko/modules/substr.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.substr
  5 | ~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for obtaining a portion of a string.
  7 | 
  8 | You enter two numbers to tell the module the starting character position and
  9 | the length of the resulting substring. If your input string is "ABCDEFG", then
 10 | a From value of 2 and length of 4 gives you a resulting string of "CDEF".
 11 | Notice that the first character in the original string is 0, not 1.
 12 | 
 13 | If you enter too long a length, the module just returns a substring to the end
 14 | of the input string, so if you enter a From of 3 and a length of 100, you'll
 15 | get a result of "DEFG".
 16 | Examples:
 17 |     basic usage::
 18 | 
 19 |         >>> from riko.modules.substr import pipe
 20 |         >>>
 21 |         >>> conf = {'start': '3', 'length': '4'}
 22 |         >>> item = {'content': 'hello world'}
 23 |         >>> next(pipe(item, conf=conf))['substr'] == 'lo w'
 24 |         True
 25 | 
 26 | Attributes:
 27 |     OPTS (dict): The default pipe options
 28 |     DEFAULTS (dict): The default parser options
 29 | """
 30 | 
 31 | from . import processor
 32 | import pygogo as gogo
 33 | 
 34 | OPTS = {"ftype": "text", "ptype": "int", "field": "content"}
 35 | DEFAULTS = {"start": 0, "length": 0}
 36 | logger = gogo.Gogo(__name__, monolog=True).logger
 37 | 
 38 | 
 39 | def parser(word, objconf, skip=False, **kwargs):
 40 |     """Parses the pipe content
 41 | 
 42 |     Args:
 43 |         word (str): The string to parse
 44 |         objconf (obj): The pipe configuration (an Objectify instance)
 45 |         skip (bool): Don't parse the content
 46 |         kwargs (dict): Keyword arguments
 47 | 
 48 |     Kwargs:
 49 |         assign (str): Attribute to assign parsed content (default: substr)
 50 |         stream (dict): The original item
 51 | 
 52 |     Returns:
 53 |         dict: The item
 54 | 
 55 |     Examples:
 56 |         >>> from meza.fntools import Objectify
 57 |         >>>
 58 |         >>> item = {'content': 'hello world'}
 59 |         >>> conf = {'start': 3, 'length': 4}
 60 |         >>> args = item['content'], Objectify(conf)
 61 |         >>> kwargs = {'stream': item, 'conf': conf}
 62 |         >>> parser(*args, **kwargs) == 'lo w'
 63 |         True
 64 |     """
 65 |     end = objconf.start + objconf.length if objconf.length else None
 66 |     return kwargs["stream"] if skip else word[objconf.start : end]
 67 | 
 68 | 
 69 | @processor(DEFAULTS, isasync=True, **OPTS)
 70 | def async_pipe(*args, **kwargs):
 71 |     """A processor module that asynchronously returns a substring of a field
 72 |     of an item.
 73 | 
 74 |     Args:
 75 |         item (dict): The entry to process
 76 |         kwargs (dict): The keyword arguments passed to the wrapper
 77 | 
 78 |     Kwargs:
 79 |         conf (dict): The pipe configuration. May contain the keys 'start' or
 80 |             'length'.
 81 | 
 82 |             start (int): starting position (default: 0)
 83 |             length (int): count of characters to return (default: 0, i.e., all)
 84 | 
 85 |         assign (str): Attribute to assign parsed content (default: substr)
 86 |         field (str): Item attribute to operate on (default: 'content')
 87 | 
 88 |     Returns:
 89 |        Deferred: twisted.internet.defer.Deferred item with transformed content
 90 | 
 91 |     Examples:
 92 |         >>> from riko.bado import react
 93 |         >>> from riko.bado.mock import FakeReactor
 94 |         >>>
 95 |         >>> def run(reactor):
 96 |         ...     callback = lambda x: print(next(x)['substr'])
 97 |         ...     conf = {'start': '3', 'length': '4'}
 98 |         ...     d = async_pipe({'content': 'hello world'}, conf=conf)
 99 |         ...     return d.addCallbacks(callback, logger.error)
100 |         >>>
101 |         >>> try:
102 |         ...     react(run, _reactor=FakeReactor())
103 |         ... except SystemExit:
104 |         ...     pass
105 |         ...
106 |         lo w
107 |     """
108 |     return parser(*args, **kwargs)
109 | 
110 | 
111 | @processor(**OPTS)
112 | def pipe(*args, **kwargs):
113 |     """A processor that returns a substring of a field of an item.
114 | 
115 |     Args:
116 |         item (dict): The entry to process
117 |         kwargs (dict): The keyword arguments passed to the wrapper
118 | 
119 |     Kwargs:
120 |         conf (dict): The pipe configuration. May contain the keys 'start' or
121 |             'length'.
122 | 
123 |             start (int): starting position (default: 0)
124 |             length (int): count of characters to return (default: 0, i.e., all)
125 | 
126 |         assign (str): Attribute to assign parsed content (default: substr)
127 |         field (str): Item attribute to operate on (default: 'content')
128 | 
129 |     Yields:
130 |         dict: an item with the substring
131 | 
132 |     Examples:
133 |         >>> conf = {'start': '3', 'length': '4'}
134 |         >>> item = {'content': 'hello world'}
135 |         >>> next(pipe(item, conf=conf))['substr'] == 'lo w'
136 |         True
137 |         >>> conf = {'start': '3'}
138 |         >>> kwargs = {'conf': conf, 'field': 'title', 'assign': 'result'}
139 |         >>> next(pipe({'title': 'Greetings'}, **kwargs))['result'] == 'etings'
140 |         True
141 |     """
142 |     return parser(*args, **kwargs)
143 | 


--------------------------------------------------------------------------------
/riko/modules/sum.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.sum
  5 | ~~~~~~~~~~~~~~~~
  6 | Provides functions for summing the items in a stream.
  7 | 
  8 | Examples:
  9 |     basic usage::
 10 | 
 11 |         >>> from riko.modules.sum import pipe
 12 |         >>>
 13 |         >>> stream = pipe({'content': x} for x in range(5))
 14 |         >>> next(stream) == {'sum': Decimal('10')}
 15 |         True
 16 | 
 17 | Attributes:
 18 |     OPTS (dict): The default pipe options
 19 |     DEFAULTS (dict): The default parser options
 20 | """
 21 | import itertools as it
 22 | import pygogo as gogo
 23 | 
 24 | from operator import itemgetter
 25 | from decimal import Decimal
 26 | 
 27 | from . import operator
 28 | 
 29 | OPTS = {}
 30 | DEFAULTS = {"sum_key": "content", "group_key": None}
 31 | logger = gogo.Gogo(__name__, monolog=True).logger
 32 | 
 33 | 
 34 | def parser(stream, objconf, tuples, **kwargs):
 35 |     """Parses the pipe content
 36 | 
 37 |     Args:
 38 |         stream (Iter[dict]): The source. Note: this shares the `tuples`
 39 |             iterator, so consuming it will consume `tuples` as well.
 40 | 
 41 |         objconf (obj): The pipe configuration (an Objectify instance)
 42 | 
 43 |         tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf)
 44 |             `item` is an element in the source stream and `objconf` is the item
 45 |             configuration (an Objectify instance). Note: this shares the
 46 |             `stream` iterator, so consuming it will consume `stream` as well.
 47 | 
 48 |         kwargs (dict): Keyword arguments.
 49 | 
 50 |     Kwargs:
 51 |         conf (dict): The pipe configuration.
 52 | 
 53 |     Returns:
 54 |         mixed: The output either a dict or iterable of dicts
 55 | 
 56 |     Examples:
 57 |         >>> from itertools import repeat
 58 |         >>> from meza.fntools import Objectify
 59 |         >>>
 60 |         >>> stream = ({'content': x} for x in range(5))
 61 |         >>> objconf = Objectify({'sum_key': 'content'})
 62 |         >>> tuples = zip(stream, repeat(objconf))
 63 |         >>> args = (stream, objconf, tuples)
 64 |         >>> parser(*args, assign='content') == {'content': Decimal('10')}
 65 |         True
 66 |         >>> objconf = Objectify({'sum_key': 'amount', 'group_key': 'x'})
 67 |         >>> stream = [
 68 |         ...     {'amount': 2, 'x': 'one'},
 69 |         ...     {'amount': 1, 'x': 'one'},
 70 |         ...     {'amount': 2, 'x': 'two'}]
 71 |         >>> tuples = zip(stream, repeat(objconf))
 72 |         >>> summed = parser(stream, objconf, tuples)
 73 |         >>> next(summed) == {'one': Decimal('3')}
 74 |         True
 75 |         >>> next(summed) == {'two': Decimal('2')}
 76 |         True
 77 |     """
 78 |     _sum = lambda group: sum(Decimal(g[objconf.sum_key]) for g in group)
 79 | 
 80 |     if objconf.group_key:
 81 |         keyfunc = itemgetter(objconf.group_key)
 82 |         sorted_stream = sorted(stream, key=keyfunc)
 83 |         grouped = it.groupby(sorted_stream, keyfunc)
 84 |         summed = ({key: _sum(group)} for key, group in grouped)
 85 |     else:
 86 |         summed = {kwargs["assign"]: _sum(stream)}
 87 | 
 88 |     return summed
 89 | 
 90 | 
 91 | @operator(DEFAULTS, isasync=True, **OPTS)
 92 | def async_pipe(*args, **kwargs):
 93 |     """An operator that asynchronously and eagerly sums fields of items
 94 |     in a stream. Note that this pipe is not lazy if `group_key` is specified.
 95 | 
 96 |     Args:
 97 |         items (Iter[dict]): The source.
 98 |         kwargs (dict): The keyword arguments passed to the wrapper
 99 | 
100 |     Kwargs:
101 |         conf (dict): The pipe configuration. May contain the keys 'sum_key' or
102 |             'group_key'.
103 | 
104 |             sum_key (str): Item attribute to sum. (default: 'content').
105 | 
106 |             group_key (str): Item attribute to sum by. This will group items
107 |                 in the stream by the given key and report a sum for each
108 |                 group (default: None).
109 | 
110 |         assign (str): Attribute to assign parsed content. If `sum_key` is set,
111 |             this is ignored and the group keys are used instead. (default:
112 |             content)
113 | 
114 |     Returns:
115 |         Deferred: twisted.internet.defer.Deferred iterator of the summed items
116 | 
117 |     Examples:
118 |         >>> from riko.bado import react
119 |         >>> from riko.bado.mock import FakeReactor
120 |         >>>
121 |         >>> def run(reactor):
122 |         ...     callback = lambda x: print(next(x) == {'sum': Decimal('10')})
123 |         ...     items = ({'content': x} for x in range(5))
124 |         ...     d = async_pipe(items)
125 |         ...     return d.addCallbacks(callback, logger.error)
126 |         >>>
127 |         >>> try:
128 |         ...     react(run, _reactor=FakeReactor())
129 |         ... except SystemExit:
130 |         ...     pass
131 |         ...
132 |         True
133 |     """
134 |     return parser(*args, **kwargs)
135 | 
136 | 
137 | @operator(DEFAULTS, **OPTS)
138 | def pipe(*args, **kwargs):
139 |     """An operator that eagerly sums fields of items in a stream.
140 |     Note that this pipe is not lazy if `group_key` is specified.
141 | 
142 |     Args:
143 |         items (Iter[dict]): The source.
144 |         kwargs (dict): The keyword arguments passed to the wrapper
145 | 
146 |     Kwargs:
147 |         conf (dict): The pipe configuration. May contain the keys 'sum_key' or
148 |             'group_key'.
149 | 
150 |             sum_key (str): Item attribute to sum. (default: 'content').
151 | 
152 |             group_key (str): Item attribute to sum by. This will group items
153 |                 in the stream by the given key and report a sum for each
154 |                 group (default: None).
155 | 
156 |         assign (str): Attribute to assign parsed content. If `sum_key` is set,
157 |             this is ignored and the group keys are used instead. (default:
158 |             content)
159 | 
160 |     Yields:
161 |         dict: the summed items
162 | 
163 |     Examples:
164 |         >>> stream = ({'content': x} for x in range(5))
165 |         >>> next(pipe(stream)) == {'sum': Decimal('10')}
166 |         True
167 |         >>> stream = [
168 |         ...     {'amount': 2, 'x': 'one'},
169 |         ...     {'amount': 1, 'x': 'one'},
170 |         ...     {'amount': 2, 'x': 'two'}]
171 |         >>> summed = pipe(stream, conf={'sum_key': 'amount', 'group_key': 'x'})
172 |         >>> next(summed) == {'one': Decimal('3')}
173 |         True
174 |         >>> next(summed) == {'two': Decimal('2')}
175 |         True
176 |     """
177 |     return parser(*args, **kwargs)
178 | 


--------------------------------------------------------------------------------
/riko/modules/tail.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.tail
  5 | ~~~~~~~~~~~~~~~~~
  6 | Provides functions for truncating a stream to the last N items.
  7 | 
  8 | Contrast this with the Truncate module, which limits the output to the first N
  9 | items.
 10 | 
 11 | Examples:
 12 |     basic usage::
 13 | 
 14 |         >>> from riko.modules.tail import pipe
 15 |         >>>
 16 |         >>> items = ({'x': x} for x in range(5))
 17 |         >>> next(pipe(items, conf={'count': 2})) == {'x': 3}
 18 |         True
 19 | 
 20 | Attributes:
 21 |     OPTS (dict): The default pipe options
 22 |     DEFAULTS (dict): The default parser options
 23 | """
 24 | from collections import deque
 25 | 
 26 | from . import operator
 27 | import pygogo as gogo
 28 | 
 29 | OPTS = {"ptype": "int"}
 30 | logger = gogo.Gogo(__name__, monolog=True).logger
 31 | 
 32 | 
 33 | def parser(stream, objconf, tuples, **kwargs):
 34 |     """Parses the pipe content
 35 | 
 36 |     Args:
 37 |         stream (Iter[dict]): The source. Note: this shares the `tuples`
 38 |             iterator, so consuming it will consume `tuples` as well.
 39 | 
 40 |         objconf (obj): the item independent configuration (an Objectify
 41 |             instance).
 42 | 
 43 |         tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf)
 44 |             `item` is an element in the source stream and `objconf` is the item
 45 |             configuration (an Objectify instance). Note: this shares the
 46 |             `stream` iterator, so consuming it will consume `stream` as well.
 47 | 
 48 |         kwargs (dict): Keyword arguments.
 49 | 
 50 |     Returns:
 51 |         List(dict): The output stream
 52 | 
 53 |     Examples:
 54 |         >>> from meza.fntools import Objectify
 55 |         >>> from itertools import repeat
 56 |         >>>
 57 |         >>> kwargs = {'count': 2}
 58 |         >>> objconf = Objectify(kwargs)
 59 |         >>> stream = ({'x': x} for x in range(5))
 60 |         >>> tuples = zip(stream, repeat(objconf))
 61 |         >>> parser(stream, objconf, tuples, **kwargs)[0] == {'x': 3}
 62 |         True
 63 |     """
 64 |     return deque(stream, objconf.count)
 65 | 
 66 | 
 67 | @operator(isasync=True, **OPTS)
 68 | def async_pipe(*args, **kwargs):
 69 |     """An operator that asynchronously truncates a stream to the last N items.
 70 | 
 71 |     Args:
 72 |         items (Iter[dict]): The source.
 73 |         kwargs (dict): The keyword arguments passed to the wrapper
 74 | 
 75 |     Kwargs:
 76 |         conf (dict): The pipe configuration. Must contain the key 'count'.
 77 | 
 78 |             count (int): desired stream length
 79 | 
 80 |     Returns:
 81 |         Deferred: twisted.internet.defer.Deferred truncated stream
 82 | 
 83 |     Examples:
 84 |         >>> from riko.bado import react
 85 |         >>> from riko.bado.mock import FakeReactor
 86 |         >>>
 87 |         >>> def run(reactor):
 88 |         ...     callback = lambda x: print(next(x) == {'x': 3})
 89 |         ...     items = ({'x': x} for x in range(5))
 90 |         ...     d = async_pipe(items, conf={'count': 2})
 91 |         ...     return d.addCallbacks(callback, logger.error)
 92 |         >>>
 93 |         >>> try:
 94 |         ...     react(run, _reactor=FakeReactor())
 95 |         ... except SystemExit:
 96 |         ...     pass
 97 |         ...
 98 |         True
 99 |     """
100 |     return parser(*args, **kwargs)
101 | 
102 | 
103 | @operator(**OPTS)
104 | def pipe(*args, **kwargs):
105 |     """An operator that truncates a stream to the last N items.
106 | 
107 |     Args:
108 |         items (Iter[dict]): The source.
109 |         kwargs (dict): The keyword arguments passed to the wrapper
110 | 
111 |     Kwargs:
112 |         conf (dict): The pipe configuration. Must contain the key 'count'.
113 | 
114 |             count (int): desired stream length
115 | 
116 |     Yields:
117 |         dict: an item
118 | 
119 |     Examples:
120 |         >>> items = [{'x': x} for x in range(5)]
121 |         >>> next(pipe(items, conf={'count': 2})) == {'x': 3}
122 |         True
123 |     """
124 |     return parser(*args, **kwargs)
125 | 


--------------------------------------------------------------------------------
/riko/modules/tokenizer.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.tokenizer
  5 | ~~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for splitting a string into an array of strings.
  7 | 
  8 | A delimiter string (often just a single character) tells the module where to
  9 | split the input string. The delimiter string doesn't appear in the output.
 10 | 
 11 | Examples:
 12 |     basic usage::
 13 | 
 14 |         >>> from riko.modules.tokenizer import pipe
 15 |         >>>
 16 |         >>> item = {'content': 'Once,twice,thrice'}
 17 |         >>> next(pipe(item))['tokenizer'][0] == {'content': 'Once'}
 18 |         True
 19 | 
 20 | Attributes:
 21 |     OPTS (dict): The default pipe options
 22 |     DEFAULTS (dict): The default parser options
 23 | """
 24 | import pygogo as gogo
 25 | 
 26 | from . import processor
 27 | 
 28 | OPTS = {"ftype": "text", "field": "content"}
 29 | DEFAULTS = {"delimiter": ",", "dedupe": False, "sort": False, "token_key": "content"}
 30 | 
 31 | logger = gogo.Gogo(__name__, monolog=True).logger
 32 | 
 33 | 
 34 | def parser(content, objconf, skip=False, **kwargs):
 35 |     """Parses the pipe content
 36 | 
 37 |     Args:
 38 |         content (str): The content to tokenize
 39 |         objconf (obj): The pipe configuration (an Objectify instance)
 40 |         skip (bool): Don't parse the content
 41 | 
 42 |     Returns:
 43 |         Iter[dict]: The stream of items
 44 | 
 45 |     Examples:
 46 |         >>> from meza.fntools import Objectify
 47 |         >>> objconf = Objectify({'delimiter': '//', 'token_key': 'token'})
 48 |         >>> content = 'Once//twice//thrice//no more'
 49 |         >>> result = parser(content, objconf)
 50 |         >>> next(result) == {'token': 'Once'}
 51 |         True
 52 |     """
 53 |     if skip:
 54 |         stream = kwargs["stream"]
 55 |     else:
 56 |         splits = [s.strip() for s in content.split(objconf.delimiter) if s]
 57 |         deduped = set(splits) if objconf.dedupe else splits
 58 |         keyfunc = lambda s: s.lower()
 59 |         chunks = sorted(deduped, key=keyfunc) if objconf.sort else deduped
 60 |         stream = ({objconf.token_key: chunk} for chunk in chunks)
 61 | 
 62 |     return stream
 63 | 
 64 | 
 65 | @processor(DEFAULTS, isasync=True, **OPTS)
 66 | def async_pipe(*args, **kwargs):
 67 |     """A processor module that asynchronously splits a string by a delimiter.
 68 | 
 69 |     Args:
 70 |         item (dict): The entry to process
 71 |         kwargs (dict): The keyword arguments passed to the wrapper
 72 | 
 73 |     Kwargs:
 74 |         conf (dict): The pipe configuration. May contain the keys 'delimiter',
 75 |             'dedupe', 'sort', or 'token_key'.
 76 | 
 77 |             delimiter (str): the delimiter string (default: ',')
 78 |             dedupe (bool): Remove duplicates (default: False).
 79 |             sort (bool): Sort tokens (default: False)
 80 | 
 81 |             token_key (str): Attribute to assign individual tokens (default:
 82 |                 content)
 83 | 
 84 |         assign (str): Attribute to assign parsed content (default:
 85 |             tokenizer)
 86 | 
 87 |         field (str): Item attribute from which to obtain the string to be
 88 |             tokenized (default: 'content')
 89 | 
 90 |         emit (bool): Return the stream as is and don't assign it to an item
 91 |             attribute (default: False)
 92 | 
 93 |     Returns:
 94 |         Deferred: twisted.internet.defer.Deferred item with tokenized content
 95 | 
 96 |     Examples:
 97 |         >>> from riko.bado import react
 98 |         >>> from riko.bado.mock import FakeReactor
 99 |         >>>
100 |         >>> def run(reactor):
101 |         ...     resp = {'content': 'Once'}
102 |         ...     attr = 'tokenizer'
103 |         ...     callback = lambda x: print(next(x)[attr][0] == resp)
104 |         ...     item = {'content': 'Once,twice,thrice,no more'}
105 |         ...     d = async_pipe(item)
106 |         ...     return d.addCallbacks(callback, logger.error)
107 |         >>>
108 |         >>> try:
109 |         ...     react(run, _reactor=FakeReactor())
110 |         ... except SystemExit:
111 |         ...     pass
112 |         ...
113 |         True
114 |     """
115 |     return parser(*args, **kwargs)
116 | 
117 | 
118 | @processor(DEFAULTS, **OPTS)
119 | def pipe(*args, **kwargs):
120 |     """A processor that splits a string by a delimiter.
121 | 
122 |     Args:
123 |         item (dict): The entry to process
124 |         kwargs (dict): The keyword arguments passed to the wrapper
125 | 
126 |     Kwargs:
127 |         conf (dict): The pipe configuration. May contain the keys 'delimiter',
128 |             'dedupe', 'sort', or 'token_key'.
129 | 
130 |             delimiter (str): the delimiter string (default: ',')
131 |             dedupe (bool): Remove duplicates (default: False).
132 |             sort (bool): Sort tokens (default: False)
133 |             token_key (str): Attribute to assign individual tokens (default:
134 |                 content)
135 | 
136 |         assign (str): Attribute to assign parsed content (default:
137 |             tokenizer)
138 | 
139 |         field (str): Item attribute from which to obtain the string to be
140 |             tokenized (default: content)
141 | 
142 |         emit (bool): Return the stream as is and don't assign it to an item
143 |             attribute (default: False)
144 | 
145 |     Returns:
146 |         dict: an item with tokenized content
147 | 
148 |     Examples:
149 |         >>> item = {'description': 'Once//twice//thrice//no more'}
150 |         >>> conf = {'delimiter': '//', 'sort': True}
151 |         >>> kwargs = {'field': 'description', 'assign': 'tokens'}
152 |         >>> resp = {'content': 'no more'}
153 |         >>> next(pipe(item, conf=conf, **kwargs))['tokens'][0] == resp
154 |         True
155 |         >>> kwargs.update({'emit': True})
156 |         >>> conf.update({'token_key': 'token'})
157 |         >>> next(pipe(item, conf=conf, **kwargs)) == {'token': 'no more'}
158 |         True
159 |     """
160 |     return parser(*args, **kwargs)
161 | 


--------------------------------------------------------------------------------
/riko/modules/truncate.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.truncate
  5 | ~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for returning a specified number of items from a stream.
  7 | 
  8 | Contrast this with the tail module, which also limits the number of items,
  9 | but returns items from the bottom of the stream.
 10 | 
 11 | Examples:
 12 |     basic usage::
 13 | 
 14 |         >>> from riko.modules.truncate import pipe
 15 |         >>>
 16 |         >>> items = ({'x': x} for x in range(5))
 17 |         >>> len(list(pipe(items, conf={'count': '4'})))
 18 |         4
 19 | 
 20 | Attributes:
 21 |     OPTS (dict): The default pipe options
 22 |     DEFAULTS (dict): The default parser options
 23 | """
 24 | from itertools import islice
 25 | 
 26 | from . import operator
 27 | import pygogo as gogo
 28 | 
 29 | OPTS = {"ptype": "int"}
 30 | DEFAULTS = {"start": 0}
 31 | logger = gogo.Gogo(__name__, monolog=True).logger
 32 | 
 33 | 
 34 | def parser(stream, objconf, tuples, **kwargs):
 35 |     """Parses the pipe content
 36 | 
 37 |     Args:
 38 |         stream (Iter[dict]): The source. Note: this shares the `tuples`
 39 |             iterator, so consuming it will consume `tuples` as well.
 40 | 
 41 |         objconf (obj): the item independent configuration (an Objectify
 42 |             instance).
 43 | 
 44 |         tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf)
 45 |             `item` is an element in the source stream and `objconf` is the item
 46 |             configuration (an Objectify instance). Note: this shares the
 47 |             `stream` iterator, so consuming it will consume `stream` as well.
 48 | 
 49 |         kwargs (dict): Keyword arguments.
 50 | 
 51 |     Returns:
 52 |         Iter(dict): The output stream
 53 | 
 54 |     Examples:
 55 |         >>> from meza.fntools import Objectify
 56 |         >>> from itertools import repeat
 57 |         >>>
 58 |         >>> kwargs = {'count': 4, 'start': 0}
 59 |         >>> objconf = Objectify(kwargs)
 60 |         >>> stream = ({'x': x} for x in range(5))
 61 |         >>> tuples = zip(stream, repeat(objconf))
 62 |         >>> len(list(parser(stream, objconf, tuples, **kwargs)))
 63 |         4
 64 |     """
 65 |     start = objconf.start
 66 |     stop = start + objconf.count
 67 |     return islice(stream, start, stop)
 68 | 
 69 | 
 70 | @operator(DEFAULTS, isasync=True, **OPTS)
 71 | def async_pipe(*args, **kwargs):
 72 |     """An operator that asynchronously returns a specified number of items
 73 |     from a stream.
 74 | 
 75 |     Args:
 76 |         items (Iter[dict]): The source.
 77 |         kwargs (dict): The keyword arguments passed to the wrapper
 78 | 
 79 |     Kwargs:
 80 |         conf (dict): The pipe configuration. Must contain the key 'count'.
 81 |             May contain the key 'start'.
 82 | 
 83 |             count (int): desired stream length
 84 |             start (int): starting location (default: 0)
 85 | 
 86 |     Returns:
 87 |         Deferred: twisted.internet.defer.Deferred truncated stream
 88 | 
 89 |     Examples:
 90 |         >>> from riko.bado import react
 91 |         >>> from riko.bado.mock import FakeReactor
 92 |         >>>
 93 |         >>> def run(reactor):
 94 |         ...     callback = lambda x: print(len(list(x)))
 95 |         ...     items = ({'x': x} for x in range(5))
 96 |         ...     d = async_pipe(items, conf={'count': 4})
 97 |         ...     return d.addCallbacks(callback, logger.error)
 98 |         >>>
 99 |         >>> try:
100 |         ...     react(run, _reactor=FakeReactor())
101 |         ... except SystemExit:
102 |         ...     pass
103 |         ...
104 |         4
105 |     """
106 |     return parser(*args, **kwargs)
107 | 
108 | 
109 | @operator(DEFAULTS, **OPTS)
110 | def pipe(*args, **kwargs):
111 |     """An operator that returns a specified number of items from a stream.
112 | 
113 |     Args:
114 |         items (Iter[dict]): The source.
115 |         kwargs (dict): The keyword arguments passed to the wrapper
116 | 
117 |     Kwargs:
118 |         conf (dict): The pipe configuration. Must contain the key 'count'.
119 |             May contain the key 'start'.
120 | 
121 |             start (int): starting location (default: 0)
122 |             count (int): desired stream length
123 | 
124 |     Yields:
125 |         dict: an item
126 | 
127 |     Examples:
128 |         >>> items = [{'x': x} for x in range(5)]
129 |         >>> len(list(pipe(items, conf={'count': '4'})))
130 |         4
131 |         >>> stream = pipe(items, conf={'count': '2', 'start': '2'})
132 |         >>> next(stream) == {'x': 2}
133 |         True
134 |     """
135 |     return parser(*args, **kwargs)
136 | 


--------------------------------------------------------------------------------
/riko/modules/typecast.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.typecast
  5 | ~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for casting fields into specific types.
  7 | 
  8 | Examples:
  9 |     basic usage::
 10 | 
 11 |         >>> from riko.modules.typecast import pipe
 12 |         >>>
 13 |         >>> conf = {'type': 'date'}
 14 |         >>> next(pipe({'content': '5/4/82'}, conf=conf))['typecast']['year']
 15 |         1982
 16 | 
 17 | Attributes:
 18 |     OPTS (dict): The default pipe options
 19 |     DEFAULTS (dict): The default parser options
 20 | """
 21 | import pygogo as gogo
 22 | 
 23 | from . import processor
 24 | from riko.utils import cast
 25 | 
 26 | OPTS = {"field": "content"}
 27 | DEFAULTS = {"type": "text"}
 28 | logger = gogo.Gogo(__name__, monolog=True).logger
 29 | 
 30 | 
 31 | def parser(content, objconf, skip=False, **kwargs):
 32 |     """Parsers the pipe content
 33 | 
 34 |     Args:
 35 |         content (scalar): The content to cast
 36 |         objconf (obj): The pipe configuration (an Objectify instance)
 37 |         skip (bool): Don't parse the content
 38 |         kwargs (dict): Keyword arguments
 39 | 
 40 |     Kwargs:
 41 |         assign (str): Attribute to assign parsed content (default: typecast)
 42 |         stream (dict): The original item
 43 | 
 44 |     Returns:
 45 |         dict: The item
 46 | 
 47 |     Examples:
 48 |         >>> from meza.fntools import Objectify
 49 |         >>>
 50 |         >>> item = {'content': '1.0'}
 51 |         >>> objconf = Objectify({'type': 'int'})
 52 |         >>> kwargs = {'stream': item, 'assign': 'content'}
 53 |         >>> parser(item['content'], objconf, **kwargs)
 54 |         1
 55 |     """
 56 |     return kwargs["stream"] if skip else cast(content, objconf.type)
 57 | 
 58 | 
 59 | @processor(DEFAULTS, isasync=True, **OPTS)
 60 | def async_pipe(*args, **kwargs):
 61 |     """A processor module that asynchronously parses a URL into its components.
 62 | 
 63 |     Args:
 64 |         item (dict): The entry to process
 65 |         kwargs (dict): The keyword arguments passed to the wrapper
 66 | 
 67 |     Kwargs:
 68 |         conf (dict): The pipe configuration. May contain the key 'type'.
 69 |             type (str): The object type to cast to (default: text)
 70 | 
 71 |         assign (str): Attribute to assign parsed content (default: typecast)
 72 |         field (str): Item attribute to operate on (default: 'content')
 73 | 
 74 |     Returns:
 75 |        Deferred: twisted.internet.defer.Deferred item with type casted content
 76 | 
 77 |     Examples:
 78 |         >>> from riko.bado import react
 79 |         >>> from riko.bado.mock import FakeReactor
 80 |         >>>
 81 |         >>> def run(reactor):
 82 |         ...     callback = lambda x: print(next(x)['typecast'])
 83 |         ...     d = async_pipe({'content': '1.0'}, conf={'type': 'int'})
 84 |         ...     return d.addCallbacks(callback, logger.error)
 85 |         >>>
 86 |         >>> try:
 87 |         ...     react(run, _reactor=FakeReactor())
 88 |         ... except SystemExit:
 89 |         ...     pass
 90 |         ...
 91 |         1
 92 |     """
 93 |     return parser(*args, **kwargs)
 94 | 
 95 | 
 96 | @processor(DEFAULTS, **OPTS)
 97 | def pipe(*args, **kwargs):
 98 |     """A processor that parses a URL into its components.
 99 | 
100 |     Args:
101 |         item (dict): The entry to process
102 |         kwargs (dict): The keyword arguments passed to the wrapper
103 | 
104 |     Kwargs:
105 |         conf (dict): The pipe configuration. May contain the key 'type'.
106 |             type (str): The object type to cast to (default: text)
107 | 
108 |         assign (str): Attribute to assign parsed content (default: typecast)
109 |         field (str): Item attribute to operate on (default: 'content')
110 | 
111 |     Yields:
112 |         dict: an item with type casted content
113 | 
114 |     Examples:
115 |         >>> from datetime import datetime as dt
116 |         >>> next(pipe({'content': '1.0'}, conf={'type': 'int'}))['typecast']
117 |         1
118 |         >>> item = {'content': '5/4/82'}
119 |         >>> conf = {'type': 'date'}
120 |         >>> date = next(pipe(item, conf=conf, emit=True))['date']
121 |         >>> date.isoformat() == '1982-05-04T00:00:00+00:00'
122 |         True
123 |         >>> item = {'content': dt(1982, 5, 4).timetuple()}
124 |         >>> date = next(pipe(item, conf=conf, emit=True))['date']
125 |         >>> date.isoformat() == '1982-05-04T00:00:00+00:00'
126 |         True
127 |         >>> item = {'content': 'False'}
128 |         >>> conf = {'type': 'bool'}
129 |         >>> next(pipe(item, conf=conf, emit=True))
130 |         False
131 |     """
132 |     return parser(*args, **kwargs)
133 | 


--------------------------------------------------------------------------------
/riko/modules/udf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.udf
  5 | ~~~~~~~~~~~~~~~~
  6 | Provides functions for performing an arbitrary (user-defined) function on stream
  7 | items.
  8 | 
  9 | Examples:
 10 |     basic usage::
 11 | 
 12 |         >>> from riko.modules.udf import pipe
 13 |         >>>
 14 |         >>> items = [{'x': x} for x in range(5)]
 15 |         >>> func = lambda item: {'y': item['x'] + 3}
 16 |         >>> next(pipe(items, func=func))
 17 |         {'y': 3}
 18 | """
 19 | from . import operator
 20 | import pygogo as gogo
 21 | 
 22 | logger = gogo.Gogo(__name__, monolog=True).logger
 23 | 
 24 | 
 25 | def parser(stream, objconf, tuples, **kwargs):
 26 |     """Parses the pipe content
 27 | 
 28 |     Args:
 29 |         stream (Iter[dict]): The source. Note: this shares the `tuples`
 30 |             iterator, so consuming it will consume `tuples` as well.
 31 | 
 32 |         objconf (obj): the item independent configuration (an Objectify
 33 |             instance).
 34 | 
 35 |         tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf)
 36 |             `item` is an element in the source stream and `objconf` is the item
 37 |             configuration (an Objectify instance). Note: this shares the
 38 |             `stream` iterator, so consuming it will consume `stream` as well.
 39 | 
 40 |         kwargs (dict): Keyword arguments.
 41 | 
 42 |     Returns:
 43 |         Iter(dict): The output stream
 44 | 
 45 |     Examples:
 46 |         >>> from meza.fntools import Objectify
 47 |         >>> from itertools import repeat
 48 |         >>>
 49 |         >>> func = lambda item: {'y': item['x'] + 3}
 50 |         >>> stream = ({'x': x} for x in range(5))
 51 |         >>> tuples = zip(stream, repeat(None))
 52 |         >>> next(parser(stream, None, tuples, func=func))
 53 |         {'y': 3}
 54 |     """
 55 |     return map(kwargs["func"], stream)
 56 | 
 57 | 
 58 | @operator(isasync=True)
 59 | def async_pipe(*args, **kwargs):
 60 |     """An operator that asynchronously performs an arbitrary (user-defined) function on
 61 |     items of a stream.
 62 | 
 63 |     Args:
 64 |         items (Iter[dict]): The source.
 65 |         kwargs (dict): The keyword arguments passed to the wrapper
 66 | 
 67 |     Kwargs:
 68 |         func (callable): User defined function to apply to each stream item.
 69 | 
 70 |     Returns:
 71 |         Deferred: twisted.internet.defer.Deferred truncated stream
 72 | 
 73 |     Examples:
 74 |         >>> from riko.bado import react
 75 |         >>> from riko.bado.mock import FakeReactor
 76 |         >>>
 77 |         >>> def run(reactor):
 78 |         ...     callback = lambda x: print(next(x))
 79 |         ...     func = lambda item: {'y': item['x'] + 3}
 80 |         ...     items = ({'x': x} for x in range(5))
 81 |         ...     d = async_pipe(items, func=func)
 82 |         ...     return d.addCallbacks(callback, logger.error)
 83 |         >>>
 84 |         >>> try:
 85 |         ...     react(run, _reactor=FakeReactor())
 86 |         ... except SystemExit:
 87 |         ...     pass
 88 |         ...
 89 |         {'y': 3}
 90 |     """
 91 |     return parser(*args, **kwargs)
 92 | 
 93 | 
 94 | @operator()
 95 | def pipe(*args, **kwargs):
 96 |     """An operator that performs an arbitrary (user-defined) function on items of a
 97 |     stream.
 98 | 
 99 |     Args:
100 |         items (Iter[dict]): The source.
101 |         kwargs (dict): The keyword arguments passed to the wrapper
102 | 
103 |     Kwargs:
104 |         func (callable): User defined function to apply to each stream item.
105 | 
106 |     Yields:
107 |         dict: an item
108 | 
109 |     Examples:
110 |         >>> items = [{'x': x} for x in range(5)]
111 |         >>> func = lambda item: {'y': item['x'] + 3}
112 |         >>> next(pipe(items, func=func))
113 |         {'y': 3}
114 |     """
115 |     return parser(*args, **kwargs)
116 | 


--------------------------------------------------------------------------------
/riko/modules/union.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.union
  5 | ~~~~~~~~~~~~~~~~~~
  6 | Provides functions for merging separate sources into a single stream of items.
  7 | 
  8 | Examples:
  9 |     basic usage::
 10 | 
 11 |         >>> from riko.modules.union import pipe
 12 |         >>>
 13 |         >>> items = ({'x': x} for x in range(5))
 14 |         >>> other1 = ({'x': x + 5} for x in range(5))
 15 |         >>> other2 = ({'x': x + 10} for x in range(5))
 16 |         >>> len(list(pipe(items, others=[other1, other2])))
 17 |         15
 18 | 
 19 | Attributes:
 20 |     OPTS (dict): The default pipe options
 21 |     DEFAULTS (dict): The default parser options
 22 | """
 23 | import pygogo as gogo
 24 | 
 25 | from itertools import chain
 26 | 
 27 | from . import operator
 28 | from riko.utils import multiplex
 29 | 
 30 | # disable `dictize` since we do not need to access the configuration
 31 | OPTS = {"dictize": False}
 32 | logger = gogo.Gogo(__name__, monolog=True).logger
 33 | 
 34 | 
 35 | def parser(stream, objconf, tuples, **kwargs):
 36 |     """Parses the pipe content
 37 | 
 38 |     Args:
 39 |         stream (Iter[dict]): The source. Note: this shares the `tuples`
 40 |             iterator, so consuming it will consume `tuples` as well.
 41 | 
 42 |         objconf (obj): the item independent configuration (an Objectify
 43 |             instance).
 44 | 
 45 |         tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf)
 46 |             `item` is an element in the source stream and `objconf` is the item
 47 |             configuration (an Objectify instance). Note: this shares the
 48 |             `stream` iterator, so consuming it will consume `stream` as well.
 49 | 
 50 |         kwargs (dict): Keyword arguments.
 51 | 
 52 |     Kwargs:
 53 |         others (List[Iter(dict)]): List of streams to join
 54 | 
 55 |     Returns:
 56 |         Iter(dict): The output stream
 57 | 
 58 |     Examples:
 59 |         >>> from itertools import repeat
 60 |         >>>
 61 |         >>> stream = ({'x': x} for x in range(5))
 62 |         >>> other1 = ({'x': x + 5} for x in range(5))
 63 |         >>> other2 = ({'x': x + 10} for x in range(5))
 64 |         >>> kwargs = {'others': [other1, other2]}
 65 |         >>> tuples = zip(stream, repeat(None))
 66 |         >>> len(list(parser(stream, None, tuples, **kwargs)))
 67 |         15
 68 |     """
 69 |     return chain(stream, multiplex(kwargs["others"]))
 70 | 
 71 | 
 72 | @operator(isasync=True, **OPTS)
 73 | def async_pipe(*args, **kwargs):
 74 |     """An operator that asynchronously merges multiple source streams together.
 75 | 
 76 |     Args:
 77 |         items (Iter[dict]): The source.
 78 |         kwargs (dict): The keyword arguments passed to the wrapper
 79 | 
 80 |     Kwargs:
 81 |         others (List[Iter(dict)]): List of streams to join
 82 | 
 83 |     Returns:
 84 |         Deferred: twisted.internet.defer.Deferred iterator of the merged streams
 85 | 
 86 |     Examples:
 87 |         >>> from riko.bado import react
 88 |         >>> from riko.bado.mock import FakeReactor
 89 |         >>>
 90 |         >>> def run(reactor):
 91 |         ...     callback = lambda x: print(len(list(x)))
 92 |         ...     items = ({'x': x} for x in range(5))
 93 |         ...     other1 = ({'x': x + 5} for x in range(5))
 94 |         ...     other2 = ({'x': x + 10} for x in range(5))
 95 |         ...     d = async_pipe(items, others=[other1, other2])
 96 |         ...     return d.addCallbacks(callback, logger.error)
 97 |         >>>
 98 |         >>> try:
 99 |         ...     react(run, _reactor=FakeReactor())
100 |         ... except SystemExit:
101 |         ...     pass
102 |         ...
103 |         15
104 |     """
105 |     return parser(*args, **kwargs)
106 | 
107 | 
108 | @operator(**OPTS)
109 | def pipe(*args, **kwargs):
110 |     """An operator that merges multiple streams together.
111 | 
112 |     Args:
113 |         items (Iter[dict]): The source.
114 |         kwargs (dict): The keyword arguments passed to the wrapper
115 | 
116 |     Kwargs:
117 |         others (List[Iter(dict)]): List of streams to join
118 | 
119 |     Yields:
120 |         dict: a merged stream item
121 | 
122 |     Examples:
123 |         >>> items = ({'x': x} for x in range(5))
124 |         >>> other1 = ({'x': x + 5} for x in range(5))
125 |         >>> other2 = ({'x': x + 10} for x in range(5))
126 |         >>> len(list(pipe(items, others=[other1, other2])))
127 |         15
128 |     """
129 |     return parser(*args, **kwargs)
130 | 


--------------------------------------------------------------------------------
/riko/modules/uniq.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.uniq
  5 | ~~~~~~~~~~~~~~~~~
  6 | Provides functions for filtering out non unique items from a stream according
  7 | to a specified field.
  8 | 
  9 | Removes duplicate items. You select the element to filter on, and Unique
 10 | removes the duplicates
 11 | 
 12 | Examples:
 13 |     basic usage::
 14 | 
 15 |         >>> from riko.modules.uniq import pipe
 16 |         >>>
 17 |         >>> items = ({'x': x, 'mod': x % 2} for x in range(5))
 18 |         >>> list(pipe(items, conf={'uniq_key': 'mod'})) == [
 19 |         ...     {'x': 0, 'mod': 0}, {'x': 1, 'mod': 1}]
 20 |         True
 21 | 
 22 | Attributes:
 23 |     OPTS (dict): The default pipe options
 24 |     DEFAULTS (dict): The default parser options
 25 | """
 26 | import pygogo as gogo
 27 | 
 28 | from collections import deque
 29 | from . import operator
 30 | 
 31 | OPTS = {}
 32 | DEFAULTS = {"uniq_key": "content", "limit": 1024}
 33 | logger = gogo.Gogo(__name__, monolog=True).logger
 34 | 
 35 | 
 36 | def parser(stream, objconf, tuples, **kwargs):
 37 |     """Parses the pipe content
 38 | 
 39 |     Args:
 40 |         stream (Iter[dict]): The source. Note: this shares the `tuples`
 41 |             iterator, so consuming it will consume `tuples` as well.
 42 | 
 43 |         objconf (obj): The pipe configuration (an Objectify instance)
 44 | 
 45 |         tuples (Iter[(dict, obj)]): Iterable of tuples of (item, rules)
 46 |             `item` is an element in the source stream (a DotDict instance)
 47 |             and `rules` is the rule configuration (an Objectify instance).
 48 |             Note: this shares the `stream` iterator, so consuming it will
 49 |             consume `stream` as well.
 50 | 
 51 |         kwargs (dict): Keyword arguments.
 52 | 
 53 |     Yields:
 54 |         dict: The output
 55 | 
 56 |     Examples:
 57 |         >>> from itertools import repeat
 58 |         >>> from meza.fntools import Objectify
 59 |         >>>
 60 |         >>> conf = {'uniq_key': 'mod', 'limit': 256}
 61 |         >>> objconf = Objectify(conf)
 62 |         >>> kwargs = {'conf': conf}
 63 |         >>> stream = ({'x': x, 'mod': x % 2} for x in range(5))
 64 |         >>> tuples = zip(stream, repeat(objconf))
 65 |         >>> list(parser(stream, objconf, tuples, **kwargs)) == [
 66 |         ...     {'x': 0, 'mod': 0}, {'x': 1, 'mod': 1}]
 67 |         True
 68 |     """
 69 |     key, limit = objconf.uniq_key, int(objconf.limit)
 70 |     seen = deque(maxlen=limit)
 71 | 
 72 |     for item in stream:
 73 |         value = item.get(key)
 74 | 
 75 |         if value not in seen:
 76 |             seen.append(value)
 77 |             yield item
 78 | 
 79 | 
 80 | @operator(DEFAULTS, isasync=True, **OPTS)
 81 | def async_pipe(*args, **kwargs):
 82 |     """An operator that asynchronously filters out non unique items according
 83 |     to a specified field.
 84 | 
 85 |     Args:
 86 |         items (Iter[dict]): The source.
 87 |         kwargs (dict): The keyword arguments passed to the wrapper
 88 | 
 89 |     Kwargs:
 90 |         conf (dict): The pipe configuration. May contain the keys 'uniq_key' or
 91 |             'limit'.
 92 | 
 93 |             uniq_key (str): Item attribute which should be unique (default:
 94 |                 'content').
 95 | 
 96 |             limit (int): Maximum number of unique items to track (default:
 97 |                 1024)
 98 | 
 99 |     Returns:
100 |         Deferred: twisted.internet.defer.Deferred stream
101 | 
102 |     Examples:
103 |         >>> from riko.bado import react
104 |         >>> from riko.bado.mock import FakeReactor
105 |         >>>
106 |         >>> def run(reactor):
107 |         ...     callback = lambda x: print([i['mod'] for i in x])
108 |         ...     items = ({'x': x, 'mod': x % 2} for x in range(5))
109 |         ...     d = async_pipe(items, conf={'uniq_key': 'mod'})
110 |         ...     return d.addCallbacks(callback, logger.error)
111 |         >>>
112 |         >>> try:
113 |         ...     react(run, _reactor=FakeReactor())
114 |         ... except SystemExit:
115 |         ...     pass
116 |         ...
117 |         [0, 1]
118 |     """
119 |     return parser(*args, **kwargs)
120 | 
121 | 
122 | @operator(DEFAULTS, **OPTS)
123 | def pipe(*args, **kwargs):
124 |     """An operator that filters out non unique items according to a specified
125 |     field.
126 | 
127 |     Args:
128 |         items (Iter[dict]): The source.
129 |         kwargs (dict): The keyword arguments passed to the wrapper
130 | 
131 |     Kwargs:
132 |         conf (dict): The pipe configuration. May contain the keys 'uniq_key' or
133 |             'limit'.
134 | 
135 |             uniq_key (str): Item attribute which should be unique (default:
136 |                 'content').
137 | 
138 |             limit (int): Maximum number of unique items to track (default:
139 |                 1024)
140 | 
141 |     Yields:
142 |         dict: an item
143 | 
144 |     Examples:
145 |         >>> items = [{'content': x, 'mod': x % 2} for x in range(5)]
146 |         >>> list(pipe(items, conf={'uniq_key': 'mod'})) == [
147 |         ...     {'mod': 0, 'content': 0}, {'mod': 1, 'content': 1}]
148 |         True
149 |         >>> stream = pipe(items)
150 |         >>> next(stream) == {'mod': 0, 'content': 0}
151 |         True
152 |         >>> [item['content'] for item in stream]
153 |         [1, 2, 3, 4]
154 |     """
155 |     return parser(*args, **kwargs)
156 | 


--------------------------------------------------------------------------------
/riko/modules/urlparse.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # vim: sw=4:ts=4:expandtab
  3 | """
  4 | riko.modules.urlparse
  5 | ~~~~~~~~~~~~~~~~~~~~~
  6 | Provides functions for parsing a URL into its six components.
  7 | 
  8 | Examples:
  9 |     basic usage::
 10 | 
 11 |         >>> from riko.modules.urlparse import pipe
 12 |         >>>
 13 |         >>> item = {'content': 'http://yahoo.com'}
 14 |         >>> scheme = {'component': 'scheme', 'content': 'http'}
 15 |         >>> next(pipe(item))['urlparse'][0] == scheme
 16 |         True
 17 | 
 18 | Attributes:
 19 |     OPTS (dict): The default pipe options
 20 |     DEFAULTS (dict): The default parser options
 21 | """
 22 | import pygogo as gogo
 23 | 
 24 | from urllib.parse import urlparse
 25 | from . import processor
 26 | 
 27 | OPTS = {"ftype": "text", "field": "content"}
 28 | DEFAULTS = {"parse_key": "content"}
 29 | logger = gogo.Gogo(__name__, monolog=True).logger
 30 | 
 31 | 
 32 | def parser(url, objconf, skip=False, **kwargs):
 33 |     """Parsers the pipe content
 34 | 
 35 |     Args:
 36 |         url (str): The link to parse
 37 |         objconf (obj): The pipe configuration (an Objectify instance)
 38 |         skip (bool): Don't parse the content
 39 |         kwargs (dict): Keyword arguments
 40 | 
 41 |     Kwargs:
 42 |         assign (str): Attribute to assign parsed content (default: urlparse)
 43 |         stream (dict): The original item
 44 | 
 45 |     Returns:
 46 |         dict: The item
 47 | 
 48 |     Examples:
 49 |         >>> from meza.fntools import Objectify
 50 |         >>>
 51 |         >>> objconf = Objectify({'parse_key': 'value'})
 52 |         >>> result = parser('http://yahoo.com', objconf)
 53 |         >>> next(result) == {'component': 'scheme', 'value': 'http'}
 54 |         True
 55 |     """
 56 |     if skip:
 57 |         stream = kwargs["stream"]
 58 |     else:
 59 |         parsed = urlparse(url)
 60 | 
 61 |         # noqa pylint: disable=dict-items-not-iterating
 62 |         items = parsed._asdict().items()
 63 |         stream = ({"component": k, objconf.parse_key: v} for k, v in items)
 64 | 
 65 |     return stream
 66 | 
 67 | 
 68 | @processor(DEFAULTS, isasync=True, **OPTS)
 69 | def async_pipe(*args, **kwargs):
 70 |     """A processor module that asynchronously parses a URL into its components.
 71 | 
 72 |     Args:
 73 |         item (dict): The entry to process
 74 |         kwargs (dict): The keyword arguments passed to the wrapper
 75 | 
 76 |     Kwargs:
 77 |         assign (str): Attribute to assign parsed content (default: urlparse)
 78 |         field (str): Item attribute to operate on (default: 'content')
 79 | 
 80 |     Returns:
 81 |        Deferred: twisted.internet.defer.Deferred item with parsed content
 82 | 
 83 |     Examples:
 84 |         >>> from riko.bado import react
 85 |         >>> from riko.bado.mock import FakeReactor
 86 |         >>>
 87 |         >>> scheme = {'component': 'scheme', 'content': 'http'}
 88 |         >>>
 89 |         >>> def run(reactor):
 90 |         ...     callback = lambda x: print(next(x)['urlparse'][0] == scheme)
 91 |         ...     d = async_pipe({'content': 'http://yahoo.com'})
 92 |         ...     return d.addCallbacks(callback, logger.error)
 93 |         >>>
 94 |         >>> try:
 95 |         ...     react(run, _reactor=FakeReactor())
 96 |         ... except SystemExit:
 97 |         ...     pass
 98 |         ...
 99 |         True
100 |     """
101 |     return parser(*args, **kwargs)
102 | 
103 | 
104 | @processor(DEFAULTS, **OPTS)
105 | def pipe(*args, **kwargs):
106 |     """A processor that parses a URL into its components.
107 | 
108 |     Args:
109 |         item (dict): The entry to process
110 |         kwargs (dict): The keyword arguments passed to the wrapper
111 | 
112 |     Kwargs:
113 |         conf (dict): The pipe configuration. May contain the key 'parse_key'.
114 | 
115 |             parse_key (str): Attribute to assign individual tokens (default:
116 |                 content)
117 | 
118 |         assign (str): Attribute to assign parsed content (default: urlparse)
119 |         field (str): Item attribute to operate on (default: 'content')
120 | 
121 |     Yields:
122 |         dict: an item with parsed content
123 | 
124 |     Examples:
125 |         >>> item = {'content': 'http://yahoo.com'}
126 |         >>> scheme = {'component': 'scheme', 'content': 'http'}
127 |         >>> next(pipe(item))['urlparse'][0] == scheme
128 |         True
129 |         >>> conf = {'parse_key': 'value'}
130 |         >>> next(pipe(item, conf=conf, emit=True)) == {
131 |         ...     'component': 'scheme', 'value': 'http'}
132 |         True
133 |     """
134 |     return parser(*args, **kwargs)
135 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [wheel]
 2 | universal=1
 3 | 
 4 | [nosetests]
 5 | verbosity=1
 6 | # detailed-errors=1
 7 | cover-package=riko
 8 | with-doctest=1
 9 | 
10 | [unittest]
11 | plugins=doctest
12 | process-restartworker=1
13 | 
14 | [doctest]
15 | always-on=True
16 | 
17 | [flake8]
18 | ignore=W191,E203,E126,E128,E122,E731,F403,F401,BLK100
19 | max-complexity=10
20 | max-line-length=88
21 | exclude=./build/*,./dist/*,./.tox/*,./app/lib/.ropeproject/*,./eggs/*
22 | 
23 | [pylint]
24 | max-line-length=88
25 | 
26 | [pylint.messages_control]
27 | disable=C0330,C0326
28 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | import sys
 5 | 
 6 | from os import path as p
 7 | from setuptools import setup, find_packages
 8 | 
 9 | import pkutils
10 | 
11 | PARENT_DIR = p.abspath(p.dirname(__file__))
12 | 
13 | sys.dont_write_bytecode = True
14 | requirements = list(pkutils.parse_requirements("requirements.txt"))
15 | dev_requirements = list(pkutils.parse_requirements("dev-requirements.txt"))
16 | optional = "optional-requirements.txt"
17 | opt_requirements = set(pkutils.parse_requirements(optional))
18 | readme = pkutils.read("README.rst")
19 | module = pkutils.parse_module(p.join(PARENT_DIR, "riko", "__init__.py"))
20 | license = module.__license__
21 | version = module.__version__
22 | project = module.__title__
23 | description = module.__description__
24 | user = "nerevu"
25 | 
26 | # Setup requirements
27 | setup_require = [r for r in dev_requirements if "pkutils" in r]
28 | 
29 | # Optional requirements
30 | xml_require = [r for r in opt_requirements if not r.lower().startswith("t")]
31 | async_require = list(opt_requirements.difference(xml_require))
32 | 
33 | setup(
34 |     name=project,
35 |     version=version,
36 |     description=description,
37 |     long_description=readme,
38 |     author=module.__author__,
39 |     author_email=module.__email__,
40 |     url=pkutils.get_url(project, user),
41 |     download_url=pkutils.get_dl_url(project, user, version),
42 |     packages=find_packages(exclude=["tests"]),
43 |     include_package_data=True,
44 |     package_data={
45 |         "data": ["data/*"],
46 |         "helpers": ["helpers/*"],
47 |         "tests": ["tests/*"],
48 |         "docs": ["docs/*"],
49 |         "examples": ["examples/*"],
50 |     },
51 |     install_requires=requirements,
52 |     extras_require={
53 |         "xml": xml_require,
54 |         "async": async_require,
55 |         "develop": dev_requirements,
56 |     },
57 |     setup_requires=setup_require,
58 |     test_suite="nose.collector",
59 |     tests_require=dev_requirements,
60 |     license=license,
61 |     zip_safe=False,
62 |     keywords=[project] + description.split(" "),
63 |     classifiers=[
64 |         pkutils.get_license(license),
65 |         pkutils.get_status(version),
66 |         "Natural Language :: English",
67 |         "Programming Language :: Python",
68 |         "Programming Language :: Python :: 3",
69 |         "Programming Language :: Python :: 3 :: Only",
70 |         "Programming Language :: Python :: 3.7",
71 |         "Programming Language :: Python :: 3.8",
72 |         "Programming Language :: Python :: 3.9",
73 |         "Programming Language :: Python :: Implementation :: CPython",
74 |         "Programming Language :: Python :: Implementation :: PyPy",
75 |         "Environment :: Console",
76 |         "Topic :: Software Development :: Libraries :: Python Modules",
77 |         "Intended Audience :: Developers",
78 |         "Operating System :: POSIX :: Linux",
79 |         "Operating System :: MacOS :: MacOS X",
80 |         "Operating System :: Microsoft :: Windows",
81 |     ],
82 |     platforms=["MacOS X", "Windows", "Linux"],
83 |     scripts=[p.join("bin", "runpipe")],
84 | )
85 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # vim: sw=4:ts=4:expandtab
 3 | """
 4 | tests
 5 | ~~~~~
 6 | 
 7 | Provides application unit tests
 8 | """
 9 | 
10 | 
11 | def setup_package():
12 |     """database context creation"""
13 |     global initialized
14 |     initialized = True
15 |     print("Test Package Setup\n")
16 | 
17 | 
18 | def teardown_package():
19 |     """database context removal"""
20 |     global initialized
21 |     initialized = False
22 |     print("Test Package Teardown\n")
23 | 


--------------------------------------------------------------------------------
/tests/test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | # vim: sw=4:ts=4:expandtab
  4 | 
  5 | """
  6 | tests.test
  7 | ~~~~~~~~~~
  8 | 
  9 | Provides scripttests to test riko runpipe CLI functionality.
 10 | """
 11 | 
 12 | import sys
 13 | import pygogo as gogo
 14 | 
 15 | from difflib import unified_diff
 16 | from os import path as p
 17 | from io import StringIO, open
 18 | from timeit import default_timer as timer
 19 | 
 20 | from scripttest import TestFileEnvironment
 21 | 
 22 | sys.path.append("../riko")
 23 | 
 24 | try:
 25 |     from riko.bado import _isasync
 26 | except ImportError:
 27 |     _isasync = False
 28 | 
 29 | PARENT_DIR = p.abspath(p.dirname(p.dirname(__file__)))
 30 | 
 31 | 
 32 | def main(script, tests, verbose=False, stop=True):
 33 |     """Main method
 34 | 
 35 |     Returns 0 on success, 1 on failure
 36 |     """
 37 |     failures = 0
 38 |     logger = gogo.Gogo(__name__, verbose=verbose).logger
 39 |     short_script = p.basename(script)
 40 |     env = TestFileEnvironment(".scripttest")
 41 | 
 42 |     start = timer()
 43 | 
 44 |     for pos, test in enumerate(tests):
 45 |         num = pos + 1
 46 |         opts, arguments, expected = test
 47 |         joined_opts = " ".join(opts) if opts else ""
 48 |         joined_args = '"%s"' % '" "'.join(arguments) if arguments else ""
 49 |         command = "%s %s %s" % (script, joined_opts, joined_args)
 50 |         short_command = "%s %s %s" % (short_script, joined_opts, joined_args)
 51 |         result = env.run(command, cwd=PARENT_DIR, expect_stderr=True)
 52 |         output = result.stdout
 53 | 
 54 |         if isinstance(expected, bool):
 55 |             text = StringIO(output).read()
 56 |             outlines = [str(bool(text))]
 57 |             checklines = StringIO(str(expected)).readlines()
 58 |         elif p.isfile(expected):
 59 |             outlines = StringIO(output).readlines()
 60 | 
 61 |             with open(expected, encoding="utf-8") as f:
 62 |                 checklines = f.readlines()
 63 |         else:
 64 |             outlines = StringIO(output).readlines()
 65 |             checklines = StringIO(expected).readlines()
 66 | 
 67 |         args = [checklines, outlines]
 68 |         kwargs = {"fromfile": "expected", "tofile": "got"}
 69 |         diffs = "".join(unified_diff(*args, **kwargs))
 70 | 
 71 |         if diffs:
 72 |             failures += 1
 73 |             msg = "ERROR! Output from test #%i:\n  %s\n" % (num, short_command)
 74 |             msg += "doesn't match:\n  %s\n" % expected
 75 |             msg += diffs if diffs else ""
 76 |         else:
 77 |             logger.debug(output)
 78 |             msg = "Scripttest #%i: %s ... ok" % (num, short_command)
 79 | 
 80 |         logger.info(msg)
 81 | 
 82 |         if stop and failures:
 83 |             break
 84 | 
 85 |     time = timer() - start
 86 |     logger.info("%s" % "-" * 70)
 87 |     end = "FAILED (failures=%i)" % failures if failures else "OK"
 88 |     logger.info("Ran %i scripttests in %0.3fs\n\n%s" % (num, time, end))
 89 |     sys.exit(failures)
 90 | 
 91 | 
 92 | if __name__ == "__main__":
 93 |     demo = p.join(PARENT_DIR, "bin", "runpipe")
 94 |     benchmark = p.join(PARENT_DIR, "bin", "benchmark")
 95 |     text = "Deadline to clear up health law eligibility near 682\n"
 96 |     runpipe_tests = [([], ["demo"], text), ([], ["simple1"], "'farechart'\n")]
 97 | 
 98 |     if _isasync:
 99 |         runpipe_tests += [
100 |             (["-a"], ["demo"], text),
101 |             (["-a"], ["simple1"], "'farechart'\n"),
102 |         ]
103 | 
104 |     main(demo, runpipe_tests)
105 |     main(benchmark, [([], [], "")])
106 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | toxworkdir={homedir}/.tox/venvcache/riko
 3 | envlist=py{37,38,39,py3}-{test,style,optional}
 4 | 
 5 | [testenv]
 6 | distribute=False
 7 | whitelist_externals =
 8 |   {toxinidir}/helpers/pippy
 9 |   manage
10 | 
11 | install_command={toxinidir}/helpers/pippy {packages}
12 | setenv =
13 |   PYTHONHASHSEED=94967295
14 |   PYTHONWARNINGS=all
15 |   PIP_CACHE_DIR={homedir}/.pip/packages
16 |   PIP_WHEEL_DIR={homedir}/.pip/wheels
17 |   PIP_FIND_LINKS=file://{homedir}/.pip/wheels
18 | 
19 | commands =
20 |   pypy3-{optional,style}: true
21 |   optional: manage test
22 |   test: manage test
23 |   style: manage lint
24 | 
25 | deps =
26 |   -r{toxinidir}/dev-requirements.txt
27 |   -r{toxinidir}/requirements.txt
28 |   py{37,38,39}-{optional}: -r{toxinidir}/optional-requirements.txt
29 | 
30 | [testenv:pypy3]
31 | basepython==pypy3.7-7.3.2
32 | 


--------------------------------------------------------------------------------