├── .editorconfig ├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── CONTRIBUTING.rst ├── HISTORY.rst ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── docs ├── Makefile ├── authors.rst ├── conf.py ├── contributing.rst ├── flatson.rst ├── history.rst ├── index.rst ├── installation.rst ├── make.bat ├── modules.rst ├── readme.rst └── usage.rst ├── flatson ├── __init__.py └── flatson.py ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── requirements.txt └── test_flatson.py └── tox.ini /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 4 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | charset = utf-8 11 | end_of_line = lf 12 | 13 | [*.bat] 14 | indent_style = tab 15 | end_of_line = crlf 16 | 17 | [LICENSE] 18 | insert_final_newline = false 19 | 20 | [Makefile] 21 | indent_style = tab 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | htmlcov 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | 38 | # Complexity 39 | output/*.html 40 | output/*/index.html 41 | 42 | # Sphinx 43 | docs/_build 44 | 45 | # IDE 46 | .idea -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 
2.7 3 | 4 | env: 5 | - TOXENV=py27 6 | - TOXENV=pypy 7 | # TODO: fix tests to work in PY3 8 | # - TOXENV=py33 9 | # - TOXENV=py34 10 | 11 | install: 12 | - pip install -U tox twine wheel 13 | 14 | script: 15 | - tox 16 | 17 | deploy: 18 | provider: pypi 19 | distributions: sdist bdist_wheel 20 | user: scrapinghub 21 | password: 22 | secure: dyFz0kl/AT6+GC7+i1Oeo8qT3DSjeue6eTtnpjIQdCq2NEyvKaVflrYNa58l6LPkq3LdQmK4NhMRjVTKXE87rb38FrVo/VVTql00lYZGOHcU+bcNk2rH2orb16SWv38AuKvaBNGnl1ReCd41asFWV9YRY+Cu6wBk1RDNcCQ5mUJ3GxJRoF2xvCL+SIQQbZSMEdT+RL+b67GtN6BahTXppOdiS5t7YJoH3NQUFeLnBNrZReGZnQjr1UgW3IMHHvVM/AlfdEPwCp66PuzjcOzYPlHaF2BKKZ4JGDfeStAzIuLPsE7Q02g2WiE92MWOP5Kn8X0Qn9IkqgQItXREHll8wfldqMiWEJuaVkge/uEGQHuni00eu4Gf+0d5XNA96r49W1zSjR0ADXOFvxGX4AnZ2LXPXL/d1Di9xsHs6cu2LVGeSFHWn3sYAMJrhRO0KHd4KznpEDO7iBgvzaihFl8YGabuQ8ia7PSb6wv1WL0YyWN0PW6ZjYjnMwp7b/Kv7DV/gZxE1qvG1a7G0NQW3DDJpezoGb1iGmuiCQ1LrXLYjGAfNOF1NGX2yl5kV+Fw23cvHyKNyaVAtR3wx9HcLGeLE6Iyb6+cYewMIXHaZ31z0UDrcFleHRiuQveriDFOAhs0YAH2KtBMVTATaVrvGxlMPxjY8N/KHfG89TZcwkNhBXo= 23 | on: 24 | tags: true 25 | repo: scrapinghub/flatson 26 | condition: $TOXENV == py27 27 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Development Lead 6 | ---------------- 7 | 8 | * ScrapingHub 9 | 10 | Contributors 11 | ------------ 12 | 13 | * Elias Dorneles 14 | * Bernardo Botella 15 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | Contributions are welcome, and they are greatly appreciated! Every 6 | little bit helps, and credit will always be given. 
7 | 8 | You can contribute in many ways: 9 | 10 | Types of Contributions 11 | ---------------------- 12 | 13 | Report Bugs 14 | ~~~~~~~~~~~ 15 | 16 | Report bugs at https://github.com/scrapinghub/flatson/issues. 17 | 18 | If you are reporting a bug, please include: 19 | 20 | * Your operating system name and version. 21 | * Any details about your local setup that might be helpful in troubleshooting. 22 | * Detailed steps to reproduce the bug. 23 | 24 | Fix Bugs 25 | ~~~~~~~~ 26 | 27 | Look through the GitHub issues for bugs. Anything tagged with "bug" 28 | is open to whoever wants 29 | to implement it. 30 | 31 | Implement Features 32 | ~~~~~~~~~~~~~~~~~~ 33 | 34 | Look through the GitHub issues for features. Anything tagged with "feature" 35 | is open to whoever wants to implement it. 36 | 37 | Write Documentation 38 | ~~~~~~~~~~~~~~~~~~~ 39 | 40 | Flatson could always use more documentation, whether as part of the 41 | official Flatson docs, in docstrings, or even on the web in blog posts, 42 | articles, and such. 43 | 44 | Submit Feedback 45 | ~~~~~~~~~~~~~~~ 46 | 47 | The best way to send feedback is to file an issue at https://github.com/scrapinghub/flatson/issues. 48 | 49 | If you are proposing a feature: 50 | 51 | * Explain in detail how it would work. 52 | * Keep the scope as narrow as possible, to make it easier to implement. 53 | * Remember that this is a volunteer-driven project, and that contributions 54 | are welcome :) 55 | 56 | Get Started! 57 | ------------ 58 | 59 | Ready to contribute? Here's how to set up `flatson` for local development. 60 | 61 | 1. Fork the `flatson` repo on GitHub. 62 | 2. Clone your fork locally:: 63 | 64 | $ git clone git@github.com:your_name_here/flatson.git 65 | 66 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: 67 | 68 | $ mkvirtualenv flatson 69 | $ cd flatson/ 70 | $ python setup.py develop 71 | 72 | 4.
Create a branch for local development:: 73 | 74 | $ git checkout -b name-of-your-bugfix-or-feature 75 | 76 | Now you can make your changes locally. 77 | 78 | 5. When you're done making changes, check that your changes pass flake8 and the tests, including testing other Python versions with tox:: 79 | 80 | $ flake8 flatson tests 81 | $ python setup.py test 82 | $ tox 83 | 84 | To get flake8 and tox, just pip install them into your virtualenv. 85 | 86 | 6. Commit your changes and push your branch to GitHub:: 87 | 88 | $ git add . 89 | $ git commit -m "Your detailed description of your changes." 90 | $ git push origin name-of-your-bugfix-or-feature 91 | 92 | 7. Submit a pull request through the GitHub website. 93 | 94 | Pull Request Guidelines 95 | ----------------------- 96 | 97 | Before you submit a pull request, check that it meets these guidelines: 98 | 99 | 1. The pull request should include tests. 100 | 2. If the pull request adds functionality, the docs should be updated. Put 101 | your new functionality into a function with a docstring, and add the 102 | feature to the list in README.rst. 103 | 3. The pull request should work for Python 2.6, 2.7, 3.3, and 3.4, and for PyPy. Check 104 | https://travis-ci.org/scrapinghub/flatson/pull_requests 105 | and make sure that the tests pass for all supported Python versions. 106 | 107 | Tips 108 | ---- 109 | 110 | To run a subset of tests:: 111 | 112 | $ python -m unittest tests.test_flatson 113 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | .. :changelog: 2 | 3 | History 4 | ------- 5 | 6 | 0.1.0 (2015-09-25) 7 | --------------------- 8 | 9 | * First release on PyPI. 
10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, ScrapingHub 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | * Neither the name of Flatson nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS.rst 2 | include CONTRIBUTING.rst 3 | include HISTORY.rst 4 | include LICENSE 5 | include README.rst 6 | 7 | recursive-include tests * 8 | recursive-exclude * __pycache__ 9 | recursive-exclude * *.py[co] 10 | 11 | recursive-include docs *.rst conf.py Makefile make.bat 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean-pyc clean-build docs clean 2 | 3 | help: 4 | @echo "clean - remove all build, test, coverage and Python artifacts" 5 | @echo "clean-build - remove build artifacts" 6 | @echo "clean-pyc - remove Python file artifacts" 7 | @echo "clean-test - remove test and coverage artifacts" 8 | @echo "lint - check style with flake8" 9 | @echo "test - run tests quickly with the default Python" 10 | @echo "test-all - run tests on every Python version with tox" 11 | @echo "coverage - check code coverage quickly with the default Python" 12 | @echo "docs - generate Sphinx HTML documentation, including API docs" 13 | @echo "release - package and upload a release" 14 | @echo "dist - package" 15 | @echo "install - install the package to the active Python's site-packages" 16 | 17 | clean: clean-build clean-pyc clean-test 18 | 19 | clean-build: 20 | rm -fr build/ 21 | rm -fr dist/ 22 | rm -fr .eggs/ 23 | find . -name '*.egg-info' -exec rm -fr {} + 24 | find . -name '*.egg' -exec rm -f {} + 25 | 26 | clean-pyc: 27 | find . -name '*.pyc' -exec rm -f {} + 28 | find . -name '*.pyo' -exec rm -f {} + 29 | find . -name '*~' -exec rm -f {} + 30 | find . 
-name '__pycache__' -exec rm -fr {} + 31 | 32 | clean-test: 33 | rm -fr .tox/ 34 | rm -f .coverage 35 | rm -fr htmlcov/ 36 | 37 | lint: 38 | flake8 flatson tests 39 | 40 | test: 41 | nosetests -s -v 42 | 43 | test-all: 44 | tox 45 | 46 | coverage: 47 | coverage run --source flatson setup.py test 48 | coverage report -m 49 | coverage html 50 | python -m webbrowser htmlcov/index.html 51 | 52 | docs: 53 | rm -f docs/flatson.rst 54 | rm -f docs/modules.rst 55 | sphinx-apidoc -o docs/ flatson 56 | $(MAKE) -C docs clean 57 | $(MAKE) -C docs html 58 | python -m webbrowser docs/_build/html/index.html 59 | 60 | servedocs: docs 61 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 62 | 63 | release: clean 64 | python setup.py sdist upload 65 | python setup.py bdist_wheel upload 66 | 67 | dist: clean 68 | python setup.py sdist 69 | python setup.py bdist_wheel 70 | ls -l dist 71 | 72 | install: clean 73 | python setup.py install 74 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | =============================== 2 | Flatson 3 | =============================== 4 | 5 | .. image:: https://img.shields.io/travis/scrapinghub/flatson.svg 6 | :target: https://travis-ci.org/scrapinghub/flatson 7 | 8 | .. image:: https://img.shields.io/pypi/v/flatson.svg 9 | :target: https://pypi.python.org/pypi/flatson 10 | 11 | Flatson emerged at `Scrapinghub`_ from the need to export huge JSON-like datasets into **flat** CSV-like tables. Flatson is particularly useful to handle really huge datasets, because it doesn't load all the data in memory at once. 12 | 13 | .. _Scrapinghub: http://scrapinghub.com 14 | 15 | * Free software: BSD license 16 | * Documentation: https://flatson.readthedocs.org. 
17 | 18 | Features 19 | -------- 20 | 21 | * Flattens Python dictionaries using a JSON schema 22 | * Supports per-field configuration via the schema 23 | 24 | Usage:: 25 | 26 | >>> from flatson import Flatson 27 | >>> schema = { 28 | "$schema": "http://json-schema.org/draft-04/schema", 29 | "type": "object", 30 | "properties": { 31 | "name": {"type": "string"}, 32 | "age": {"type": "number"}, 33 | "address": { 34 | "type": "object", 35 | "properties": {"city": {"type": "string"}, "street": {"type": "string"}} 36 | }, 37 | "skills": {"type": "array", "items": {"type": "string"}} 38 | } 39 | } 40 | >>> sample = { 41 | "name": "Claudio", "age": 42, 42 | "address": {"city": "Paris", "street": "Rue de Sevres"}, 43 | "skills": ["hacking", "soccer"]} 44 | >>> f = Flatson(schema) 45 | >>> f.fieldnames 46 | ['address.city', 'address.street', 'age', 'name', 'skills'] 47 | >>> f.flatten(sample) 48 | ['Paris', 'Rue de Sevres', 42, 'Claudio', '["hacking","soccer"]'] 49 | 50 | You can get a dict with the field names order preserved:: 51 | 52 | >>> f.flatten_dict(sample) 53 | OrderedDict([('address.city', 'Paris'), ('address.street', 'Rue de Sevres'), ('age', 42), ('name', 'Claudio'), ('skills', '["hacking","soccer"]')]) 54 | 55 | You can also configure array serialization behavior through the schema (default JSON):: 56 | 57 | >>> schema = { 58 | "$schema": "http://json-schema.org/draft-04/schema", 59 | "type": "object", 60 | "properties": { 61 | "name": {"type": "string"}, 62 | "skills": { 63 | "type": "array", 64 | "items": {"type": "string"}, 65 | "flatson_serialize": {"method": "join_values"}, 66 | } 67 | } 68 | } 69 | >>> f = Flatson(schema) 70 | >>> f.flatten({"name": "Salazar", "skills": ["hacking", "soccer", "partying"]}) 71 | ['Salazar', 'hacking,soccer,partying'] 72 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx
documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make <target>' where <target> is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual
pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/flatson.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/flatson.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/flatson" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/flatson" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 
129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
178 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # flatson documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Jul 9 22:26:36 2013. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | # If extensions (or modules to document with autodoc) are in another 20 | # directory, add these directories to sys.path here. If the directory is 21 | # relative to the documentation root, use os.path.abspath to make it 22 | # absolute, like shown here. 23 | #sys.path.insert(0, os.path.abspath('.')) 24 | 25 | # Get the project root dir, which is the parent dir of this 26 | cwd = os.getcwd() 27 | project_root = os.path.dirname(cwd) 28 | 29 | # Insert the project root dir as the first element in the PYTHONPATH. 30 | # This lets us ensure that the source package is imported, and that its 31 | # version is used. 32 | sys.path.insert(0, project_root) 33 | 34 | import flatson 35 | 36 | # -- General configuration --------------------------------------------- 37 | 38 | # If your documentation needs a minimal Sphinx version, state it here. 39 | #needs_sphinx = '1.0' 40 | 41 | # Add any Sphinx extension module names here, as strings. They can be 42 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
43 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] 44 | 45 | # Add any paths that contain templates here, relative to this directory. 46 | templates_path = ['_templates'] 47 | 48 | # The suffix of source filenames. 49 | source_suffix = '.rst' 50 | 51 | # The encoding of source files. 52 | #source_encoding = 'utf-8-sig' 53 | 54 | # The master toctree document. 55 | master_doc = 'index' 56 | 57 | # General information about the project. 58 | project = u'Flatson' 59 | copyright = u'2015, ScrapingHub' 60 | 61 | # The version info for the project you're documenting, acts as replacement 62 | # for |version| and |release|, also used in various other places throughout 63 | # the built documents. 64 | # 65 | # The short X.Y version. 66 | version = flatson.__version__ 67 | # The full version, including alpha/beta/rc tags. 68 | release = flatson.__version__ 69 | 70 | # The language for content autogenerated by Sphinx. Refer to documentation 71 | # for a list of supported languages. 72 | #language = None 73 | 74 | # There are two options for replacing |today|: either, you set today to 75 | # some non-false value, then it is used: 76 | #today = '' 77 | # Else, today_fmt is used as the format for a strftime call. 78 | #today_fmt = '%B %d, %Y' 79 | 80 | # List of patterns, relative to source directory, that match files and 81 | # directories to ignore when looking for source files. 82 | exclude_patterns = ['_build'] 83 | 84 | # The reST default role (used for this markup: `text`) to use for all 85 | # documents. 86 | #default_role = None 87 | 88 | # If true, '()' will be appended to :func: etc. cross-reference text. 89 | #add_function_parentheses = True 90 | 91 | # If true, the current module name will be prepended to all description 92 | # unit titles (such as .. function::). 93 | #add_module_names = True 94 | 95 | # If true, sectionauthor and moduleauthor directives will be shown in the 96 | # output. They are ignored by default. 
97 | #show_authors = False 98 | 99 | # The name of the Pygments (syntax highlighting) style to use. 100 | pygments_style = 'sphinx' 101 | 102 | # A list of ignored prefixes for module index sorting. 103 | #modindex_common_prefix = [] 104 | 105 | # If true, keep warnings as "system message" paragraphs in the built 106 | # documents. 107 | #keep_warnings = False 108 | 109 | 110 | # -- Options for HTML output ------------------------------------------- 111 | 112 | # The theme to use for HTML and HTML Help pages. See the documentation for 113 | # a list of builtin themes. 114 | html_theme = 'sphinx_rtd_theme' 115 | 116 | # Theme options are theme-specific and customize the look and feel of a 117 | # theme further. For a list of options available for each theme, see the 118 | # documentation. 119 | #html_theme_options = {} 120 | 121 | # Add any paths that contain custom themes here, relative to this directory. 122 | #html_theme_path = [] 123 | 124 | # The name for this set of Sphinx documents. If None, it defaults to 125 | # "<project> v<release> documentation". 126 | #html_title = None 127 | 128 | # A shorter title for the navigation bar. Default is the same as 129 | # html_title. 130 | #html_short_title = None 131 | 132 | # The name of an image file (relative to this directory) to place at the 133 | # top of the sidebar. 134 | #html_logo = None 135 | 136 | # The name of an image file (within the static path) to use as favicon 137 | # of the docs. This file should be a Windows icon file (.ico) being 138 | # 16x16 or 32x32 pixels large. 139 | #html_favicon = None 140 | 141 | # Add any paths that contain custom static files (such as style sheets) 142 | # here, relative to this directory. They are copied after the builtin 143 | # static files, so a file named "default.css" will overwrite the builtin 144 | # "default.css". 145 | html_static_path = ['_static'] 146 | 147 | # If not '', a 'Last updated on:' timestamp is inserted at every page 148 | # bottom, using the given strftime format.
149 | #html_last_updated_fmt = '%b %d, %Y' 150 | 151 | # If true, SmartyPants will be used to convert quotes and dashes to 152 | # typographically correct entities. 153 | #html_use_smartypants = True 154 | 155 | # Custom sidebar templates, maps document names to template names. 156 | #html_sidebars = {} 157 | 158 | # Additional templates that should be rendered to pages, maps page names 159 | # to template names. 160 | #html_additional_pages = {} 161 | 162 | # If false, no module index is generated. 163 | #html_domain_indices = True 164 | 165 | # If false, no index is generated. 166 | #html_use_index = True 167 | 168 | # If true, the index is split into individual pages for each letter. 169 | #html_split_index = False 170 | 171 | # If true, links to the reST sources are added to the pages. 172 | #html_show_sourcelink = True 173 | 174 | # If true, "Created using Sphinx" is shown in the HTML footer. 175 | # Default is True. 176 | #html_show_sphinx = True 177 | 178 | # If true, "(C) Copyright ..." is shown in the HTML footer. 179 | # Default is True. 180 | #html_show_copyright = True 181 | 182 | # If true, an OpenSearch description file will be output, and all pages 183 | # will contain a <link> tag referring to it. The value of this option 184 | # must be the base URL from which the finished HTML is served. 185 | #html_use_opensearch = '' 186 | 187 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 188 | #html_file_suffix = None 189 | 190 | # Output file base name for HTML help builder. 191 | htmlhelp_basename = 'flatsondoc' 192 | 193 | 194 | # -- Options for LaTeX output ------------------------------------------ 195 | 196 | latex_elements = { 197 | # The paper size ('letterpaper' or 'a4paper'). 198 | #'papersize': 'letterpaper', 199 | 200 | # The font size ('10pt', '11pt' or '12pt'). 201 | #'pointsize': '10pt', 202 | 203 | # Additional stuff for the LaTeX preamble. 204 | #'preamble': '', 205 | } 206 | 207 | # Grouping the document tree into LaTeX files.
List of tuples 208 | # (source start file, target name, title, author, documentclass 209 | # [howto/manual]). 210 | latex_documents = [ 211 | ('index', 'flatson.tex', 212 | u'Flatson Documentation', 213 | u'ScrapingHub', 'manual'), 214 | ] 215 | 216 | # The name of an image file (relative to this directory) to place at 217 | # the top of the title page. 218 | #latex_logo = None 219 | 220 | # For "manual" documents, if this is true, then toplevel headings 221 | # are parts, not chapters. 222 | #latex_use_parts = False 223 | 224 | # If true, show page references after internal links. 225 | #latex_show_pagerefs = False 226 | 227 | # If true, show URL addresses after external links. 228 | #latex_show_urls = False 229 | 230 | # Documents to append as an appendix to all manuals. 231 | #latex_appendices = [] 232 | 233 | # If false, no module index is generated. 234 | #latex_domain_indices = True 235 | 236 | 237 | # -- Options for manual page output ------------------------------------ 238 | 239 | # One entry per manual page. List of tuples 240 | # (source start file, name, description, authors, manual section). 241 | man_pages = [ 242 | ('index', 'flatson', 243 | u'Flatson Documentation', 244 | [u'ScrapingHub'], 1) 245 | ] 246 | 247 | # If true, show URL addresses after external links. 248 | #man_show_urls = False 249 | 250 | 251 | # -- Options for Texinfo output ---------------------------------------- 252 | 253 | # Grouping the document tree into Texinfo files. List of tuples 254 | # (source start file, target name, title, author, 255 | # dir menu entry, description, category) 256 | texinfo_documents = [ 257 | ('index', 'flatson', 258 | u'Flatson Documentation', 259 | u'ScrapingHub', 260 | 'flatson', 261 | 'One line description of project.', 262 | 'Miscellaneous'), 263 | ] 264 | 265 | # Documents to append as an appendix to all manuals. 266 | #texinfo_appendices = [] 267 | 268 | # If false, no module index is generated. 
269 | #texinfo_domain_indices = True 270 | 271 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 272 | #texinfo_show_urls = 'footnote' 273 | 274 | # If true, do not generate a @detailmenu in the "Top" node's menu. 275 | #texinfo_no_detailmenu = False 276 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/flatson.rst: -------------------------------------------------------------------------------- 1 | flatson package 2 | =============== 3 | 4 | Submodules 5 | ---------- 6 | 7 | flatson.flatson module 8 | ---------------------- 9 | 10 | .. automodule:: flatson.flatson 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | 16 | Module contents 17 | --------------- 18 | 19 | .. automodule:: flatson 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../HISTORY.rst 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. flatson documentation master file, created by 2 | sphinx-quickstart on Tue Jul 9 22:26:36 2013. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Flatson's documentation! 7 | ====================================== 8 | 9 | Contents: 10 | 11 | .. 
toctree:: 12 | :maxdepth: 2 13 | 14 | readme 15 | installation 16 | usage 17 | contributing 18 | authors 19 | history 20 | 21 | Indices and tables 22 | ================== 23 | 24 | * :ref:`genindex` 25 | * :ref:`modindex` 26 | * :ref:`search` 27 | 28 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | At the command line:: 6 | 7 | $ easy_install flatson 8 | 9 | Or, if you have virtualenvwrapper installed:: 10 | 11 | $ mkvirtualenv flatson 12 | $ pip install flatson 13 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. 
man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 
92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\flatson.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\flatson.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 
155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 
221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | flatson 2 | ======= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | flatson 8 | -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | 3 | 4 | Next Steps 5 | ---------- 6 | 7 | Read more on :ref:`how to use Flatson <usage>`, check out the `Github Repo`_ 8 | and feel free to send Issues or PRs. =) 9 | 10 | .. _Github Repo: https://github.com/scrapinghub/flatson 11 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | .. _usage: 2 | 3 | ============= 4 | Using Flatson 5 | ============= 6 | 7 | 8 | Using Flatson is simple, you just need some JSON data to flatten and its `JSON 9 | schema`_. Flatson will use the information from the schema to understand the 10 | structure of your object, which makes the flattening easier and more 11 | predictable. 12 | 13 | 14 | .. 
note:: 15 | If you don't have the JSON schema for the data you want to flatten, you can 16 | use a tool to generate a JSON schema for your data, like `Skinfer`_ or 17 | http://jsonschema.net. 18 | 19 | Walk-through with an example 20 | ---------------------------- 21 | 22 | Say you have the following JSON schema in a file named ``schemafile.json``:: 23 | 24 | { 25 | "$schema": "http://json-schema.org/draft-04/schema", 26 | "type": "object", 27 | "properties": { 28 | "name": {"type": "string"}, 29 | "age": {"type": "number"}, 30 | "address": { 31 | "type": "object", 32 | "properties": {"city": {"type": "string"}, "street": {"type": "string"}} 33 | }, 34 | "skills": {"type": "array", "items": {"type": "string"}} 35 | } 36 | } 37 | 38 | You can instantiate the :class:`~.Flatson` class for this schemafile like this:: 39 | 40 | >>> from flatson import Flatson 41 | >>> f = Flatson.from_schemafile('schemafile.json') 42 | >>> f.fieldnames 43 | ['address.city', 'address.street', 'age', 'name', 'skills'] 44 | 45 | Note how Flatson has inferred the flattened field names, which you 46 | can access through the :attr:`~.Flatson.fieldnames` property. 47 | 48 | Let's test it with some sample data:: 49 | 50 | >>> sample = { 51 | "name": "Claudio", "age": 42, 52 | "address": {"city": "Paris", "street": "Rue de Sevres"}, 53 | "skills": ["hacking", "soccer"]} 54 | >>> f.flatten(sample) 55 | ['Paris', 'Rue de Sevres', 42, 'Claudio', '["hacking","soccer"]'] 56 | 57 | There are a couple of things to note here: 58 | 59 | 1) the :meth:`~.Flatson.flatten` method simply returns a list of simple objects 60 | 2) the array is by default serialized as a JSON string 61 | 62 | .. note:: 63 | Array serialization is :ref:`a topic apart <array-serialization>`, for now 64 | it suffices to say that if you don't like this default behavior, there are 65 | other options you can configure through the schema, you can even register 66 | your own serialization methods if you like. 
67 | 68 | Say you actually want a Python dict instead of a list, no worries, just use 69 | :meth:`~.Flatson.flatten_dict`:: 70 | 71 | >>> f.flatten_dict(sample) 72 | OrderedDict([('address.city', 'Paris'), ('address.street', 'Rue de Sevres'), ('age', 42), ('name', 'Claudio'), ('skills', '["hacking","soccer"]')]) 73 | 74 | Note that this returns an OrderedDict instead of a traditional Python dict: 75 | this has the advantage of preserving the same field ordering of the list 76 | returned by the :meth:`~.Flatson.flatten` method. 77 | 78 | .. _array-serialization: 79 | 80 | Array serialization 81 | ------------------- 82 | 83 | TODO: write about array serialization here, point to design decisions, list available methods 84 | 85 | .. _JSON schema: http://spacetelescope.github.io/understanding-json-schema/index.html 86 | .. _Skinfer: https://github.com/scrapinghub/skinfer 87 | -------------------------------------------------------------------------------- /flatson/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | __author__ = 'ScrapingHub' 4 | __email__ = 'info@scrapinghub.com' 5 | __version__ = '0.1.0' 6 | 7 | 8 | from .flatson import Flatson # NOQA 9 | -------------------------------------------------------------------------------- /flatson/flatson.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals, print_function, absolute_import 3 | from collections import namedtuple, OrderedDict 4 | 5 | import json 6 | 7 | 8 | class Field(namedtuple('Field', 'name getter schema')): 9 | def is_array(self): 10 | return self.schema.get('type') == 'array' 11 | 12 | def is_simple_list(self): 13 | if not self.is_array(): 14 | return False 15 | 16 | items_type = self.schema.get('items', {}).get('type') 17 | return items_type in ('number', 'string') 18 | 19 | @property 20 | def 
class Field(namedtuple('Field', 'name getter schema')):
    """One flattened output column.

    ``name`` is the flattened field name, ``getter`` is a callable that
    extracts the value from an input object and ``schema`` is the JSON
    sub-schema declared for the property.
    """

    def is_array(self):
        """Return True when the field's schema declares type ``array``."""
        return self.schema.get('type') == 'array'

    def is_simple_list(self):
        """Return True for arrays whose items are declared as number or string."""
        if not self.is_array():
            return False

        items = self.schema.get('items', {})
        # ``items`` may be a list of schemas (tuple validation) instead of a
        # single schema; such an array is not a simple list.
        if not isinstance(items, dict):
            return False
        return items.get('type') in ('number', 'string')

    @property
    def serialization_options(self):
        """Options found under the ``flatson_serialize`` schema key (or ``{}``)."""
        return self.schema.get('flatson_serialize') or {}


def _child_mapping(obj, key):
    """Return the nested object stored under *key*, or ``{}`` if missing or null."""
    child = obj.get(key)
    return {} if child is None else child


def _key_getter(key):
    """Build a getter that reads exactly *key* (never split on a separator)."""
    def getter(obj):
        return obj.get(key, None)
    return getter


def _nested_getter(key, inner_getter):
    """Build a getter applying *inner_getter* to the object nested under *key*."""
    def getter(obj):
        return inner_getter(_child_mapping(obj, key))
    return getter


def create_getter(path, field_sep='.'):
    """Return a callable that fetches the value at *path* from a nested dict.

    *path* is a sequence of keys joined by *field_sep*. Missing (or null)
    intermediate objects make the getter return ``None`` instead of failing.
    """
    keys = path.split(field_sep)
    parents, leaf = keys[:-1], keys[-1]

    def getter(obj):
        for key in parents:
            obj = _child_mapping(obj, key)
        return obj.get(leaf, None)

    return getter


def infer_flattened_field_names(schema, field_sep='.'):
    """Return the list of :class:`Field` inferred from *schema*, sorted by name.

    Properties of type ``object`` are expanded recursively and their names are
    joined with *field_sep*; every other property becomes a single field.
    """
    fields = []

    for key, value in schema.get('properties', {}).items():
        if value.get('type') == 'object':
            # The separator must be forwarded, otherwise deeper levels would be
            # joined with the default '.' regardless of the caller's choice.
            for subfield in infer_flattened_field_names(value, field_sep=field_sep):
                full_name = field_sep.join((key, subfield.name))
                # Compose getters key by key instead of re-splitting the joined
                # name, so keys that contain the separator are still readable.
                fields.append(Field(full_name,
                                    _nested_getter(key, subfield.getter),
                                    subfield.schema))
        else:
            fields.append(Field(key, _key_getter(key), value))

    # Sort on the name only: comparing whole tuples would end up comparing
    # getter functions when two names are equal.
    return sorted(fields, key=lambda field: field.name)


def extract_key_values(array_value, separators=(';', ',', ':'), **kwargs):
    """Serialize array of objects with simple key-values

    *separators* is ``(items_sep, fields_sep, keys_sep)``. Returns ``None``
    when the array itself is missing.
    """
    if array_value is None:
        return None
    items_sep, fields_sep, keys_sep = separators
    # '{0}'.format keeps text unchanged and lets non-string values be joined.
    return items_sep.join(
        fields_sep.join(keys_sep.join('{0}'.format(part) for part in pair)
                        for pair in sorted(it.items()))
        for it in array_value)


def extract_first(array_value, **kwargs):
    """Return the first element of the array, or ``None`` if empty or missing."""
    if array_value:
        return array_value[0]


def join_values(array_value, separator=',', **kwargs):
    """Join the array elements with *separator*; ``None`` if the array is missing."""
    if array_value is None:
        return None
    # '{0}'.format(x) instead of str(x): with unicode_literals this stays text
    # on Python 2, so non-ASCII values do not raise UnicodeEncodeError.
    return separator.join('{0}'.format(x) for x in array_value)


class Flatson(object):
    """This class implements flattening of JSON objects
    """
    _default_serialization_methods = {
        'extract_key_values': extract_key_values,
        'extract_first': extract_first,
        'join_values': join_values,
    }

    def __init__(self, schema, field_sep='.'):
        """Build the flattener for *schema* (a JSON schema of type ``object``).

        Raises ValueError when the schema's top-level type is not ``object``.
        """
        self.schema = schema
        self.field_sep = field_sep
        self.fields = self._build_fields()
        # Per-instance copy so registering a method never alters the defaults.
        self._serialization_methods = dict(self._default_serialization_methods)

    @property
    def fieldnames(self):
        """Field names inferred from schema
        """
        return [f.name for f in self.fields]

    def _build_fields(self):
        """Infer the flattened fields, rejecting non-object schemas."""
        if self.schema.get('type') != 'object':
            raise ValueError("Schema should be of type object")
        return infer_flattened_field_names(self.schema,
                                           field_sep=self.field_sep)

    @classmethod
    def from_schemafile(cls, schemafile, field_sep='.'):
        """Create a Flatson instance from a schemafile

        *schemafile* is the path of a file holding the JSON schema.
        """
        with open(schemafile) as f:
            return cls(json.load(f), field_sep=field_sep)

    def _serialize_array_value(self, field, value):
        """Serialize an array value with the method configured in the schema.

        Without ``flatson_serialize`` options the value is dumped as compact
        JSON. Raises ValueError for a missing or unknown ``method``.
        """
        options = dict(field.serialization_options)

        if not options:
            return json.dumps(value, separators=(',', ':'), sort_keys=True)

        try:
            method = options.pop('method')
        except KeyError:
            raise ValueError(
                'Missing method in serialization options for field %s' % field.name)

        try:
            serialize = self._serialization_methods[method]
        except KeyError:
            # ``method`` was popped from ``options`` above, so it has to be
            # passed explicitly to the message.
            raise ValueError(
                'Unknown serialization method: {method}'.format(method=method))
        return serialize(value, **options)

    def _serialize(self, field, obj):
        """Extract the value of *field* from *obj*, serializing arrays."""
        value = field.getter(obj)
        if field.is_array():
            return self._serialize_array_value(field, value)
        return value

    def register_serialization_method(self, name, serialize_func):
        """Register a custom serialization method that can be
        used via schema configuration

        Raises ValueError when *name* is one of the built-in methods.
        """
        if name in self._default_serialization_methods:
            raise ValueError(
                "Can't replace original %s serialization method" % name)
        self._serialization_methods[name] = serialize_func

    def flatten(self, obj):
        """Return a list with the field values
        """
        return [self._serialize(f, obj) for f in self.fields]

    def flatten_dict(self, obj):
        """Return an OrderedDict dict preserving order of keys in fieldnames
        """
        return OrderedDict(zip(self.fieldnames, self.flatten(obj)))
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/flatson/dcbcea32ad6d4df1df85fff8366bce40438d469a/requirements.txt -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.0 3 | commit = True 4 | tag = True 5 | tag_name = v{new_version} 6 | 7 | [wheel] 8 | universal = 1 9 | 10 | [bumpversion:file:setup.py] 11 | 12 | [bumpversion:file:flatson/__init__.py] 13 | 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | 5 | try: 6 | from setuptools import setup 7 | except ImportError: 8 | from distutils.core import setup 9 | 10 | 11 | with open('README.rst') as readme_file: 12 | readme = readme_file.read() 13 | 14 | with open('HISTORY.rst') as history_file: 15 | history = history_file.read().replace('.. 
:changelog:', '') 16 | 17 | requirements = [ 18 | # TODO: put package requirements here 19 | ] 20 | 21 | setup( 22 | name='flatson', 23 | version='0.1.0', 24 | description="Tool to flatten stream of JSON-like objects, configured via schema", # NOQA 25 | long_description=readme + '\n\n' + history, 26 | author="ScrapingHub", 27 | author_email='info@scrapinghub.com', 28 | url='https://github.com/scrapinghub/flatson', 29 | packages=[ 30 | 'flatson', 31 | ], 32 | package_dir={'flatson': 33 | 'flatson'}, 34 | include_package_data=True, 35 | install_requires=requirements, 36 | license="BSD", 37 | zip_safe=False, 38 | keywords='flatson', 39 | classifiers=[ 40 | 'Development Status :: 2 - Pre-Alpha', 41 | 'Intended Audience :: Developers', 42 | 'License :: OSI Approved :: BSD License', 43 | 'Natural Language :: English', 44 | "Programming Language :: Python :: 2", 45 | 'Programming Language :: Python :: 2.6', 46 | 'Programming Language :: Python :: 2.7', 47 | 'Programming Language :: Python :: 3', 48 | 'Programming Language :: Python :: 3.3', 49 | 'Programming Language :: Python :: 3.4', 50 | ], 51 | test_suite='tests', 52 | ) 53 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | nose==1.3.7 2 | skinfer==0.1.2 3 | -------------------------------------------------------------------------------- /tests/test_flatson.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import unicode_literals, print_function, absolute_import 4 | 5 | """ 6 | test_flatson 7 | ---------------------------------- 8 | 9 | Tests for `flatson` 
module. 10 | """ 11 | 12 | import json 13 | import os 14 | import skinfer 15 | import unittest 16 | 17 | from flatson import Flatson 18 | import tempfile 19 | 20 | 21 | EMPTY_SCHEMA = skinfer.generate_schema({}) 22 | 23 | SIMPLE_SCHEMA = skinfer.generate_schema({'a_prop': ''}) 24 | 25 | LIST_SCHEMA = skinfer.generate_schema([]) 26 | 27 | SAMPLE_WITH_LIST_OF_OBJECTS = { 28 | 'first': 'hello', 29 | 'list': [{'key1': 'value1', 'key2': 'value2'}, {'key1': 'value3', 'key2': 'value4'}] 30 | } 31 | 32 | SAMPLE_WITH_LIST_OF_TUPLES = { 33 | 'first': 'hello', 34 | 'list': [['value1', 'value2'], ['value3', 'value4']] 35 | } 36 | 37 | 38 | class TestFlatson(unittest.TestCase): 39 | def test_create(self): 40 | f = Flatson(schema=SIMPLE_SCHEMA) 41 | assert f.schema == SIMPLE_SCHEMA 42 | 43 | def test_create_from_schemafile(self): 44 | _, fname = tempfile.mkstemp() 45 | try: 46 | with open(fname, 'w') as f: 47 | json.dump(SIMPLE_SCHEMA, f) 48 | 49 | obj = Flatson.from_schemafile(fname) 50 | self.assertEquals(SIMPLE_SCHEMA, obj.schema) 51 | finally: 52 | os.remove(fname) 53 | 54 | def test_no_support_for_list_objects(self): 55 | with self.assertRaises(ValueError): 56 | Flatson(schema=LIST_SCHEMA) 57 | 58 | def test_when_no_declared_properties_flatten_empty_list(self): 59 | f = Flatson(schema=EMPTY_SCHEMA) 60 | result = f.flatten({'a_prop': 'a_value'}) 61 | self.assertEquals([], result) 62 | 63 | def test_convert_simple_objects(self): 64 | f = Flatson(schema=SIMPLE_SCHEMA) 65 | self.assertEquals(['a_prop'], f.fieldnames) 66 | self.assertEquals(['a_value'], f.flatten({'a_prop': 'a_value'})) 67 | self.assertEquals([None], f.flatten({})) 68 | 69 | def test_convert_nested_objects(self): 70 | contain_nested_object = { 71 | 'first': 'hello', 72 | 'second': { 73 | 'one': 1, 74 | 'two': 2, 75 | } 76 | } 77 | schema = skinfer.generate_schema(contain_nested_object) 78 | f = Flatson(schema=schema) 79 | self.assertEquals(['first', 'second.one', 'second.two'], f.fieldnames) 80 | 
self.assertEquals(['hello', 1, 2], f.flatten(contain_nested_object)) 81 | 82 | def test_flatten_dict(self): 83 | contain_nested_object = { 84 | 'first': 'hello', 85 | 'second': { 86 | 'one': 1, 87 | 'two': 2, 88 | } 89 | } 90 | schema = skinfer.generate_schema(contain_nested_object) 91 | f = Flatson(schema=schema) 92 | expected = {'first': 'hello', 'second.one': 1, 'second.two': 2} 93 | self.assertEquals(expected, f.flatten_dict(contain_nested_object)) 94 | 95 | def test_convert_deep_nested_objects(self): 96 | contain_nested_object = { 97 | 'first': 'hello', 98 | 'second': { 99 | 'one': { 100 | 'a': 1, 101 | 'b': 2, 102 | }, 103 | 'two': { 104 | 'a': 3, 105 | 'b': 4, 106 | }, 107 | } 108 | } 109 | schema = skinfer.generate_schema(contain_nested_object) 110 | f = Flatson(schema=schema) 111 | self.assertEquals(['first', 'second.one.a', 'second.one.b', 'second.two.a', 'second.two.b'], f.fieldnames) 112 | self.assertEquals(['hello', 1, 2, 3, 4], f.flatten(contain_nested_object)) 113 | 114 | def test_convert_object_with_simple_list_with_default_serialization(self): 115 | contain_list = { 116 | 'first': 'hello', 117 | 'list': [1, 2, 3, 4], 118 | 'list2': ['one', 'two'], 119 | } 120 | schema = skinfer.generate_schema(contain_list) 121 | 122 | f = Flatson(schema=schema) 123 | self.assertEquals(['first', 'list', 'list2'], f.fieldnames) 124 | self.assertEquals(['hello', '[1,2,3,4]', '["one","two"]'], f.flatten(contain_list)) 125 | 126 | def test_convert_object_with_nested_simple_list_with_default_serialization(self): 127 | contain_list = { 128 | 'first': 'hello', 129 | 'second': { 130 | 'list1': [1, 2, 3, 4], 131 | 'word': 'world', 132 | 133 | }, 134 | } 135 | schema = skinfer.generate_schema(contain_list) 136 | f = Flatson(schema=schema) 137 | self.assertEquals(['first', 'second.list1', 'second.word'], f.fieldnames) 138 | self.assertEquals(['hello', '[1,2,3,4]', 'world'], f.flatten(contain_list)) 139 | 140 | def 
test_convert_object_with_simple_list_with_join_serialization(self): 141 | # given: 142 | contain_list = { 143 | 'first': 'hello', 144 | 'list': [1, 2, 3, 4], 145 | 'list2': ['one', 'two'], 146 | } 147 | schema = skinfer.generate_schema(contain_list) 148 | serialize_options = dict(method='join_values') 149 | schema['properties']['list']['flatson_serialize'] = serialize_options 150 | 151 | # when: 152 | f = Flatson(schema=schema) 153 | 154 | # then: 155 | self.assertEquals(['first', 'list', 'list2'], f.fieldnames) 156 | self.assertEquals(['hello', '1,2,3,4', '["one","two"]'], f.flatten(contain_list)) 157 | 158 | # and when: 159 | schema['properties']['list']['flatson_serialize']['separator'] = '+' 160 | f = Flatson(schema=schema) 161 | 162 | # then: 163 | self.assertEquals(['hello', '1+2+3+4', '["one","two"]'], f.flatten(contain_list)) 164 | 165 | def test_lists_with_objects_with_default_serialization(self): 166 | # given: 167 | schema = skinfer.generate_schema(SAMPLE_WITH_LIST_OF_OBJECTS) 168 | f = Flatson(schema=schema) 169 | 170 | # when: 171 | result = f.flatten(SAMPLE_WITH_LIST_OF_OBJECTS) 172 | 173 | # then: 174 | expected = '[{"key1":"value1","key2":"value2"},{"key1":"value3","key2":"value4"}]' 175 | self.assertEquals(['first', 'list'], f.fieldnames) 176 | self.assertEquals(['hello', expected], result) 177 | 178 | def test_array_serialization_with_extract_key_values(self): 179 | # given: 180 | schema = skinfer.generate_schema(SAMPLE_WITH_LIST_OF_OBJECTS) 181 | serialize_options = dict(method='extract_key_values') 182 | 183 | # when: 184 | schema['properties']['list']['flatson_serialize'] = serialize_options 185 | f = Flatson(schema=schema) 186 | result = f.flatten(SAMPLE_WITH_LIST_OF_OBJECTS) 187 | 188 | # then: 189 | expected = 'key1:value1,key2:value2;key1:value3,key2:value4' 190 | self.assertEquals(['first', 'list'], f.fieldnames) 191 | self.assertEquals(['hello', expected], result) 192 | 193 | def 
test_array_serialization_with_extract_key_values_custom_separators(self): 194 | # given: 195 | schema = skinfer.generate_schema(SAMPLE_WITH_LIST_OF_OBJECTS) 196 | serialize_options = dict(method='extract_key_values', 197 | separators=('|', '-', '=')) 198 | 199 | # when: 200 | schema['properties']['list']['flatson_serialize'] = serialize_options 201 | f = Flatson(schema=schema) 202 | result = f.flatten(SAMPLE_WITH_LIST_OF_OBJECTS) 203 | 204 | # then: 205 | expected = 'key1=value1-key2=value2|key1=value3-key2=value4' 206 | self.assertEquals(['first', 'list'], f.fieldnames) 207 | self.assertEquals(['hello', expected], result) 208 | 209 | def test_array_serialization_with_extract_first(self): 210 | # given: 211 | sample = {'first': 'hello', 'list': ['one', 'two']} 212 | schema = skinfer.generate_schema(sample) 213 | serialize_options = dict(method='extract_first') 214 | schema['properties']['list']['flatson_serialize'] = serialize_options 215 | 216 | # when: 217 | f = Flatson(schema=schema) 218 | result = f.flatten(sample) 219 | 220 | # then: 221 | self.assertEquals(['first', 'list'], f.fieldnames) 222 | self.assertEquals(['hello', 'one'], result) 223 | 224 | # and when: 225 | sample2 = {'first': 'hello', 'list': []} 226 | result = f.flatten(sample2) 227 | 228 | # then: 229 | self.assertEquals(['first', 'list'], f.fieldnames) 230 | self.assertEquals(['hello', None], result) 231 | 232 | def test_register_custom_serialization_method(self): 233 | # given: 234 | sample = {'first': 'hello', 'list': ['one', 'two']} 235 | schema = skinfer.generate_schema(sample) 236 | serialize_options = dict(method='always_one') 237 | schema['properties']['list']['flatson_serialize'] = serialize_options 238 | 239 | # when: 240 | f = Flatson(schema=schema) 241 | f.register_serialization_method('always_one', lambda _v, **kw: '1') 242 | result = f.flatten(sample) 243 | 244 | # then: 245 | self.assertEquals(['first', 'list'], f.fieldnames) 246 | self.assertEquals(['hello', '1'], result) 247 | 
248 | def test_disallow_overwriting_official_serialization_methods(self): 249 | # given: 250 | sample = {'first': 'hello', 'list': ['one', 'two']} 251 | schema = skinfer.generate_schema(sample) 252 | serialize_options = dict(method='always_one') 253 | schema['properties']['list']['flatson_serialize'] = serialize_options 254 | 255 | # when: 256 | f = Flatson(schema=schema) 257 | with self.assertRaises(ValueError): 258 | f.register_serialization_method('extract_first', lambda _v, **kw: _v[2]) 259 | 260 | 261 | if __name__ == '__main__': 262 | unittest.main() 263 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | ; envlist = py26, py27, py33, py34 3 | envlist = py27 4 | 5 | [testenv] 6 | setenv = 7 | PYTHONPATH = {toxinidir}:{toxinidir}/flatson 8 | commands = nosetests -s -v 9 | deps = 10 | -r{toxinidir}/requirements.txt 11 | -r{toxinidir}/tests/requirements.txt 12 | --------------------------------------------------------------------------------