├── .coveragerc ├── .github └── FUNDING.yml ├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── docs ├── Makefile ├── changelog.rst ├── complex.rst ├── conf.py ├── index.rst ├── parsers.rst ├── requirements.txt └── usage.rst ├── pyanyapi ├── __init__.py ├── _compat.py ├── decorators.py ├── exceptions.py ├── helpers.py ├── interfaces.py └── parsers.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── _compat.py ├── conftest.py ├── test_interfaces.py ├── test_parsers.py └── test_strip.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = true 3 | 4 | [report] 5 | show_missing = true 6 | precision = 2 7 | exclude_lines = raise NotImplementedError -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: Stranger6667 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.eggs 9 | *.egg-info 10 | dist 11 | build 12 | eggs 13 | parts 14 | bin 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib 20 | lib64 21 | venv*/ 22 | pyvenv*/ 23 | 24 | # Installer logs 25 | pip-log.txt 26 | 27 | # Unit test / coverage reports 28 | .coverage 29 | coverage.xml 30 | junit.xml 31 | .tox 32 | .coverage.* 33 | htmlcov 34 | 35 | # Translations 36 | *.mo 37 | 38 | .idea 39 | 40 | .DS_Store 41 | *~ 42 | .*.sw[po] 43 | .build 44 | .ve 45 | .env 46 | .bootstrap 47 | *.bak 48 | docs/_build 49 | .cache -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.5 4 | matrix: 5 | fast_finish: true 6 | include: 7 | - python: 3.5 8 | env: TOX_ENV=py35 9 | - python: 3.4 10 | env: TOX_ENV=py34 11 | - python: 3.3 12 | env: TOX_ENV=py33 13 | - python: 3.2 14 | env: TOX_ENV=py32 15 | - python: 2.7 16 | env: TOX_ENV=py27 17 | - python: 2.6 18 | env: TOX_ENV=py26 19 | - python: pypy 20 | env: TOX_ENV=pypy 21 | - python: pypy3 22 | env: TOX_ENV=pypy3 23 | - python: 3.5 24 | env: $JYTHON=true 25 | install: 26 | - if [ $TOX_ENV = "py32" ]; then travis_retry pip install "virtualenv<14.0.0" "tox<1.8.0"; fi 27 | - if [ $TOX_ENV = "pypy3" ]; then travis_retry pip install "virtualenv<14.0.0" "tox<1.8.0"; fi 28 | - if [ -z "$JYTHON" ]; then pip install codecov; fi 29 | - if [ "$TOX_ENV" ]; then travis_retry pip install "virtualenv<14.0.0" tox; fi 30 | before_install: 31 | - export JYTHON_URL='http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.0/jython-installer-2.7.0.jar' 32 | - if [ "$JYTHON" ]; then wget $JYTHON_URL -O jython_installer.jar; java -jar jython_installer.jar -s -d $HOME/jython; export PATH=$HOME/jython/bin:$PATH; fi 33 | 34 | script: 35 | - if [ "$JYTHON" ]; then travis_retry jython setup.py test; fi 36 | - if [ "$TOX_ENV" ]; then tox -e $TOX_ENV; fi 37 | after_success: 38 | - codecov 39 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the 
interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at dadygalo@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Dmitry Dygalo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CHANGELOG.md 2 | include README.rst 3 | 4 | recursive-include tests * 5 | recursive-exclude * __pycache__ 6 | recursive-exclude * *.py[co] -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | help: 2 | @echo "clean - remove all build, test, coverage and Python artifacts" 3 | @echo "clean-build - remove build artifacts" 4 | @echo "clean-pyc - remove Python file artifacts" 5 | @echo "clean-test - remove test and coverage artifacts" 6 | @echo "test - run tests quickly with the default Python" 7 | @echo "test-all - run tests on every Python version with tox" 8 | @echo "coverage - check code coverage quickly with the default Python" 9 | @echo "install - install the package to the active Python's site-packages" 10 | 11 | clean: clean-test clean-build clean-pyc 12 | 13 | clean-build: 14 | rm -fr build/ 15 | rm -fr dist/ 16 | rm -fr .eggs/ 17 | find . -name '*.egg-info' -exec rm -fr {} + 18 | find . -name '*.egg' -exec rm -fr {} + 19 | 20 | clean-pyc: 21 | find . -name '*.pyc' -exec rm -f {} + 22 | find . -name '*.pyo' -exec rm -f {} + 23 | find . -name '*~' -exec rm -f {} + 24 | find . 
-name '__pycache__' -exec rm -fr {} + 25 | 26 | clean-test: 27 | rm -fr .cache 28 | rm -fr .tox/ 29 | rm -f .coverage 30 | rm -fr htmlcov/ 31 | 32 | test: 33 | python setup.py test --pytest-args="--cov=pyanyapi --cov-report xml" 34 | 35 | test-all: 36 | tox 37 | 38 | coverage: 39 | coverage run --source pyanyapi setup.py test 40 | coverage report -m 41 | coverage html 42 | open htmlcov/index.html 43 | 44 | install: clean 45 | python setup.py install -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | PyAnyAPI 2 | ======== 3 | 4 | Tools for convenient interface creation over various types of data in 5 | a declarative way. 6 | 7 | .. image:: https://travis-ci.org/Stranger6667/pyanyapi.svg?branch=master 8 | :target: https://travis-ci.org/Stranger6667/pyanyapi 9 | :alt: Build Status 10 | 11 | .. image:: https://codecov.io/github/Stranger6667/pyanyapi/coverage.svg?branch=master 12 | :target: https://codecov.io/github/Stranger6667/pyanyapi?branch=master 13 | :alt: Coverage Status 14 | 15 | .. image:: https://readthedocs.org/projects/pyanyapi/badge/?version=latest 16 | :target: http://pyanyapi.readthedocs.io/en/latest/?badge=latest 17 | :alt: Documentation Status 18 | 19 | Installation 20 | ------------ 21 | 22 | The current stable release: 23 | 24 | :: 25 | 26 | pip install pyanyapi 27 | 28 | or: 29 | 30 | :: 31 | 32 | easy_install pyanyapi 33 | 34 | or from source: 35 | 36 | :: 37 | 38 | $ sudo python setup.py install 39 | 40 | Usage 41 | ----- 42 | 43 | The library provides an ability to create API over various content. 44 | Currently there are bundled tools to work with HTML, XML, CSV, JSON and YAML. 45 | Initially it was created to work with ``requests`` library. 46 | 47 | Basic parsers can be declared in the following way: 48 | 49 | .. code-block:: python 50 | 51 | from pyanyapi.parsers import HTMLParser 52 | 53 | 54 | class SimpleParser(HTMLParser): 55 | settings = {'header': 'string(.//h1/text())'} 56 | 57 | 58 | >>> api = SimpleParser().parse('
<html><body><h1>Value</h1></body></html>
') 59 | >>> api.header 60 | Value 61 | 62 | Documentation 63 | ------------- 64 | 65 | You can view documentation online at: 66 | 67 | - https://pyanyapi.readthedocs.io 68 | 69 | Or you can look at the docs/ directory in the repository. 70 | 71 | Python support 72 | -------------- 73 | 74 | PyAnyAPI supports Python 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, PyPy and partially PyPy3 and Jython. 75 | Unfortunately ``lxml`` doesn't support PyPy3 and Jython, so HTML & XML parsing is not supported on PyPy3 and Jython. 76 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make ' where is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " dirhtml to make HTML files named index.html in directories" 22 | @echo " singlehtml to make a single large HTML file" 23 | @echo " pickle to make pickle files" 24 | @echo " json to make JSON files" 25 | @echo " htmlhelp to make HTML files and a HTML help project" 26 | @echo " qthelp to make HTML files and a qthelp project" 27 | @echo " applehelp to make an Apple Help Book" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " epub3 to make an epub3" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " xml to make Docutils-native XML files" 41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 42 | @echo " linkcheck to check all external links for integrity" 43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 44 | @echo " coverage to run coverage check of the documentation (if enabled)" 45 | @echo " dummy to check syntax errors of document sources" 46 | 47 | .PHONY: clean 48 | clean: 49 | rm -rf $(BUILDDIR)/* 50 | 51 | .PHONY: html 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | .PHONY: dirhtml 58 | dirhtml: 59 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 60 | @echo 61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 62 | 63 | .PHONY: singlehtml 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
68 | 69 | .PHONY: pickle 70 | pickle: 71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 72 | @echo 73 | @echo "Build finished; now you can process the pickle files." 74 | 75 | .PHONY: json 76 | json: 77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 78 | @echo 79 | @echo "Build finished; now you can process the JSON files." 80 | 81 | .PHONY: htmlhelp 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp." 87 | 88 | .PHONY: qthelp 89 | qthelp: 90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 91 | @echo 92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PyAnyAPI.qhcp" 95 | @echo "To view the help file:" 96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PyAnyAPI.qhc" 97 | 98 | .PHONY: applehelp 99 | applehelp: 100 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 101 | @echo 102 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 103 | @echo "N.B. You won't be able to view it unless you put it in" \ 104 | "~/Library/Documentation/Help or install it in your application" \ 105 | "bundle." 106 | 107 | .PHONY: devhelp 108 | devhelp: 109 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 110 | @echo 111 | @echo "Build finished." 112 | @echo "To view the help file:" 113 | @echo "# mkdir -p $$HOME/.local/share/devhelp/PyAnyAPI" 114 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/PyAnyAPI" 115 | @echo "# devhelp" 116 | 117 | .PHONY: epub 118 | epub: 119 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 120 | @echo 121 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 122 | 123 | .PHONY: epub3 124 | epub3: 125 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 126 | @echo 127 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 128 | 129 | .PHONY: latex 130 | latex: 131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 132 | @echo 133 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 134 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 135 | "(use \`make latexpdf' here to do that automatically)." 136 | 137 | .PHONY: latexpdf 138 | latexpdf: 139 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 140 | @echo "Running LaTeX files through pdflatex..." 141 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 142 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 143 | 144 | .PHONY: latexpdfja 145 | latexpdfja: 146 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 147 | @echo "Running LaTeX files through platex and dvipdfmx..." 148 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 149 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 150 | 151 | .PHONY: text 152 | text: 153 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 154 | @echo 155 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 156 | 157 | .PHONY: man 158 | man: 159 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 160 | @echo 161 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 162 | 163 | .PHONY: texinfo 164 | texinfo: 165 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 166 | @echo 167 | @echo "Build finished. 
The Texinfo files are in $(BUILDDIR)/texinfo." 168 | @echo "Run \`make' in that directory to run these through makeinfo" \ 169 | "(use \`make info' here to do that automatically)." 170 | 171 | .PHONY: info 172 | info: 173 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 174 | @echo "Running Texinfo files through makeinfo..." 175 | make -C $(BUILDDIR)/texinfo info 176 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 177 | 178 | .PHONY: gettext 179 | gettext: 180 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 181 | @echo 182 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 183 | 184 | .PHONY: changes 185 | changes: 186 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 187 | @echo 188 | @echo "The overview file is in $(BUILDDIR)/changes." 189 | 190 | .PHONY: linkcheck 191 | linkcheck: 192 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 193 | @echo 194 | @echo "Link check complete; look for any errors in the above output " \ 195 | "or in $(BUILDDIR)/linkcheck/output.txt." 196 | 197 | .PHONY: doctest 198 | doctest: 199 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 200 | @echo "Testing of doctests in the sources finished, look at the " \ 201 | "results in $(BUILDDIR)/doctest/output.txt." 202 | 203 | .PHONY: coverage 204 | coverage: 205 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 206 | @echo "Testing of coverage in the sources finished, look at the " \ 207 | "results in $(BUILDDIR)/coverage/python.txt." 208 | 209 | .PHONY: xml 210 | xml: 211 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 212 | @echo 213 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 214 | 215 | .PHONY: pseudoxml 216 | pseudoxml: 217 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 218 | @echo 219 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 220 | 221 | .PHONY: dummy 222 | dummy: 223 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 224 | @echo 225 | @echo "Build finished. Dummy builder generates no files." 226 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. _changelog: 2 | 3 | Changelog 4 | ========= 5 | 6 | 0.6.0 - 09.08.2016 7 | ------------------ 8 | 9 | * IndexOf parser. 10 | 11 | 0.5.8 - 14.07.2016 12 | ------------------ 13 | 14 | * Fixed XML content parsing for bytes input. 15 | 16 | 0.5.7 - 28.01.2016 17 | ------------------ 18 | 19 | * Added ``parse_all`` call on subparsers (`#37`_). 20 | 21 | 0.5.6 - 24.11.2015 22 | ------------------ 23 | 24 | * Fixed ``super`` call in exception. 25 | 26 | 0.5.5 - 23.11.2015 27 | ------------------ 28 | 29 | * Add content to exceptions in case of parsing errors (`#35`_). 30 | 31 | 0.5.4 - 15.11.2015 32 | ------------------ 33 | 34 | * Fixed ``lxml`` installation on PyPy (`#34`_). 35 | * Add support for subparsers (`#32`_). 36 | 37 | 0.5.3 - 30.10.2015 38 | ------------------ 39 | 40 | * Disable stripping in XMLObjectifyParser on PyPy (`#30`_). 41 | 42 | 0.5.2 - 20.10.2015 43 | ------------------ 44 | 45 | * Fix incorrect stripping in XMLObjectifyParser (`#29`_). 46 | 47 | 0.5.1 - 20.10.2015 48 | ------------------ 49 | 50 | * Ability to override ``strip`` attribute at class level (`#27`_). 51 | * Fix ``strip`` in XMLObjectifyParser (`#28`_). 
52 | 53 | 0.5 - 05.10.2015 54 | ---------------- 55 | 56 | * Add ``parse_all`` to parse all settings (`#20`_). 57 | * Settings for regular expressions (`#19`_). 58 | * Add ``strip`` option to strip trailing whitespaces (`#14`_). 59 | * Add CSVParser (`#11`_). 60 | 61 | 0.4 - 29.09.2015 62 | ---------------- 63 | 64 | * Add YAMLParser (`#5`_). 65 | * Add AJAXParser (`#9`_). 66 | * ``parse`` calls memoization (`#18`_). 67 | 68 | 0.3 - 24.09.2015 69 | ---------------- 70 | 71 | * Add partial support for PyPy3 (`#7`_). 72 | * Add partial support for Jython (`#6`_). 73 | * Add ujson as dependency where it is possible (`#4`_). 74 | * Lxml will not be installed where it is not supported (`#3`_). 75 | 76 | 0.2.1 - 23.09.2015 77 | ------------------ 78 | 79 | * Remove encoding declaration for XMLObjectifyParser 80 | 81 | 0.2 - 23.09.2015 82 | ---------------- 83 | 84 | * Add ``parse`` methods for JSONInterface & RegExpInterface (`#8`_). 85 | * Add universal wheel config (`#2`_). 86 | 87 | 0.1 - 22.09.2015 88 | ---------------- 89 | 90 | * First release. 91 | 92 | .. _#37: https://github.com/Stranger6667/pyanyapi/issues/37 93 | .. _#35: https://github.com/Stranger6667/pyanyapi/issues/35 94 | .. _#34: https://github.com/Stranger6667/pyanyapi/issues/34 95 | .. _#32: https://github.com/Stranger6667/pyanyapi/issues/32 96 | .. _#30: https://github.com/Stranger6667/pyanyapi/issues/30 97 | .. _#29: https://github.com/Stranger6667/pyanyapi/issues/29 98 | .. _#28: https://github.com/Stranger6667/pyanyapi/issues/28 99 | .. _#27: https://github.com/Stranger6667/pyanyapi/issues/27 100 | .. _#20: https://github.com/Stranger6667/pyanyapi/issues/20 101 | .. _#19: https://github.com/Stranger6667/pyanyapi/issues/19 102 | .. _#18: https://github.com/Stranger6667/pyanyapi/issues/18 103 | .. _#14: https://github.com/Stranger6667/pyanyapi/issues/14 104 | .. _#11: https://github.com/Stranger6667/pyanyapi/issues/11 105 | .. _#9: https://github.com/Stranger6667/pyanyapi/issues/9 106 | .. _#8: https://github.com/Stranger6667/pyanyapi/issues/8 107 | .. _#7: https://github.com/Stranger6667/pyanyapi/issues/7 108 | .. _#6: https://github.com/Stranger6667/pyanyapi/issues/6 109 | .. _#5: https://github.com/Stranger6667/pyanyapi/issues/5 110 | .. _#4: https://github.com/Stranger6667/pyanyapi/issues/4 111 | .. _#3: https://github.com/Stranger6667/pyanyapi/issues/3 112 | .. _#2: https://github.com/Stranger6667/pyanyapi/issues/2 -------------------------------------------------------------------------------- /docs/complex.rst: -------------------------------------------------------------------------------- 1 | .. _complex: 2 | 3 | Complex content parsing 4 | ======================= 5 | 6 | Combined parsers 7 | ~~~~~~~~~~~~~~~~ 8 | 9 | In situations, when particular content type is unknown before parsing, 10 | you can create combined parser, which allows you to use multiply 11 | different parsers transparently. E.g. some server usually returns JSON, 12 | but in cases of server errors it returns HTML pages with some text. 13 | Then: 14 | 15 | .. 
code-block:: python 16 | 17 | from pyanyapi.parsers import CombinedParser, HTMLParser, JSONParser 18 | 19 | 20 | class Parser(CombinedParser): 21 | parsers = [ 22 | JSONParser({'test': 'test'}), 23 | HTMLParser({'error': 'string(//span)'}) 24 | ] 25 | 26 | >>> parser = Parser() 27 | >>> parser.parse('{"test": "Text"}').test 28 | Text 29 | >>> parser.parse('123').error 30 | 123 31 | 32 | Another example 33 | ~~~~~~~~~~~~~~~ 34 | 35 | Sometimes different content types can be combined inside single string. 36 | Often with AJAX requests. 37 | 38 | .. code:: javascript 39 | 40 | {"content": "Text"} 41 | 42 | You can work with such data in the following way: 43 | 44 | .. code-block:: python 45 | 46 | from pyanyapi.decorators import interface_property 47 | from pyanyapi.parsers import HTMLParser, JSONParser 48 | 49 | 50 | inner_parser = HTMLParser({'text': 'string(.//span/text())'}) 51 | 52 | 53 | class AJAXParser(JSONParser): 54 | settings = {'content': 'content'} 55 | 56 | @interface_property 57 | def text(self): 58 | return inner_parser.parse(self.content).text 59 | 60 | 61 | >>> api = AJAXParser().parse('{"content": "Text"}') 62 | >>> api.text 63 | Text 64 | 65 | Now AJAXParser is bundled in pyanyapi, but it works differently. 66 | But anyway, this example can be helpful for building custom parsers. -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # PyAnyAPI documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Sep 27 12:18:20 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | 23 | sys.path.insert(0, os.path.abspath('..')) 24 | 25 | 26 | import sphinx_rtd_theme 27 | from pyanyapi import __version__ 28 | 29 | # -- General configuration ------------------------------------------------ 30 | 31 | # If your documentation needs a minimal Sphinx version, state it here. 32 | # 33 | # needs_sphinx = '1.4.6' 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | extensions = [ 39 | 'sphinx.ext.autodoc', 40 | 'sphinx.ext.coverage', 41 | 'sphinx.ext.viewcode', 42 | ] 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # The suffix(es) of source filenames. 47 | # You can specify multiple suffix as a list of string: 48 | # 49 | # source_suffix = ['.rst', '.md'] 50 | source_suffix = '.rst' 51 | 52 | # The encoding of source files. 53 | # 54 | # source_encoding = 'utf-8-sig' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # General information about the project. 
60 | project = 'PyAnyAPI' 61 | copyright = '2016, Dmitry Dygalo' 62 | author = 'Dmitry Dygalo' 63 | 64 | # The version info for the project you're documenting, acts as replacement for 65 | # |version| and |release|, also used in various other places throughout the 66 | # built documents. 67 | # 68 | # The short X.Y version. 69 | version = release = __version__ 70 | 71 | # The full version, including alpha/beta/rc tags. 72 | 73 | # The language for content autogenerated by Sphinx. Refer to documentation 74 | # for a list of supported languages. 75 | # 76 | # This is also used if you do content translation via gettext catalogs. 77 | # Usually you set "language" from the command line for these cases. 78 | language = None 79 | 80 | # There are two options for replacing |today|: either, you set today to some 81 | # non-false value, then it is used: 82 | # 83 | # today = '' 84 | # 85 | # Else, today_fmt is used as the format for a strftime call. 86 | # 87 | # today_fmt = '%B %d, %Y' 88 | 89 | # List of patterns, relative to source directory, that match files and 90 | # directories to ignore when looking for source files. 91 | # This patterns also effect to html_static_path and html_extra_path 92 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 93 | 94 | # The reST default role (used for this markup: `text`) to use for all 95 | # documents. 96 | # 97 | # default_role = None 98 | 99 | # If true, '()' will be appended to :func: etc. cross-reference text. 100 | # 101 | # add_function_parentheses = True 102 | 103 | # If true, the current module name will be prepended to all description 104 | # unit titles (such as .. function::). 105 | # 106 | # add_module_names = True 107 | 108 | # If true, sectionauthor and moduleauthor directives will be shown in the 109 | # output. They are ignored by default. 110 | # 111 | # show_authors = False 112 | 113 | # The name of the Pygments (syntax highlighting) style to use. 114 | pygments_style = 'sphinx' 115 | 116 | # A list of ignored prefixes for module index sorting. 117 | # modindex_common_prefix = [] 118 | 119 | # If true, keep warnings as "system message" paragraphs in the built documents. 120 | # keep_warnings = False 121 | 122 | # If true, `todo` and `todoList` produce output, else they produce nothing. 123 | todo_include_todos = False 124 | 125 | 126 | # -- Options for HTML output ---------------------------------------------- 127 | 128 | # The theme to use for HTML and HTML Help pages. See the documentation for 129 | # a list of builtin themes. 130 | # 131 | html_theme = 'sphinx_rtd_theme' 132 | 133 | # Theme options are theme-specific and customize the look and feel of a theme 134 | # further. For a list of options available for each theme, see the 135 | # documentation. 136 | # 137 | # html_theme_options = {} 138 | 139 | # Add any paths that contain custom themes here, relative to this directory. 140 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 141 | 142 | # The name for this set of Sphinx documents. 143 | # " v documentation" by default. 144 | # 145 | # html_title = 'PyAnyAPI v0.6.0' 146 | 147 | # A shorter title for the navigation bar. Default is the same as html_title. 148 | # 149 | # html_short_title = None 150 | 151 | # The name of an image file (relative to this directory) to place at the top 152 | # of the sidebar. 153 | # 154 | # html_logo = None 155 | 156 | # The name of an image file (relative to this directory) to use as a favicon of 157 | # the docs. 
This file should be a Windows icon file (.ico) being 16x16 or 32x32 158 | # pixels large. 159 | # 160 | # html_favicon = None 161 | 162 | # Add any paths that contain custom static files (such as style sheets) here, 163 | # relative to this directory. They are copied after the builtin static files, 164 | # so a file named "default.css" will overwrite the builtin "default.css". 165 | html_static_path = ['_static'] 166 | 167 | # Add any extra paths that contain custom files (such as robots.txt or 168 | # .htaccess) here, relative to this directory. These files are copied 169 | # directly to the root of the documentation. 170 | # 171 | # html_extra_path = [] 172 | 173 | # If not None, a 'Last updated on:' timestamp is inserted at every page 174 | # bottom, using the given strftime format. 175 | # The empty string is equivalent to '%b %d, %Y'. 176 | # 177 | # html_last_updated_fmt = None 178 | 179 | # If true, SmartyPants will be used to convert quotes and dashes to 180 | # typographically correct entities. 181 | # 182 | # html_use_smartypants = True 183 | 184 | # Custom sidebar templates, maps document names to template names. 185 | # 186 | # html_sidebars = {} 187 | 188 | # Additional templates that should be rendered to pages, maps page names to 189 | # template names. 190 | # 191 | # html_additional_pages = {} 192 | 193 | # If false, no module index is generated. 194 | # 195 | # html_domain_indices = True 196 | 197 | # If false, no index is generated. 198 | # 199 | # html_use_index = True 200 | 201 | # If true, the index is split into individual pages for each letter. 202 | # 203 | # html_split_index = False 204 | 205 | # If true, links to the reST sources are added to the pages. 206 | # 207 | # html_show_sourcelink = True 208 | 209 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 210 | # 211 | # html_show_sphinx = True 212 | 213 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 214 | # 215 | # html_show_copyright = True 216 | 217 | # If true, an OpenSearch description file will be output, and all pages will 218 | # contain a tag referring to it. The value of this option must be the 219 | # base URL from which the finished HTML is served. 220 | # 221 | # html_use_opensearch = '' 222 | 223 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 224 | # html_file_suffix = None 225 | 226 | # Language to be used for generating the HTML full-text search index. 227 | # Sphinx supports the following languages: 228 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 229 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' 230 | # 231 | # html_search_language = 'en' 232 | 233 | # A dictionary with options for the search language support, empty by default. 234 | # 'ja' uses this config value. 235 | # 'zh' user can custom change `jieba` dictionary path. 236 | # 237 | # html_search_options = {'type': 'default'} 238 | 239 | # The name of a javascript file (relative to the configuration directory) that 240 | # implements a search results scorer. If empty, the default will be used. 241 | # 242 | # html_search_scorer = 'scorer.js' 243 | 244 | # Output file base name for HTML help builder. 245 | htmlhelp_basename = 'PyAnyAPIdoc' 246 | 247 | # -- Options for LaTeX output --------------------------------------------- 248 | 249 | latex_elements = { 250 | # The paper size ('letterpaper' or 'a4paper'). 251 | # 252 | # 'papersize': 'letterpaper', 253 | 254 | # The font size ('10pt', '11pt' or '12pt'). 
255 | # 256 | # 'pointsize': '10pt', 257 | 258 | # Additional stuff for the LaTeX preamble. 259 | # 260 | # 'preamble': '', 261 | 262 | # Latex figure (float) alignment 263 | # 264 | # 'figure_align': 'htbp', 265 | } 266 | 267 | # Grouping the document tree into LaTeX files. List of tuples 268 | # (source start file, target name, title, 269 | # author, documentclass [howto, manual, or own class]). 270 | latex_documents = [ 271 | (master_doc, 'PyAnyAPI.tex', 'PyAnyAPI Documentation', 272 | 'Dmitry Dygalo', 'manual'), 273 | ] 274 | 275 | # The name of an image file (relative to this directory) to place at the top of 276 | # the title page. 277 | # 278 | # latex_logo = None 279 | 280 | # For "manual" documents, if this is true, then toplevel headings are parts, 281 | # not chapters. 282 | # 283 | # latex_use_parts = False 284 | 285 | # If true, show page references after internal links. 286 | # 287 | # latex_show_pagerefs = False 288 | 289 | # If true, show URL addresses after external links. 290 | # 291 | # latex_show_urls = False 292 | 293 | # Documents to append as an appendix to all manuals. 294 | # 295 | # latex_appendices = [] 296 | 297 | # It false, will not define \strong, \code, itleref, \crossref ... but only 298 | # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added 299 | # packages. 300 | # 301 | # latex_keep_old_macro_names = True 302 | 303 | # If false, no module index is generated. 304 | # 305 | # latex_domain_indices = True 306 | 307 | 308 | # -- Options for manual page output --------------------------------------- 309 | 310 | # One entry per manual page. List of tuples 311 | # (source start file, name, description, authors, manual section). 312 | man_pages = [ 313 | (master_doc, 'pyanyapi', 'PyAnyAPI Documentation', 314 | [author], 1) 315 | ] 316 | 317 | # If true, show URL addresses after external links. 318 | # 319 | # man_show_urls = False 320 | 321 | 322 | # -- Options for Texinfo output ------------------------------------------- 323 | 324 | # Grouping the document tree into Texinfo files. List of tuples 325 | # (source start file, target name, title, author, 326 | # dir menu entry, description, category) 327 | texinfo_documents = [ 328 | (master_doc, 'PyAnyAPI', 'PyAnyAPI Documentation', 329 | author, 'PyAnyAPI', 'One line description of project.', 330 | 'Miscellaneous'), 331 | ] 332 | 333 | # Documents to append as an appendix to all manuals. 334 | # 335 | # texinfo_appendices = [] 336 | 337 | # If false, no module index is generated. 338 | # 339 | # texinfo_domain_indices = True 340 | 341 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 342 | # 343 | # texinfo_show_urls = 'footnote' 344 | 345 | # If true, do not generate a @detailmenu in the "Top" node's menu. 346 | # 347 | # texinfo_no_detailmenu = False 348 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to PyAnyAPI's documentation! 2 | ==================================== 3 | 4 | Contents: 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | usage 10 | parsers 11 | complex 12 | changelog 13 | 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | 22 | -------------------------------------------------------------------------------- /docs/parsers.rst: -------------------------------------------------------------------------------- 1 | .. 
_parsers: 2 | 3 | Parsers 4 | ======= 5 | 6 | HTML & XML 7 | ~~~~~~~~~~ 8 | 9 | For HTML and XML based interfaces XPath 1.0 syntax is used for settings 10 | declaration. Unfortunately XPath 2.0 is not supported by lxml. XML is 11 | about the same as HTMLParser, but uses a different lxml parser internally. 12 | Here is an example of usage with ``requests``: 13 | 14 | .. code-block:: python 15 | 16 | >>> import requests 17 | >>> import pyanyapi 18 | >>> parser = pyanyapi.HTMLParser({'header': 'string(.//h1/text())'}) 19 | >>> response = requests.get('http://example.com') 20 | >>> api = parser.parse(response.text) 21 | >>> api.header 22 | Example Domain 23 | 24 | If you need, you can execute more XPath queries at any time you want: 25 | 26 | .. code-block:: python 27 | 28 | from pyanyapi.parsers import HTMLParser 29 | 30 | 31 | >>> parser = HTMLParser({'header': 'string(.//h1/text())'}) 32 | >>> api = parser.parse('
<html><body><h1>This is</h1><p>test</p></body></html>
') 33 | >>> api.header 34 | This is 35 | >>> api.parse('string(//p)') 36 | test 37 | 38 | XML Objectify 39 | ~~~~~~~~~~~~~ 40 | 41 | Lxml provides interesting feature - objectified interface for XML. It 42 | converts whole XML to Python object. This parser doesn't require any 43 | settings. E.g: 44 | 45 | .. code-block:: python 46 | 47 | from pyanyapi.parsers import XMLObjectifyParser 48 | 49 | 50 | >>> XMLObjectifyParser().parse('123').test 51 | 123 52 | 53 | JSON 54 | ~~~~ 55 | 56 | Settings syntax in based on PostgreSQL statements syntax. 57 | 58 | .. code-block:: python 59 | 60 | from pyanyapi.parsers import JSONParser 61 | 62 | 63 | >>> JSONParser({'id': 'container > id'}).parse('{"container":{"id":"123"}}').id 64 | 123 65 | 66 | Or you can get access to values in lists by index: 67 | 68 | .. code-block:: python 69 | 70 | from pyanyapi.parsers import JSONParser 71 | 72 | 73 | >>> JSONParser({'second': 'container > 1'}).parse('{"container":["first", "second", "third"]}').second 74 | second 75 | 76 | And executes more queries after initial parsing: 77 | 78 | .. code-block:: python 79 | 80 | from pyanyapi.parsers import JSONParser 81 | 82 | 83 | >>> api = JSONParser({'second': 'container > 1'}).parse('{"container":[],"second_container":[123]}') 84 | >>> api.parse('second_container > 0') 85 | 123 86 | 87 | YAML 88 | ~~~~ 89 | Equal to JSON parser, but works with YAML data. 90 | 91 | .. code-block:: python 92 | 93 | from pyanyapi.parsers import YAMLParser 94 | 95 | 96 | >>> YAMLParser({'test': 'container > test'}).parse('container:\n test: "123"').test 97 | 123 98 | 99 | Regular Expressions Interface 100 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 101 | 102 | In case, when data has wrong format or is just very complicated to be parsed 103 | with bundled tools, you can use a parser based on regular expressions. 104 | Settings are based on Python's regular expressions. It is the most powerful 105 | parser, because of its simplicity. 106 | 107 | .. code-block:: python 108 | 109 | from pyanyapi.parsers import RegExpParser 110 | 111 | 112 | >>> RegExpParser({'error_code': 'Error (\d+)'}).parse('Oh no!!! It is Error 100!!!').error_code 113 | 100 114 | 115 | And executes more queries after initial parsing: 116 | 117 | .. code-block:: python 118 | 119 | from pyanyapi.parsers import RegExpParser 120 | 121 | 122 | >>> api = RegExpParser({'digits': '\d+'}).parse('123abc') 123 | >>> api.parse('[a-z]+') 124 | abc 125 | 126 | Also, you can pass flags for regular expressions on parser initialization: 127 | 128 | .. code-block:: python 129 | 130 | from pyanyapi.parsers import RegExpParser 131 | 132 | 133 | >>> RegExpParser({'test': '\d+.\d+'}).parse('123\n234').test 134 | 123 135 | >>> RegExpParser({'test': '\d+.\d+'}, flags=re.DOTALL).parse('123\n234').test 136 | 123 137 | 234 138 | 139 | 140 | CSV Interface 141 | ~~~~~~~~~~~~~ 142 | 143 | Operates with CSV data with simple queries in format 'row_id:column_id'. 144 | 145 | .. code-block:: python 146 | 147 | from pyanyapi.parsers import CSVParser 148 | 149 | 150 | >>> CSVParser({'value': '1:2'}).parse('1,2,3\r\n4,5,6\r\n').value 151 | 6 152 | 153 | Also, you can pass custom kwargs for `csv.reader` on parser initialization: 154 | 155 | .. code-block:: python 156 | 157 | from pyanyapi.parsers import CSVParser 158 | 159 | 160 | >>> CSVParser({'value': '1:2'}, delimiter=';').parse('1;2;3\r\n4;5;6\r\n').value 161 | 6 162 | 163 | AJAX Interface 164 | ~~~~~~~~~~~~~~ 165 | 166 | AJAX is a very popular technology and often use JSON data with HTML values. 
Here is an example: 167 | 168 | .. code-block:: python 169 | 170 | from pyanyapi.parsers import AJAXParser 171 | 172 | 173 | >>> api = AJAXParser({'p': 'content > string(//p)'}).parse('{"content": "
<p>Pcontent</p>
"}') 174 | >>> api.p 175 | Pcontent 176 | 177 | It uses combination of XPath queries and PostgreSQL-based JSON lookups. 178 | Custom queries execution is also available: 179 | 180 | .. code-block:: python 181 | 182 | from pyanyapi.parsers import AJAXParser 183 | 184 | 185 | >>> api = AJAXParser().parse('{"content": "
<p>Pcontent</p><span>123</span>"}') 186 | >>> api.parse('content > string(//span)') 187 | 123 188 | 189 | 190 | Custom Interface 191 | ~~~~~~~~~~~~~~~~ 192 | 193 | You can easily declare your own interface. For that you should define 194 | ``execute_method`` method. And optionally ``perform_parsing``. Here is 195 | an example of a naive CSVInterface, which provides an ability to get the column 196 | value by index. Also you should create a separate parser for that. 197 | 198 | .. code-block:: python 199 | 200 | from pyanyapi.interfaces import BaseInterface 201 | from pyanyapi.parsers import BaseParser 202 | 203 | 204 | class CSVInterface(BaseInterface): 205 | 206 | def perform_parsing(self): 207 | return self.content.split(',') 208 | 209 | def execute_method(self, settings): 210 | return self.parsed_content[settings] 211 | 212 | 213 | class CSVParser(BaseParser): 214 | interface_class = CSVInterface 215 | 216 | 217 | >>> CSVParser({'second': 1}).parse('1,2,3').second 218 | 2 219 | 220 | Extending interfaces 221 | ~~~~~~~~~~~~~~~~~~~~ 222 | 223 | Also content can be parsed with regular Python code. It can be done with 224 | special decorators ``interface_method`` and ``interface_property``. 225 | 226 | Custom method example: 227 | 228 | .. code-block:: python 229 | 230 | from pyanyapi.decorators import interface_method 231 | from pyanyapi.parsers import HTMLParser 232 | 233 | 234 | class ParserWithMethod(HTMLParser): 235 | settings = {'occupation': 'string(.//p/text())'} 236 | 237 | @interface_method 238 | def hello(self, name): 239 | return name + ' is ' + self.occupation 240 | 241 | 242 | >>> api = ParserWithMethod().parse('
<html><body><p>programmer</p></body></html>
') 243 | >>> api.occupation 244 | programmer 245 | 246 | >>> api.hello('John') 247 | John is programmer 248 | 249 | Custom property example: 250 | 251 | .. code-block:: python 252 | 253 | from pyanyapi.decorators import interface_property 254 | from pyanyapi.parsers import HTMLParser 255 | 256 | 257 | class ParserWithProperty(HTMLParser): 258 | settings = {'p': 'string(.//p/text())', 'h1': 'string(.//h1/text())'} 259 | 260 | @interface_property 261 | def test(self): 262 | return self.h1 + ' ' + self.p 263 | 264 | 265 | >>> api = ParserWithProperty().parse('
<html><body><h1>This is</h1><p>test</p></body></html>
') 266 | >>> api.h1 267 | This is 268 | 269 | >>> api.p 270 | test 271 | 272 | >>> api.test 273 | This is test 274 | 275 | Certainly the previous example can be done with more complex XPath 276 | expression, but in general case XPath is not enough. 277 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx_rtd_theme -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | .. _usage: 2 | 3 | Usage 4 | ===== 5 | 6 | The library provides an ability to create API over various content. 7 | Currently there are bundled tools to work with HTML, XML, CSV, JSON and YAML. 8 | Initially it was created to work with ``requests`` library. 9 | 10 | Basic setup 11 | ~~~~~~~~~~~ 12 | 13 | Basic parsers can be declared in the following way: 14 | 15 | .. code-block:: python 16 | 17 | from pyanyapi.parsers import HTMLParser 18 | 19 | 20 | class SimpleParser(HTMLParser): 21 | settings = {'header': 'string(.//h1/text())'} 22 | 23 | 24 | >>> api = SimpleParser().parse('
<html><body><h1>Value</h1></body></html>
') 25 | >>> api.header 26 | Value 27 | 28 | Or it can be configured in runtime: 29 | 30 | .. code-block:: python 31 | 32 | from pyanyapi.parsers import HTMLParser 33 | 34 | 35 | >>> api = HTMLParser({ 36 | 'header': 'string(.//h1/text())' 37 | }).parse('
<html><body><h1>Value</h1></body></html>
') 38 | >>> api.header 39 | Value 40 | 41 | To get all parsing results as a dict there is ``parse_all`` method. 42 | All properties (include defined with ``@interface_property`` decorator) will be returned. 43 | 44 | .. code-block:: python 45 | 46 | from pyanyapi.parsers import JSONParser 47 | 48 | >>> JSONParser({ 49 | 'first': 'container > 0', 50 | 'second': 'container > 1', 51 | 'third': 'container > 2', 52 | }).parse('{"container":["first", "second", "third"]}').parse_all() 53 | { 54 | 'first': 'first', 55 | 'second': 'second', 56 | 'third': 'third', 57 | } 58 | 59 | Complex setup 60 | ~~~~~~~~~~~~~ 61 | 62 | In some cases you may want to apply extra transformations to result 63 | list. Here comes "base-children" setup style. 64 | 65 | .. code-block:: python 66 | 67 | from pyanyapi.parsers import HTMLParser 68 | 69 | 70 | class SimpleParser(HTMLParser): 71 | settings = { 72 | 'test': { 73 | 'base': '//test', 74 | 'children': 'text()|*//text()' 75 | } 76 | } 77 | 78 | 79 | >>> api = SimpleParser().parse('123 234') 80 | >>> api.test 81 | ['123 ', ' 234'] 82 | 83 | There is another option to interact with sub-elements. Sub parsers! 84 | 85 | .. code-block:: python 86 | 87 | from pyanyapi.parsers import HTMLParser 88 | 89 | 90 | class SubParser(HTMLParser): 91 | settings = { 92 | 'href': 'string(//@href)', 93 | 'text': 'string(//text())' 94 | } 95 | 96 | 97 | class Parser(HTMLParser): 98 | settings = { 99 | 'elem': { 100 | 'base': './/a', 101 | 'parser': SubParser 102 | } 103 | } 104 | 105 | >>> api = Parser().parse("test") 106 | >>> api.elem[0].href 107 | #test 108 | >>> api.elem[0].text 109 | test 110 | >>> api.parse_all() 111 | {'elem': [{'href': '#test', 'text': 'test'}]} 112 | 113 | Also you can pass sub parsers as classes or like instances. 114 | 115 | Settings inheritance 116 | ~~~~~~~~~~~~~~~~~~~~ 117 | 118 | Settings attribute is merged from all ancestors of current parser. 119 | 120 | .. code-block:: python 121 | 122 | from pyanyapi.parsers import HTMLParser 123 | 124 | 125 | class ParentParser(HTMLParser): 126 | settings = {'parent': '//p'} 127 | 128 | 129 | class FirstChildParser(ParentParser): 130 | settings = {'parent': '//override'} 131 | 132 | 133 | class SecondChildParser(ParentParser): 134 | settings = {'child': '//h1'} 135 | 136 | 137 | >>> FirstChildParser().settings['parent'] 138 | //override 139 | 140 | >>> SecondChildParser().settings['parent'] 141 | //p 142 | 143 | >>> SecondChildParser().settings['child'] 144 | //h1 145 | 146 | >>> SecondChildParser({'child': '//more'}).settings['child'] 147 | //more 148 | 149 | Results stripping 150 | ~~~~~~~~~~~~~~~~~ 151 | 152 | Parsers can automagically strip trailing whitespaces with ``strip=True`` option. 153 | 154 | .. code-block:: python 155 | 156 | from pyanyapi.parsers import XMLParser 157 | 158 | 159 | >>> settings = {'p': 'string(//p)'} 160 | >>> XMLParser(settings).parse('
<p> Pcontent </p>').p 161 |  Pcontent  162 | >>> XMLParser(settings, strip=True).parse('
<p> Pcontent </p>
').p 163 | Pcontent 164 | -------------------------------------------------------------------------------- /pyanyapi/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Module provides tools for convenient interface creation over various types of data in a declarative way. 4 | """ 5 | 6 | 7 | __version__ = '0.6.1' 8 | -------------------------------------------------------------------------------- /pyanyapi/_compat.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | try: 5 | from lxml import etree, objectify 6 | 7 | HTMLParser = etree.HTMLParser 8 | XMLParser = etree.XMLParser 9 | except ImportError: 10 | etree = None 11 | objectify = None 12 | HTMLParser = None 13 | XMLParser = None 14 | 15 | try: 16 | import ujson as json 17 | except ImportError: 18 | import json 19 | 20 | 21 | try: 22 | string_types = (str, unicode) 23 | except NameError: 24 | string_types = (str, ) 25 | -------------------------------------------------------------------------------- /pyanyapi/decorators.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | def interface_property(method): 5 | """ 6 | Marks method to be included in parsing result as property. 7 | """ 8 | method._interface_property = True 9 | return staticmethod(method) 10 | 11 | 12 | def interface_method(method): 13 | """ 14 | Marks method to be included in parsing result. 15 | """ 16 | method._interface_method = True 17 | return staticmethod(method) 18 | -------------------------------------------------------------------------------- /pyanyapi/exceptions.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | class ResponseParseError(Exception): 5 | """ 6 | Raises when data can not be parsed with specified parser. 7 | """ 8 | 9 | def __init__(self, message, content=None): 10 | super(ResponseParseError, self).__init__(message) 11 | self.content = content 12 | -------------------------------------------------------------------------------- /pyanyapi/helpers.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Functions to dynamically attach attributes to classes. 4 | Most of parsing results are cached because of immutability of input data. 5 | """ 6 | 7 | 8 | class cached_property(object): 9 | """ 10 | Copied from Django. 11 | """ 12 | def __init__(self, func): 13 | self.func = func 14 | 15 | def __get__(self, instance, type=None): 16 | res = instance.__dict__[self.func.__name__] = self.func(instance) 17 | return res 18 | 19 | 20 | def memoize(f): 21 | memo = {} 22 | 23 | def inner(key): 24 | if key not in memo: 25 | memo[key] = f(key) 26 | return memo[key] 27 | 28 | return inner 29 | 30 | 31 | def attach_attribute(target, name, attr): 32 | attr.__name__ = name 33 | attr._attached = True 34 | setattr(target, name, attr) 35 | 36 | 37 | def attach_cached_property(target, name, prop): 38 | method = cached_property(prop) 39 | attach_attribute(target, name, method) 40 | -------------------------------------------------------------------------------- /pyanyapi/interfaces.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Classes to be filled with interface declarations. 
4 | """ 5 | import csv 6 | import re 7 | import sys 8 | 9 | import yaml 10 | 11 | from ._compat import json, etree, objectify, XMLParser, HTMLParser, string_types 12 | from .exceptions import ResponseParseError 13 | from .helpers import memoize 14 | 15 | 16 | DICT_LOOKUP = ' > ' 17 | 18 | 19 | def expand_results(value): 20 | if isinstance(value, list): 21 | return [item.parse_all() for item in value] 22 | return value 23 | 24 | 25 | class BaseInterface(object): 26 | """ 27 | Basic dynamically generated interface. 28 | """ 29 | content = None 30 | empty_result = None 31 | 32 | def __init__(self, content, strip=False): 33 | self.content = content 34 | self.strip = strip 35 | self.parse = memoize(self.parse) 36 | 37 | @classmethod 38 | def init_attr(cls, settings): 39 | 40 | def inner(self): 41 | return cls.execute_method(self, settings) 42 | 43 | return inner 44 | 45 | def execute_method(self, settings): 46 | raise NotImplementedError 47 | 48 | @property 49 | def parsed_content(self): 50 | if not hasattr(self, '_parsed_content'): 51 | self._parsed_content = self.perform_parsing() 52 | return self._parsed_content 53 | 54 | def perform_parsing(self): 55 | raise NotImplementedError 56 | 57 | def parse(self, query): 58 | raise NotImplementedError 59 | 60 | def parse_all(self): 61 | """ 62 | Processes all available properties and returns results as dictionary. 63 | """ 64 | return dict( 65 | (key, expand_results(getattr(self, key, self.empty_result))) 66 | for key, attr in self.__class__.__dict__.items() 67 | if hasattr(attr, '_attached') and type(attr).__name__ == 'cached_property' 68 | ) 69 | 70 | def maybe_strip(self, value): 71 | if self.strip and isinstance(value, string_types): 72 | return value.strip() 73 | return value 74 | 75 | 76 | # Uses as fallback. None - can be obtained from JSON's null, any string also can be, so unique object is a best choice 77 | EMPTY_RESULT = object() 78 | 79 | 80 | class CombinedInterface(BaseInterface): 81 | 82 | def __init__(self, parsers, *args, **kwargs): 83 | self.parsers = parsers 84 | super(CombinedInterface, self).__init__(*args, **kwargs) 85 | 86 | def __getattribute__(self, item): 87 | # Catch self.parsers and dynamically attached attributes 88 | try: 89 | return super(CombinedInterface, self).__getattribute__(item) 90 | except AttributeError: 91 | return self.walk(item) 92 | 93 | def walk(self, item): 94 | """ 95 | Recursively walks through all available parsers. 96 | """ 97 | for parser in self.parsers: 98 | try: 99 | if item not in parser.attributes: 100 | continue 101 | result = getattr(parser.parse(self.content), item, EMPTY_RESULT) 102 | # Ignore empty results in current parser 103 | if result in (EMPTY_RESULT, parser.interface_class.empty_result): 104 | continue 105 | return result 106 | except (AttributeError, ResponseParseError): 107 | pass 108 | 109 | def parse_all(self): 110 | result = super(CombinedInterface, self).parse_all() 111 | for parser in self.parsers: 112 | result.update(parser.parse_all(self.content)) 113 | return result 114 | 115 | 116 | class XPathInterface(BaseInterface): 117 | """ 118 | Uses as base class for HTML/XML-based content. 119 | Use XPath 1.0 syntax, which is compatible with LXML. 120 | Because of lack of support of XPath 2.0 some parts of settings structure 121 | is not intuitive. 
122 | Settings example: 123 | 124 | { 125 | 'errors': { 126 | 'base': "//ul[@class='alerts']/div", 127 | 'children': 'text()|*//text()' 128 | } 129 | } 130 | 131 | 'children' key usually uses for modification of result of 'base' expression 132 | before concatenation. 133 | """ 134 | parser_class = HTMLParser 135 | empty_result = '' 136 | _error_message = 'HTML data can not be parsed.' 137 | 138 | def perform_parsing(self): 139 | try: 140 | return etree.fromstring(self.content, self.parser_class()) 141 | except etree.XMLSyntaxError: 142 | raise ResponseParseError(self._error_message, self.content) 143 | 144 | def execute_method(self, settings): 145 | if isinstance(settings, dict): 146 | result = self.parse(settings['base']) 147 | child_query = settings.get('children') 148 | if child_query: 149 | return [self.maybe_strip(''.join(element.xpath(child_query))) for element in result] 150 | sub_parser = settings.get('parser') 151 | if sub_parser: 152 | return [ 153 | (sub_parser() if callable(sub_parser) else sub_parser).parse(etree.tostring(element)) 154 | for element in result 155 | ] 156 | return result 157 | 158 | return self.parse(settings) 159 | 160 | def parse(self, query): 161 | return self.maybe_strip(self.parsed_content.xpath(query)) 162 | 163 | 164 | class XMLInterface(XPathInterface): 165 | parser_class = XMLParser 166 | _error_message = 'XML data can not be parsed.' 167 | 168 | 169 | class XMLObjectifyInterface(BaseInterface): 170 | """ 171 | Parse XML in the way, that its attributes can be accessed like attributes of python object: 172 | 173 | 123 174 | 175 | From it you can get: 176 | >> obj.test 177 | 123 178 | >> obj.not_test 179 | None 180 | 181 | Also this interface does not require any settings. 182 | """ 183 | _error_message = 'XML data can not be parsed.' 184 | 185 | def __init__(self, content, strip=False): 186 | assert not (strip and hasattr(sys, 'pypy_translation_info') and sys.version_info[0] == 2), \ 187 | 'Stripping is not supported on PyPy' 188 | super(XMLObjectifyInterface, self).__init__(content, strip) 189 | 190 | def perform_parsing(self): 191 | try: 192 | return objectify.fromstring(self.content) 193 | except etree.XMLSyntaxError: 194 | raise ResponseParseError(self._error_message, self.content) 195 | 196 | def __getattribute__(self, item): 197 | try: 198 | return super(XMLObjectifyInterface, self).__getattribute__(item) 199 | except AttributeError: 200 | if item == '_parsed_content': 201 | raise 202 | try: 203 | return self.maybe_strip(self.parsed_content.__getattribute__(item)) 204 | except AttributeError: 205 | return None 206 | 207 | def maybe_strip(self, value): 208 | if self.strip and isinstance(value, objectify.ObjectifiedElement): 209 | if isinstance(value, objectify.StringElement) and value.text is not None: 210 | value = value.text.strip() 211 | else: 212 | for key, inner_value in value.__dict__.items(): 213 | value[key] = self.maybe_strip(inner_value) 214 | return value 215 | 216 | 217 | class DictInterface(BaseInterface): 218 | """ 219 | Interface for python dictionaries. Based on PostgreSQL statements syntax. 
220 | 221 | { 222 | 'external_id': 'container > id' 223 | } 224 | 225 | which will get "123" from {"container":{"id":"123"}} 226 | """ 227 | 228 | def get_from_dict(self, target, query): 229 | if not target: 230 | return target 231 | action_list = query.split(DICT_LOOKUP) 232 | for action in action_list: 233 | if target: 234 | action = action.strip() 235 | if isinstance(target, dict): 236 | target = target.get(action, self.empty_result) 237 | else: 238 | try: 239 | target = target[int(action)] 240 | except (IndexError, TypeError, ValueError): 241 | return self.empty_result 242 | else: 243 | return target 244 | return self.maybe_strip(target) 245 | 246 | def execute_method(self, settings): 247 | if isinstance(settings, dict): 248 | result = self.parse(settings['base']) 249 | child_query = settings.get('children') 250 | if child_query: 251 | return [ 252 | self.get_from_dict(r, child_query) or self.empty_result for r in result 253 | ] if result else self.empty_result 254 | return result 255 | 256 | return self.parse(settings) 257 | 258 | def parse(self, query): 259 | return self.get_from_dict(self.parsed_content, query) 260 | 261 | 262 | class JSONInterface(DictInterface): 263 | _error_message = 'JSON data can not be parsed.' 264 | 265 | def perform_parsing(self): 266 | try: 267 | return json.loads(self.content) 268 | except (ValueError, TypeError): 269 | raise ResponseParseError(self._error_message, self.content) 270 | 271 | 272 | class YAMLInterface(DictInterface): 273 | _error_message = 'YAML data can not be parsed.' 274 | 275 | def perform_parsing(self): 276 | try: 277 | return yaml.safe_load(self.content) 278 | except yaml.error.YAMLError: 279 | raise ResponseParseError(self._error_message, self.content) 280 | 281 | 282 | class AJAXInterface(JSONInterface): 283 | """ 284 | Allows to execute XPath, combined with dictionary-based lookups from DictInterface. 285 | 286 | { 287 | 'p': 'container > string(//p)' 288 | } 289 | 290 | which will get "p_content" from {"container":"
<div><p>p_content</p></div>
"} 291 | """ 292 | inner_interface_class = XPathInterface 293 | 294 | def __init__(self, *args, **kwargs): 295 | self._inner_cache = {} 296 | super(AJAXInterface, self).__init__(*args, **kwargs) 297 | 298 | def get_inner_interface(self, text, json_part): 299 | if json_part not in self._inner_cache: 300 | inner_content = super(AJAXInterface, self).get_from_dict(text, json_part) 301 | self._inner_cache[json_part] = self.inner_interface_class(inner_content, self.strip) 302 | return self._inner_cache[json_part] 303 | 304 | def get_from_dict(self, target, query): 305 | json_part, xpath_part = query.rsplit(DICT_LOOKUP, 1) 306 | inner_interface = self.get_inner_interface(target, json_part) 307 | try: 308 | return inner_interface.parse(xpath_part) 309 | except (etree.XMLSyntaxError, ValueError): 310 | return inner_interface.empty_result 311 | 312 | 313 | class RegExpInterface(BaseInterface): 314 | """ 315 | Parser based on regular expressions. It is the most powerful parser, because of 316 | its simplicity. 317 | Settings example: 318 | 319 | { 320 | "result": "^ok$", 321 | "errors": "^Error \d+$", 322 | } 323 | 324 | So, response will be like 'ok' or 'Error 100'. 325 | """ 326 | 327 | def __init__(self, content, strip=False, flags=0): 328 | self.flags = flags 329 | super(RegExpInterface, self).__init__(content, strip) 330 | 331 | def execute_method(self, settings): 332 | matches = re.findall(settings, self.content, self.flags) 333 | if matches: 334 | return self.maybe_strip(matches[0]) 335 | return self.empty_result 336 | 337 | def parse(self, query): 338 | return self.execute_method(query) 339 | 340 | 341 | class CSVInterface(BaseInterface): 342 | """ 343 | Operates with CSV data with simple queries in format 'row_id:column_id'. 344 | 345 | { 346 | "value": "1:2" 347 | } 348 | 349 | Will get 6 from "1,2,3\r\n4,5,6" 350 | """ 351 | _error_message = 'CSV data can not be parsed.' 352 | 353 | def __init__(self, content, strip=False, **reader_kwargs): 354 | self.reader_kwargs = reader_kwargs 355 | super(CSVInterface, self).__init__(content, strip) 356 | 357 | def perform_parsing(self): 358 | try: 359 | return list(csv.reader(self.content.split(), **self.reader_kwargs)) 360 | except (TypeError, AttributeError): 361 | raise ResponseParseError(self._error_message, self.content) 362 | 363 | def execute_method(self, settings): 364 | row, column = settings.split(':') 365 | try: 366 | return self.parsed_content[int(row)][int(column)] 367 | except (IndexError, TypeError): 368 | return self.empty_result 369 | 370 | def parse(self, query): 371 | return self.execute_method(query) 372 | 373 | 374 | class IndexOfInterface(BaseInterface): 375 | """ 376 | Simple interface that tries to find specified string inside another string, storing boolean values. 377 | Settings example: 378 | { 379 | "has_bar": "bar", 380 | "has_foo": "foo" 381 | } 382 | If content contains "bar" string, interface property "has_bar" will be True. 383 | """ 384 | _error_message = 'Can not perform string search.' 
385 | 386 | def execute_method(self, settings): 387 | try: 388 | return str(settings) in str(self.content) 389 | except (TypeError, ValueError): 390 | raise ResponseParseError(self._error_message, self.content) 391 | 392 | def parse(self, query): 393 | return self.execute_method(query) 394 | -------------------------------------------------------------------------------- /pyanyapi/parsers.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Classes for fabrics of interfaces. 4 | Generates interfaces dynamically from given settings. 5 | """ 6 | from ._compat import etree 7 | from .interfaces import ( 8 | XPathInterface, 9 | XMLInterface, 10 | XMLObjectifyInterface, 11 | JSONInterface, 12 | YAMLInterface, 13 | AJAXInterface, 14 | RegExpInterface, 15 | CSVInterface, 16 | CombinedInterface, 17 | IndexOfInterface, 18 | ) 19 | from .helpers import attach_attribute, attach_cached_property 20 | 21 | 22 | class BaseParser(object): 23 | """ 24 | Fabric for some API-like components, which supposes to provide interface to different types of content. 25 | """ 26 | interface_class = None 27 | strip = False 28 | 29 | def __init__(self, settings=None, strip=None): 30 | if strip is not None: 31 | self.strip = strip 32 | parents_settings = self.get_parents_settings() 33 | if settings: 34 | parents_settings.update(settings) 35 | self.settings = parents_settings 36 | 37 | @property 38 | def attributes(self): 39 | extra_attributes = [] 40 | for name in dir(self): 41 | if name == 'attributes': 42 | continue 43 | attr = getattr(self, name) 44 | if hasattr(attr, '_interface_property') or hasattr(attr, '_interface_method'): 45 | extra_attributes.append(name) 46 | return list(self.settings.keys()) + extra_attributes 47 | 48 | def get_parents_settings(self): 49 | """ 50 | Gather settings from parent classes. It provides some kind of settings inheritance. 51 | """ 52 | parents_settings = {} 53 | for klass in reversed(self.__class__.mro()): 54 | parents_settings.update(getattr(klass, 'settings', {})) 55 | return parents_settings 56 | 57 | def parse(self, content=''): 58 | """ 59 | Generates new class instance with desired attributes. 60 | """ 61 | self.content = self.prepare_content(content) 62 | 63 | class Interface(self.interface_class): 64 | pass 65 | 66 | self.setup_class(Interface) 67 | 68 | init_kwargs = self.get_interface_kwargs() 69 | 70 | return Interface(**init_kwargs) 71 | 72 | def parse_all(self, content=''): 73 | return self.parse(content).parse_all() 74 | 75 | def get_interface_kwargs(self): 76 | return {'content': self.content, 'strip': self.strip} 77 | 78 | def prepare_content(self, content): 79 | """ 80 | Hook to provide way to transform content. 81 | """ 82 | return content 83 | 84 | def setup_class(self, cls): 85 | """ 86 | Attaches dynamic properties & methods. 87 | """ 88 | self.process_settings(cls) 89 | self.process_decorators(cls) 90 | 91 | def process_settings(self, cls): 92 | """ 93 | Generates methods, based on settings. 94 | """ 95 | for name, settings in self.settings.items(): 96 | attr = cls.init_attr(settings) 97 | attach_cached_property(cls, name, attr) 98 | 99 | def process_decorators(self, cls): 100 | """ 101 | Re-attach all attributes, which is decorated with 102 | @interface_property or @interface_method decorators to new class. 
103 | """ 104 | for name in dir(self): 105 | attr = getattr(self, name) 106 | if getattr(attr, '_interface_property', False): 107 | attach_cached_property(cls, name, attr) 108 | elif getattr(attr, '_interface_method', False): 109 | attach_attribute(cls, name, attr) 110 | 111 | def __and__(self, other): 112 | return CombinedParser(self, other) 113 | 114 | 115 | class CombinedParser(BaseParser): 116 | """ 117 | Combines multiple parsers in one. This can also be in different types. 118 | """ 119 | interface_class = CombinedInterface 120 | 121 | def __init__(self, *parsers, **kwargs): 122 | if parsers: 123 | self.parsers = parsers 124 | super(CombinedParser, self).__init__(**kwargs) 125 | 126 | @property 127 | def attributes(self): 128 | return super(CombinedParser, self).attributes + sum([parser.attributes for parser in self.parsers], []) 129 | 130 | def get_interface_kwargs(self): 131 | kwargs = super(CombinedParser, self).get_interface_kwargs() 132 | kwargs['parsers'] = self.parsers 133 | return kwargs 134 | 135 | 136 | class LXMLParser(BaseParser): 137 | 138 | def parse(self, *args, **kwargs): 139 | assert etree, 'Using %s, but lxml is not installed' % self.__class__.__name__ 140 | return super(LXMLParser, self).parse(*args, **kwargs) 141 | 142 | 143 | class HTMLParser(LXMLParser): 144 | interface_class = XPathInterface 145 | 146 | 147 | class XMLParser(LXMLParser): 148 | interface_class = XMLInterface 149 | 150 | def prepare_content(self, content): 151 | if isinstance(content, bytes): 152 | declaration, replacement = b'encoding="UTF-8"', b'' 153 | else: 154 | declaration, replacement = 'encoding="UTF-8"', '' 155 | return content.replace(declaration, replacement).replace(declaration.lower(), replacement) 156 | 157 | 158 | class XMLObjectifyParser(XMLParser): 159 | interface_class = XMLObjectifyInterface 160 | 161 | 162 | class JSONParser(BaseParser): 163 | interface_class = JSONInterface 164 | 165 | 166 | class YAMLParser(BaseParser): 167 | interface_class = YAMLInterface 168 | 169 | 170 | class AJAXParser(LXMLParser): 171 | interface_class = AJAXInterface 172 | 173 | 174 | class RegExpParser(BaseParser): 175 | interface_class = RegExpInterface 176 | 177 | def __init__(self, settings=None, strip=None, flags=0): 178 | self.flags = flags 179 | super(RegExpParser, self).__init__(settings, strip) 180 | 181 | def get_interface_kwargs(self): 182 | kwargs = super(RegExpParser, self).get_interface_kwargs() 183 | kwargs['flags'] = self.flags 184 | return kwargs 185 | 186 | 187 | class CSVParser(BaseParser): 188 | interface_class = CSVInterface 189 | 190 | def __init__(self, settings=None, strip=None, **reader_kwargs): 191 | self.reader_kwargs = reader_kwargs 192 | super(CSVParser, self).__init__(settings, strip) 193 | 194 | def get_interface_kwargs(self): 195 | kwargs = super(CSVParser, self).get_interface_kwargs() 196 | kwargs.update(self.reader_kwargs) 197 | return kwargs 198 | 199 | 200 | class IndexOfParser(BaseParser): 201 | interface_class = IndexOfInterface 202 | 203 | def prepare_content(self, content): 204 | if isinstance(content, bytes): 205 | try: 206 | return content.decode() 207 | except UnicodeDecodeError: # For python 2/3 compatibility 208 | pass 209 | return content 210 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | import platform 4 | import sys 5 | 6 | from setuptools import setup 7 | from setuptools.command.test import test as TestCommand 8 | 9 | import pyanyapi 10 | 11 | 12 | PYPY = hasattr(sys, 'pypy_translation_info') 13 | PYPY3 = PYPY and sys.version_info[0] == 3 14 | JYTHON = platform.system() == 'Java' 15 | 16 | 17 | class PyTest(TestCommand): 18 | user_options = [('pytest-args=', 'a', "Arguments to pass into py.test")] 19 | 20 | def initialize_options(self): 21 | TestCommand.initialize_options(self) 22 | self.pytest_args = [] 23 | 24 | def finalize_options(self): 25 | TestCommand.finalize_options(self) 26 | self.test_args = [] 27 | self.test_suite = True 28 | 29 | def run_tests(self): 30 | import pytest 31 | 32 | errno = pytest.main(self.pytest_args) 33 | sys.exit(errno) 34 | 35 | 36 | requirements = ['PyYAML>=3.11'] 37 | test_requirements = ['pytest>=2.8.0,<3.0.0'] 38 | 39 | 40 | if sys.version_info < (3, 3): 41 | test_requirements.append('mock==1.0.1') 42 | if sys.version_info[:2] == (3, 2): 43 | test_requirements.append('coverage==3.7.1') 44 | 45 | if not JYTHON: 46 | if not PYPY: 47 | requirements.append('ujson') 48 | if not PYPY3: 49 | if PYPY: 50 | requirements.append('lxml<3.5') 51 | else: 52 | requirements.append('lxml') 53 | test_requirements.append('pytest-cov>=1.8') 54 | 55 | 56 | setup( 57 | name='pyanyapi', 58 | url='https://github.com/Stranger6667/pyanyapi', 59 | version=pyanyapi.__version__, 60 | packages=['pyanyapi'], 61 | license='MIT', 62 | author='Dmitry Dygalo', 63 | author_email='dadygalo@gmail.com', 64 | maintainer='Dmitry Dygalo', 65 | maintainer_email='dadygalo@gmail.com', 66 | keywords=['parsing', 'interface', 'xml', 'json', 'csv', 'yaml', 'API', 'XPath', 'regexp'], 67 | description='Tools for convenient interface creation over various types of data in a declarative way.', 68 | classifiers=[ 69 | 'Development Status :: 5 - Production/Stable', 70 | 'Environment :: Console', 71 | 'Intended Audience :: Developers', 72 | 'Operating System :: OS Independent', 73 | 'License :: OSI Approved :: MIT License', 74 | 'Programming Language :: Python', 75 | 'Programming Language :: Python :: 2.6', 76 | 'Programming Language :: Python :: 2.7', 77 | 'Programming Language :: Python :: 3.2', 78 | 'Programming Language :: Python :: 3.3', 79 | 'Programming Language :: Python :: 3.4', 80 | 'Programming Language :: Python :: 3.5', 81 | 'Programming Language :: Python :: Implementation :: CPython', 82 | 'Programming Language :: Python :: Implementation :: PyPy', 83 | 'Programming Language :: Python :: Implementation :: Jython', 84 | 'Topic :: Text Processing :: General', 85 | 'Topic :: Utilities', 86 | ], 87 | cmdclass={'test': PyTest}, 88 | include_package_data=True, 89 | install_requires=requirements, 90 | tests_require=test_requirements 91 | ) 92 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | -------------------------------------------------------------------------------- /tests/_compat.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | try: 5 | from mock import patch 6 | except ImportError: 7 | from unittest.mock import patch 8 | -------------------------------------------------------------------------------- /tests/conftest.py: 
-------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import platform 3 | import sys 4 | 5 | import pytest 6 | 7 | from pyanyapi.decorators import interface_property, interface_method 8 | from pyanyapi.parsers import JSONParser, RegExpParser, CombinedParser, HTMLParser 9 | 10 | 11 | class EmptyValuesParser(CombinedParser): 12 | parsers = [ 13 | RegExpParser({'test': '\d,\d'}), 14 | JSONParser( 15 | { 16 | 'test': { 17 | 'base': 'container > test', 18 | }, 19 | 'second': { 20 | 'base': 'container > fail > 1', 21 | }, 22 | 'third': { 23 | 'base': 'container', 24 | 'children': '0' 25 | }, 26 | 'null': { 27 | 'base': 'container', 28 | } 29 | } 30 | ) 31 | ] 32 | 33 | @interface_property 34 | def combined(self): 35 | return '123-' + self.null 36 | 37 | 38 | @pytest.fixture 39 | def empty_values_parser(): 40 | return EmptyValuesParser() 41 | 42 | 43 | @pytest.fixture 44 | def dummy_parser(): 45 | 46 | class DummyParser(CombinedParser): 47 | parsers = ( 48 | JSONParser({'success': 'container > test'}), 49 | RegExpParser({'test': 'href=\'(.*)\''}), 50 | ) 51 | 52 | @interface_property 53 | def combined(self): 54 | return '123-' + self.success 55 | 56 | @interface_method 57 | def method(self, value): 58 | return self.success + value 59 | 60 | return DummyParser() 61 | 62 | 63 | class ParentParser(JSONParser): 64 | settings = { 65 | 'parent1': 'test1', 66 | 'parent2': 'test2' 67 | } 68 | 69 | 70 | class ChildParser(ParentParser): 71 | settings = { 72 | 'parent2': 'child_override', 73 | 'child1': 'test3', 74 | 'child2': 'test4' 75 | } 76 | 77 | 78 | class SubParser(HTMLParser): 79 | settings = { 80 | 'href': 'string(//@href)', 81 | 'text': 'string(//text())' 82 | } 83 | 84 | 85 | class SimpleParser(RegExpParser): 86 | settings = { 87 | 'test': '\d+.\d+', 88 | 'test2': '\d+', 89 | 'test3': 'a', 90 | } 91 | 92 | @interface_property 93 | def test4(self): 94 | return self.test2 + '_4' 95 | 96 | @interface_method 97 | def test_5(self, value): 98 | return 'Will not be included' 99 | 100 | 101 | PYPY = hasattr(sys, 'pypy_translation_info') and sys.version_info[0] == 2 102 | PYPY3 = hasattr(sys, 'pypy_translation_info') and sys.version_info[0] == 3 103 | JYTHON = platform.system() == 'Java' 104 | 105 | lxml_is_supported = pytest.mark.skipif(PYPY3 or JYTHON, reason='lxml is not supported') 106 | lxml_is_not_supported = pytest.mark.skipif(not (PYPY3 or JYTHON), reason='Only on if lxml is supported') 107 | not_pypy = pytest.mark.skipif(PYPY, reason='PyPy is not supported') 108 | -------------------------------------------------------------------------------- /tests/test_interfaces.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from pyanyapi.interfaces import IndexOfInterface 3 | 4 | 5 | def test_indexof_interface(): 6 | interface = IndexOfInterface('this is dummy content') 7 | assert interface.parse('dummy') 8 | assert not interface.parse('foo') 9 | -------------------------------------------------------------------------------- /tests/test_parsers.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import re 3 | 4 | import pytest 5 | 6 | from ._compat import patch 7 | from .conftest import ChildParser, SubParser, SimpleParser, lxml_is_supported, lxml_is_not_supported 8 | from pyanyapi.exceptions import ResponseParseError 9 | from pyanyapi.parsers import ( 10 | XMLObjectifyParser, 11 | XMLParser, 12 | JSONParser, 13 | YAMLParser, 14 | 
RegExpParser, 15 | AJAXParser, 16 | CSVParser, 17 | HTMLParser, 18 | IndexOfParser 19 | ) 20 | 21 | 22 | HTML_CONTENT = "<a href='#test'>test</a>" 23 | XML_CONTENT = '''<?xml version="1.0" encoding="UTF-8"?> 24 | <response> 25 | <id>32e9a4a2</id> 26 | <test-mode>1</test-mode> 27 | <status>accept</status> 28 | </response> 29 | ''' 30 | JSON_CONTENT = '{"container":{"test":"value"},"another":"123"}' 31 | YAML_CONTENT = 'container:\n test: "123"' 32 | AJAX_CONTENT = '{"content": "
<div><p>Pcontent</p></div><span>SPANcontent</span>",' \ 33 | '"second_part":"<div><p>second_p</p></div>","third":{"inner":"<div><p>third_p</p></div>
"}}' 34 | MULTILINE_CONTENT = '123\n234' 35 | CSV_CONTENT = '1,2,3\r\n4,5,6\r\n' 36 | CSV_CONTENT_DIFFERENT_DELIMITER = '1;2;3\r\n4;5;6\r\n' 37 | 38 | 39 | @lxml_is_supported 40 | def test_xml_objectify_parser(): 41 | parsed = XMLObjectifyParser().parse('123') 42 | assert parsed.test == 123 43 | assert parsed.not_existing is None 44 | 45 | 46 | @lxml_is_supported 47 | def test_xml_objectify_parser_error(): 48 | parsed = XMLObjectifyParser().parse('123') 49 | with pytest.raises(ResponseParseError): 50 | parsed.test 51 | 52 | 53 | @lxml_is_supported 54 | def test_xml_parser_error(): 55 | parsed = XMLParser({'test': None}).parse('123') 56 | with pytest.raises(ResponseParseError): 57 | parsed.test 58 | 59 | 60 | def test_yaml_parser_error(): 61 | parsed = YAMLParser({'test': 'test'}).parse('||') 62 | with pytest.raises(ResponseParseError): 63 | parsed.test 64 | 65 | 66 | def test_yaml_parser_vulnerability(): 67 | """ 68 | In case of usage of yaml.load `test` value will be equal to 0. 69 | """ 70 | parsed = YAMLParser({'test': 'container > test'}).parse('!!python/object/apply:os.system ["exit 0"]') 71 | with pytest.raises(ResponseParseError): 72 | parsed.test 73 | 74 | 75 | @lxml_is_supported 76 | @pytest.mark.parametrize( 77 | 'settings', ( 78 | {'success': {'base': '//test-mode/text()'}}, 79 | {'success': '//test-mode/text()'}, 80 | ) 81 | ) 82 | def test_xml_parsed(settings): 83 | parsed = XMLParser(settings).parse(XML_CONTENT) 84 | assert parsed.success == ['1'] 85 | assert parsed.parse('string(//id/text())') == '32e9a4a2' 86 | 87 | 88 | @lxml_is_supported 89 | def test_xml_simple_settings(): 90 | assert XMLParser({'id': {'base': 'string(//id/text())'}}).parse(XML_CONTENT).id == '32e9a4a2' 91 | 92 | 93 | def test_json_parsed(): 94 | content = ''' 95 | { 96 | "container": 97 | { 98 | "id": 1138003, 99 | "inner": 100 | [ 101 | { 102 | "end": { 103 | "id": 123 104 | } 105 | } 106 | ] 107 | } 108 | } 109 | ''' 110 | 111 | parser = JSONParser({ 112 | 'success': { 113 | 'base': 'container > inner > 0 > end > id' 114 | } 115 | }) 116 | assert parser.parse(content).success == 123 117 | parser = JSONParser({ 118 | 'success': { 119 | 'base': 'container > inner', 120 | 'children': 'end > id', 121 | } 122 | }) 123 | assert parser.parse(content).success == [123] 124 | 125 | 126 | def test_multiple_parser_join(): 127 | first_parser = RegExpParser({'test': 'href=\'(.*)\''}) 128 | second_parser = JSONParser({'success': 'container > test'}) 129 | for result_parser in ((first_parser & second_parser), (second_parser & first_parser)): 130 | assert result_parser.parse(HTML_CONTENT).test == '#test' 131 | assert result_parser.parse(JSON_CONTENT).success == 'value' 132 | third_parser = JSONParser({ 133 | 'fail': { 134 | 'base': 'container > test', 135 | } 136 | }) 137 | result_parser = first_parser & second_parser & third_parser 138 | assert result_parser.parse(JSON_CONTENT).success == 'value' 139 | 140 | 141 | def test_multiply_parsers_declaration(dummy_parser): 142 | parsed = dummy_parser.parse(JSON_CONTENT) 143 | assert parsed.success == 'value' 144 | assert parsed.combined == '123-value' 145 | assert parsed.method('-123') == 'value-123' 146 | assert parsed.test is None 147 | 148 | parsed = dummy_parser.parse(HTML_CONTENT) 149 | assert parsed.test == '#test' 150 | assert parsed.success is None 151 | 152 | 153 | @pytest.mark.parametrize( 154 | 'content, attr, expected', 155 | ( 156 | ('{"container":{"test":"value"}}', 'test', 'value'), 157 | ('{"container":{"test":"value"}}', 'second', None), 158 | 
('{"container":{"fail":[1]}}', 'second', None), 159 | ('{"container":[[1],[],[3]]}', 'third', [1, None, 3]), 160 | ('{"container":null}', 'null', None), 161 | ('{"container":[1,2]}', 'test', '1,2'), 162 | ) 163 | ) 164 | def test_empty_values(empty_values_parser, content, attr, expected): 165 | parsed = empty_values_parser.parse(content) 166 | assert getattr(parsed, attr) == expected 167 | 168 | 169 | def test_attributes(empty_values_parser): 170 | assert set(empty_values_parser.attributes) == set(['combined', 'test', 'test', 'second', 'null', 'third']) 171 | 172 | 173 | def test_efficient_parsing(empty_values_parser): 174 | with patch.object(empty_values_parser.parsers[0], 'parse') as regexp_parser: 175 | assert empty_values_parser.parse(JSON_CONTENT).second is None 176 | assert not regexp_parser.called 177 | 178 | 179 | @lxml_is_supported 180 | def test_simple_config_xml_parser(): 181 | parsed = XMLParser({'test': 'string(//test/text())'}).parse('123') 182 | assert parsed.test == '123' 183 | 184 | 185 | def test_simple_config_json_parser(): 186 | parsed = JSONParser({'test': 'container > test'}).parse(JSON_CONTENT) 187 | assert parsed.test == 'value' 188 | 189 | 190 | def test_settings_inheritance(): 191 | parser = ChildParser({'child2': 'override'}) 192 | assert parser.settings['child2'] == 'override' 193 | assert parser.settings['child1'] == 'test3' 194 | assert parser.settings['parent2'] == 'child_override' 195 | assert parser.settings['parent1'] == 'test1' 196 | 197 | 198 | @lxml_is_supported 199 | def test_complex_config(): 200 | parsed = XMLParser({'test': {'base': '//test', 'children': 'text()|*//text()'}}).parse( 201 | '123 234' 202 | ) 203 | assert parsed.test == ['123 ', ' 234'] 204 | 205 | 206 | def test_json_parse(): 207 | assert JSONParser({'test': 'container > test'}).parse(JSON_CONTENT).parse('another') == '123' 208 | 209 | 210 | def test_json_value_error_parse(): 211 | assert JSONParser({'test': 'container > test'}).parse('{"container":"1"}').test is None 212 | 213 | 214 | def test_regexp_parse(): 215 | assert RegExpParser({'digits': '\d+'}).parse('123abc').parse('[a-z]+') == 'abc' 216 | 217 | 218 | def test_yaml_parse(): 219 | assert YAMLParser({'test': 'container > test'}).parse(YAML_CONTENT).test == '123' 220 | 221 | 222 | @lxml_is_not_supported 223 | def test_lxml_not_supported(): 224 | with pytest.raises(AssertionError): 225 | XMLParser({'test': '//p'}).parse('') 226 | 227 | 228 | @lxml_is_supported 229 | def test_ajax_parser(): 230 | parsed = AJAXParser({'p': 'content > string(//p)', 'span': 'content > string(//span)'}).parse(AJAX_CONTENT) 231 | assert parsed.p == 'Pcontent' 232 | assert parsed.span == 'SPANcontent' 233 | assert parsed.parse('third > inner > string(//p)') == 'third_p' 234 | 235 | 236 | @lxml_is_supported 237 | def test_ajax_parser_cache(): 238 | parsed = AJAXParser({ 239 | 'p': 'content > string(//p)', 240 | 'span': 'content > string(//span)', 241 | 'second': 'second_part > string(//p)' 242 | }).parse(AJAX_CONTENT) 243 | assert parsed.p == 'Pcontent' 244 | inner_interface = parsed._inner_cache['content'] 245 | with patch.object(inner_interface, 'parse', wraps=inner_interface.parse) as patched: 246 | assert parsed.span == 'SPANcontent' 247 | assert len(parsed._inner_cache) == 1 248 | assert patched.call_count == 1 249 | assert parsed.second == 'second_p' 250 | assert patched.call_count == 1 251 | assert len(parsed._inner_cache) == 2 252 | 253 | 254 | @lxml_is_supported 255 | def test_ajax_parser_invalid_settings(): 256 | parsed = AJAXParser({ 257 | 
'valid': 'third > inner > string(//p)', 258 | 'invalid': 'third > string(//p)', 259 | }).parse(AJAX_CONTENT) 260 | assert parsed.valid == 'third_p' 261 | assert parsed.invalid == '' 262 | 263 | 264 | def test_parse_memoization(): 265 | api = JSONParser().parse(JSON_CONTENT) 266 | with patch.object(api, 'get_from_dict', wraps=api.get_from_dict) as patched: 267 | assert api.parse('container > test') == 'value' 268 | assert patched.call_count == 1 269 | assert api.parse('container > test') == 'value' 270 | assert patched.call_count == 1 271 | 272 | 273 | def test_regexp_settings(): 274 | assert RegExpParser({'test': '\d+.\d+'}).parse(MULTILINE_CONTENT).test == '123' 275 | assert RegExpParser({'test': '\d+.\d+'}, flags=re.DOTALL).parse(MULTILINE_CONTENT).test == '123\n234' 276 | 277 | 278 | def test_parse_all(): 279 | expected = {'test': '123\n234', 'test2': '123', 'test3': None, 'test4': '123_4'} 280 | parser = SimpleParser(flags=re.DOTALL) 281 | assert parser.parse(MULTILINE_CONTENT).parse_all() == expected 282 | assert parser.parse_all(MULTILINE_CONTENT) == expected 283 | 284 | 285 | def test_parse_all_combined_parser(dummy_parser): 286 | assert dummy_parser.parse(JSON_CONTENT).parse_all() == { 287 | 'success': 'value', 288 | 'combined': '123-value', 289 | 'test': None 290 | } 291 | 292 | 293 | def test_parse_csv(): 294 | api = CSVParser({'second': '1:2'}).parse(CSV_CONTENT) 295 | assert api.second == '6' 296 | assert api.parse('0:1') == '2' 297 | assert api.parse('0:6') is None 298 | 299 | 300 | def test_parse_csv_custom_delimiter(): 301 | api = CSVParser({'second': '1:2'}, delimiter=';').parse(CSV_CONTENT_DIFFERENT_DELIMITER) 302 | assert api.second == '6' 303 | assert api.parse('0:1') == '2' 304 | assert api.parse('0:6') is None 305 | 306 | 307 | def test_csv_parser_error(): 308 | parsed = CSVParser({'test': '1:1'}).parse(123) 309 | with pytest.raises(ResponseParseError): 310 | parsed.test 311 | 312 | 313 | @lxml_is_supported 314 | @pytest.mark.parametrize('sub_parser', (SubParser, SubParser())) 315 | def test_children(sub_parser): 316 | 317 | class Parser(HTMLParser): 318 | settings = { 319 | 'elem': { 320 | 'base': './/a', 321 | 'parser': sub_parser 322 | } 323 | } 324 | 325 | api = Parser().parse(HTML_CONTENT) 326 | sub_api = api.elem[0] 327 | assert sub_api.href == '#test' 328 | assert sub_api.text == 'test' 329 | assert api.parse_all() == {'elem': [{'href': '#test', 'text': 'test'}]} 330 | 331 | 332 | class BrokenObject(object): 333 | 334 | def __str__(self): 335 | return None 336 | 337 | 338 | class TestIndexOfParser: 339 | parser = IndexOfParser({ 340 | 'has_bar': 'bár', 341 | 'has_baz': 'báz', 342 | }) 343 | 344 | @pytest.mark.parametrize('content', ('foo-bár', b'foo-b\xc3\xa1r')) 345 | def test_default(self, content): 346 | parsed = self.parser.parse(content) 347 | assert parsed.has_bar 348 | assert not parsed.has_baz 349 | 350 | @pytest.mark.parametrize('attr', parser.settings.keys()) 351 | def test_parsing_error(self, attr): 352 | parsed = self.parser.parse(BrokenObject()) 353 | with pytest.raises(ResponseParseError): 354 | getattr(parsed, attr) 355 | -------------------------------------------------------------------------------- /tests/test_strip.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from .conftest import lxml_is_supported, not_pypy 3 | from pyanyapi.parsers import RegExpParser, JSONParser, AJAXParser, XMLParser, XMLObjectifyParser 4 | 5 | 6 | JSON_CONTENT = '{"container":" 1 "}' 7 | AJAX_CONTENT = 
'{"content": "<div><p> Pcontent </p></div>"}' 8 | XML_CONTENT = '<div><p> Pcontent </p></div>
' 9 | OBJECTIFY_CONTENT = ''' 10 | abc 11 | bcd 12 | inside 13 | ''' 14 | 15 | 16 | def test_strip_regexp_parser(): 17 | settings = {'all': '.+'} 18 | assert RegExpParser(settings).parse(' 1 ').all == ' 1 ' 19 | assert RegExpParser(settings, strip=True).parse(' 1 ').all == '1' 20 | 21 | 22 | def test_strip_json_parser(): 23 | settings = {'all': 'container'} 24 | assert JSONParser(settings).parse(JSON_CONTENT).all == ' 1 ' 25 | assert JSONParser(settings, strip=True).parse(JSON_CONTENT).all == '1' 26 | 27 | 28 | @lxml_is_supported 29 | def test_strip_ajax_parser(): 30 | settings = {'all': 'content > string(//p)'} 31 | assert AJAXParser(settings).parse(AJAX_CONTENT).all == ' Pcontent ' 32 | assert AJAXParser(settings, strip=True).parse(AJAX_CONTENT).all == 'Pcontent' 33 | 34 | 35 | @lxml_is_supported 36 | def test_strip_xml_parser(): 37 | settings = {'all': 'string(//p)'} 38 | assert XMLParser(settings).parse(XML_CONTENT).all == ' Pcontent ' 39 | assert XMLParser(settings, strip=True).parse(XML_CONTENT).all == 'Pcontent' 40 | 41 | 42 | class CustomParser(RegExpParser): 43 | settings = {'all': '.+'} 44 | strip = True 45 | 46 | 47 | def test_class_override(): 48 | assert CustomParser().parse(' 1 ').all == '1' 49 | assert CustomParser(strip=False).parse(' 1 ').all == ' 1 ' 50 | 51 | 52 | @lxml_is_supported 53 | def test_objectify_strip_default(): 54 | default = XMLObjectifyParser().parse(OBJECTIFY_CONTENT) 55 | assert default.Messages.Message == ' abc ' 56 | assert default.test == ' bcd ' 57 | assert default.first.second.third == ' inside ' 58 | 59 | 60 | @lxml_is_supported 61 | @not_pypy 62 | def test_objectify_strip(): 63 | with_strip = XMLObjectifyParser(strip=True).parse(OBJECTIFY_CONTENT) 64 | assert with_strip.Messages.Message == 'abc' 65 | assert with_strip.test == 'bcd' 66 | assert with_strip.first.second.third == 'inside' 67 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py26, py27, py32, py33, py34, py35, pypy, pypy3 3 | 4 | [testenv] 5 | setenv = 6 | PYTHONPATH = {toxinidir}:{toxinidir}/pyanyapi 7 | whitelist_externals = make 8 | commands = make test --------------------------------------------------------------------------------
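
Taken together, the sources above suggest a compact, declarative usage pattern. The sketch below is assembled from the parser settings and assertions in tests/conftest.py and tests/test_parsers.py; the class name MyParser and the sample content are illustrative only, and docs/usage.rst remains the authoritative reference.

# Minimal usage sketch based on the tests above; MyParser and the sample
# content are invented for illustration, not part of the library.
from pyanyapi.decorators import interface_property
from pyanyapi.parsers import CombinedParser, JSONParser, RegExpParser


class MyParser(CombinedParser):
    parsers = (
        JSONParser({'success': 'container > test'}),  # dictionary-style lookup
        RegExpParser({'test': 'href=\'(.*)\''}),       # regular-expression fallback
    )

    @interface_property
    def combined(self):
        # Decorated methods are exposed on the generated interface as well.
        return '123-' + self.success


parsed = MyParser().parse('{"container":{"test":"value"}}')
assert parsed.success == 'value'
assert parsed.combined == '123-value'
assert parsed.parse_all() == {'success': 'value', 'combined': '123-value', 'test': None}

# Parsers can also be combined ad hoc with the & operator.
joined = RegExpParser({'test': 'href=\'(.*)\''}) & JSONParser({'success': 'container > test'})
assert joined.parse("<a href='#test'>test</a>").test == '#test'

Note that attribute lookups on a combined interface only consult parsers whose settings declare the requested attribute (see test_efficient_parsing), so unrelated parsers are never invoked.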
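
The 'base'/'children' settings form documented on XPathInterface can be exercised the same way. In the sketch below the XML snippet and element names are invented for illustration: 'base' selects one element per result, and 'children' is evaluated against each of those elements, with the resulting text nodes joined into a single string.

# Sketch of the 'base'/'children' settings structure; content is invented.
from pyanyapi.parsers import XMLParser

parser = XMLParser({
    'errors': {
        'base': '//error',               # one entry in the result per matching element
        'children': 'text()|*//text()',  # text of the element and its descendants
    }
})
api = parser.parse(
    '<response>'
    '<error>Bad <field>password</field></error>'
    '<error>Expired token</error>'
    '</response>'
)
assert api.errors == ['Bad password', 'Expired token']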