├── .coveragerc ├── .github └── FUNDING.yml ├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.rst ├── docs ├── Makefile ├── changelog.rst ├── complex.rst ├── conf.py ├── index.rst ├── parsers.rst ├── requirements.txt └── usage.rst ├── pyanyapi ├── __init__.py ├── _compat.py ├── decorators.py ├── exceptions.py ├── helpers.py ├── interfaces.py └── parsers.py ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── _compat.py ├── conftest.py ├── test_interfaces.py ├── test_parsers.py └── test_strip.py └── tox.ini /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = true 3 | 4 | [report] 5 | show_missing = true 6 | precision = 2 7 | exclude_lines = raise NotImplementedError -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: Stranger6667 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.eggs 9 | *.egg-info 10 | dist 11 | build 12 | eggs 13 | parts 14 | bin 15 | var 16 | sdist 17 | develop-eggs 18 | .installed.cfg 19 | lib 20 | lib64 21 | venv*/ 22 | pyvenv*/ 23 | 24 | # Installer logs 25 | pip-log.txt 26 | 27 | # Unit test / coverage reports 28 | .coverage 29 | coverage.xml 30 | junit.xml 31 | .tox 32 | .coverage.* 33 | htmlcov 34 | 35 | # Translations 36 | *.mo 37 | 38 | .idea 39 | 40 | .DS_Store 41 | *~ 42 | .*.sw[po] 43 | .build 44 | .ve 45 | .env 46 | .bootstrap 47 | *.bak 48 | docs/_build 49 | .cache -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.5 4 | matrix: 5 | fast_finish: true 6 | include: 7 | - python: 3.5 8 | env: TOX_ENV=py35 9 | - python: 3.4 10 | env: TOX_ENV=py34 11 | - python: 3.3 12 | env: TOX_ENV=py33 13 | - python: 3.2 14 | env: TOX_ENV=py32 15 | - python: 2.7 16 | env: TOX_ENV=py27 17 | - python: 2.6 18 | env: TOX_ENV=py26 19 | - python: pypy 20 | env: TOX_ENV=pypy 21 | - python: pypy3 22 | env: TOX_ENV=pypy3 23 | - python: 3.5 24 | env: $JYTHON=true 25 | install: 26 | - if [ $TOX_ENV = "py32" ]; then travis_retry pip install "virtualenv<14.0.0" "tox<1.8.0"; fi 27 | - if [ $TOX_ENV = "pypy3" ]; then travis_retry pip install "virtualenv<14.0.0" "tox<1.8.0"; fi 28 | - if [ -z "$JYTHON" ]; then pip install codecov; fi 29 | - if [ "$TOX_ENV" ]; then travis_retry pip install "virtualenv<14.0.0" tox; fi 30 | before_install: 31 | - export JYTHON_URL='http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.0/jython-installer-2.7.0.jar' 32 | - if [ "$JYTHON" ]; then wget $JYTHON_URL -O jython_installer.jar; java -jar jython_installer.jar -s -d $HOME/jython; export PATH=$HOME/jython/bin:$PATH; fi 33 | 34 | script: 35 | - if [ "$JYTHON" ]; then travis_retry jython setup.py test; fi 36 | - if [ "$TOX_ENV" ]; then tox -e $TOX_ENV; fi 37 | after_success: 38 | - codecov 39 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the 
interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at dadygalo@gmail.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 
67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Dmitry Dygalo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include CHANGELOG.md 2 | include README.rst 3 | 4 | recursive-include tests * 5 | recursive-exclude * __pycache__ 6 | recursive-exclude * *.py[co] -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | help: 2 | @echo "clean - remove all build, test, coverage and Python artifacts" 3 | @echo "clean-build - remove build artifacts" 4 | @echo "clean-pyc - remove Python file artifacts" 5 | @echo "clean-test - remove test and coverage artifacts" 6 | @echo "test - run tests quickly with the default Python" 7 | @echo "test-all - run tests on every Python version with tox" 8 | @echo "coverage - check code coverage quickly with the default Python" 9 | @echo "install - install the package to the active Python's site-packages" 10 | 11 | clean: clean-test clean-build clean-pyc 12 | 13 | clean-build: 14 | rm -fr build/ 15 | rm -fr dist/ 16 | rm -fr .eggs/ 17 | find . -name '*.egg-info' -exec rm -fr {} + 18 | find . -name '*.egg' -exec rm -fr {} + 19 | 20 | clean-pyc: 21 | find . -name '*.pyc' -exec rm -f {} + 22 | find . -name '*.pyo' -exec rm -f {} + 23 | find . -name '*~' -exec rm -f {} + 24 | find . 
-name '__pycache__' -exec rm -fr {} + 25 | 26 | clean-test: 27 | rm -fr .cache 28 | rm -fr .tox/ 29 | rm -f .coverage 30 | rm -fr htmlcov/ 31 | 32 | test: 33 | python setup.py test --pytest-args="--cov=pyanyapi --cov-report xml" 34 | 35 | test-all: 36 | tox 37 | 38 | coverage: 39 | coverage run --source pyanyapi setup.py test 40 | coverage report -m 41 | coverage html 42 | open htmlcov/index.html 43 | 44 | install: clean 45 | python setup.py install -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | PyAnyAPI 2 | ======== 3 | 4 | Tools for convenient interface creation over various types of data in 5 | a declarative way. 6 | 7 | .. image:: https://travis-ci.org/Stranger6667/pyanyapi.svg?branch=master 8 | :target: https://travis-ci.org/Stranger6667/pyanyapi 9 | :alt: Build Status 10 | 11 | .. image:: https://codecov.io/github/Stranger6667/pyanyapi/coverage.svg?branch=master 12 | :target: https://codecov.io/github/Stranger6667/pyanyapi?branch=master 13 | :alt: Coverage Status 14 | 15 | .. image:: https://readthedocs.org/projects/pyanyapi/badge/?version=latest 16 | :target: http://pyanyapi.readthedocs.io/en/latest/?badge=latest 17 | :alt: Documentation Status 18 | 19 | Installation 20 | ------------ 21 | 22 | The current stable release: 23 | 24 | :: 25 | 26 | pip install pyanyapi 27 | 28 | or: 29 | 30 | :: 31 | 32 | easy_install pyanyapi 33 | 34 | or from source: 35 | 36 | :: 37 | 38 | $ sudo python setup.py install 39 | 40 | Usage 41 | ----- 42 | 43 | The library provides an ability to create API over various content. 44 | Currently there are bundled tools to work with HTML, XML, CSV, JSON and YAML. 45 | Initially it was created to work with ``requests`` library. 46 | 47 | Basic parsers can be declared in the following way: 48 | 49 | .. code-block:: python 50 | 51 | from pyanyapi.parsers import HTMLParser 52 | 53 | 54 | class SimpleParser(HTMLParser): 55 | settings = {'header': 'string(.//h1/text())'} 56 | 57 | 58 | >>> api = SimpleParser().parse('
<html><body><h1>Value</h1></body></html>
') 59 | >>> api.header 60 | Value 61 | 62 | Documentation 63 | ------------- 64 | 65 | You can view documentation online at: 66 | 67 | - https://pyanyapi.readthedocs.io 68 | 69 | Or you can look at the docs/ directory in the repository. 70 | 71 | Python support 72 | -------------- 73 | 74 | PyAnyAPI supports Python 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, PyPy and partially PyPy3 and Jython. 75 | Unfortunately ``lxml`` doesn't support PyPy3 and Jython, so HTML & XML parsing is not supported on PyPy3 and Jython. 76 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help 18 | help: 19 | @echo "Please use \`make ' where is one of" 20 | @echo " html to make standalone HTML files" 21 | @echo " dirhtml to make HTML files named index.html in directories" 22 | @echo " singlehtml to make a single large HTML file" 23 | @echo " pickle to make pickle files" 24 | @echo " json to make JSON files" 25 | @echo " htmlhelp to make HTML files and a HTML help project" 26 | @echo " qthelp to make HTML files and a qthelp project" 27 | @echo " applehelp to make an Apple Help Book" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " epub3 to make an epub3" 31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 32 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 34 | @echo " text to make text files" 35 | @echo " man to make manual pages" 36 | @echo " texinfo to make Texinfo files" 37 | @echo " info to make Texinfo files and run them through makeinfo" 38 | @echo " gettext to make PO message catalogs" 39 | @echo " changes to make an overview of all changed/added/deprecated items" 40 | @echo " xml to make Docutils-native XML files" 41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 42 | @echo " linkcheck to check all external links for integrity" 43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 44 | @echo " coverage to run coverage check of the documentation (if enabled)" 45 | @echo " dummy to check syntax errors of document sources" 46 | 47 | .PHONY: clean 48 | clean: 49 | rm -rf $(BUILDDIR)/* 50 | 51 | .PHONY: html 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | .PHONY: dirhtml 58 | dirhtml: 59 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 60 | @echo 61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 62 | 63 | .PHONY: singlehtml 64 | singlehtml: 65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 66 | @echo 67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
68 | 69 | .PHONY: pickle 70 | pickle: 71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 72 | @echo 73 | @echo "Build finished; now you can process the pickle files." 74 | 75 | .PHONY: json 76 | json: 77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 78 | @echo 79 | @echo "Build finished; now you can process the JSON files." 80 | 81 | .PHONY: htmlhelp 82 | htmlhelp: 83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 84 | @echo 85 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 86 | ".hhp project file in $(BUILDDIR)/htmlhelp." 87 | 88 | .PHONY: qthelp 89 | qthelp: 90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 91 | @echo 92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PyAnyAPI.qhcp" 95 | @echo "To view the help file:" 96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PyAnyAPI.qhc" 97 | 98 | .PHONY: applehelp 99 | applehelp: 100 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 101 | @echo 102 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 103 | @echo "N.B. You won't be able to view it unless you put it in" \ 104 | "~/Library/Documentation/Help or install it in your application" \ 105 | "bundle." 106 | 107 | .PHONY: devhelp 108 | devhelp: 109 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 110 | @echo 111 | @echo "Build finished." 112 | @echo "To view the help file:" 113 | @echo "# mkdir -p $$HOME/.local/share/devhelp/PyAnyAPI" 114 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/PyAnyAPI" 115 | @echo "# devhelp" 116 | 117 | .PHONY: epub 118 | epub: 119 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 120 | @echo 121 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 122 | 123 | .PHONY: epub3 124 | epub3: 125 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 126 | @echo 127 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." 128 | 129 | .PHONY: latex 130 | latex: 131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 132 | @echo 133 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 134 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 135 | "(use \`make latexpdf' here to do that automatically)." 136 | 137 | .PHONY: latexpdf 138 | latexpdf: 139 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 140 | @echo "Running LaTeX files through pdflatex..." 141 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 142 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 143 | 144 | .PHONY: latexpdfja 145 | latexpdfja: 146 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 147 | @echo "Running LaTeX files through platex and dvipdfmx..." 148 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 149 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 150 | 151 | .PHONY: text 152 | text: 153 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 154 | @echo 155 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 156 | 157 | .PHONY: man 158 | man: 159 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 160 | @echo 161 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 162 | 163 | .PHONY: texinfo 164 | texinfo: 165 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 166 | @echo 167 | @echo "Build finished. 
The Texinfo files are in $(BUILDDIR)/texinfo." 168 | @echo "Run \`make' in that directory to run these through makeinfo" \ 169 | "(use \`make info' here to do that automatically)." 170 | 171 | .PHONY: info 172 | info: 173 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 174 | @echo "Running Texinfo files through makeinfo..." 175 | make -C $(BUILDDIR)/texinfo info 176 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 177 | 178 | .PHONY: gettext 179 | gettext: 180 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 181 | @echo 182 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 183 | 184 | .PHONY: changes 185 | changes: 186 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 187 | @echo 188 | @echo "The overview file is in $(BUILDDIR)/changes." 189 | 190 | .PHONY: linkcheck 191 | linkcheck: 192 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 193 | @echo 194 | @echo "Link check complete; look for any errors in the above output " \ 195 | "or in $(BUILDDIR)/linkcheck/output.txt." 196 | 197 | .PHONY: doctest 198 | doctest: 199 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 200 | @echo "Testing of doctests in the sources finished, look at the " \ 201 | "results in $(BUILDDIR)/doctest/output.txt." 202 | 203 | .PHONY: coverage 204 | coverage: 205 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 206 | @echo "Testing of coverage in the sources finished, look at the " \ 207 | "results in $(BUILDDIR)/coverage/python.txt." 208 | 209 | .PHONY: xml 210 | xml: 211 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 212 | @echo 213 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 214 | 215 | .PHONY: pseudoxml 216 | pseudoxml: 217 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 218 | @echo 219 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 220 | 221 | .PHONY: dummy 222 | dummy: 223 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy 224 | @echo 225 | @echo "Build finished. Dummy builder generates no files." 226 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. _changelog: 2 | 3 | Changelog 4 | ========= 5 | 6 | 0.6.0 - 09.08.2016 7 | ------------------ 8 | 9 | * IndexOf parser. 10 | 11 | 0.5.8 - 14.07.2016 12 | ------------------ 13 | 14 | * Fixed XML content parsing for bytes input. 15 | 16 | 0.5.7 - 28.01.2016 17 | ------------------ 18 | 19 | * Added ``parse_all`` call on subparsers (`#37`_). 20 | 21 | 0.5.6 - 24.11.2015 22 | ------------------ 23 | 24 | * Fixed ``super`` call in exception. 25 | 26 | 0.5.5 - 23.11.2015 27 | ------------------ 28 | 29 | * Add content to exceptions in case of parsing errors (`#35`_). 30 | 31 | 0.5.4 - 15.11.2015 32 | ------------------ 33 | 34 | * Fixed ``lxml`` installation on PyPy (`#34`_). 35 | * Add support for subparsers (`#32`_). 36 | 37 | 0.5.3 - 30.10.2015 38 | ------------------ 39 | 40 | * Disable stripping in XMLObjectifyParser on PyPy (`#30`_). 41 | 42 | 0.5.2 - 20.10.2015 43 | ------------------ 44 | 45 | * Fix incorrect stripping in XMLObjectifyParser (`#29`_). 46 | 47 | 0.5.1 - 20.10.2015 48 | ------------------ 49 | 50 | * Ability to override ``strip`` attribute at class level (`#27`_). 51 | * Fix ``strip`` in XMLObjectifyParser (`#28`_). 
52 | 53 | 0.5 - 05.10.2015 54 | ---------------- 55 | 56 | * Add ``parse_all`` to parse all settings (`#20`_). 57 | * Settings for regular expressions (`#19`_). 58 | * Add ``strip`` option to strip trailing whitespaces (`#14`_). 59 | * Add CSVParser (`#11`_). 60 | 61 | 0.4 - 29.09.2015 62 | ---------------- 63 | 64 | * Add YAMLParser (`#5`_). 65 | * Add AJAXParser (`#9`_). 66 | * ``parse`` calls memoization (`#18`_). 67 | 68 | 0.3 - 24.09.2015 69 | ---------------- 70 | 71 | * Add partial support for PyPy3 (`#7`_). 72 | * Add partial support for Jython (`#6`_). 73 | * Add ujson as dependency where it is possible (`#4`_). 74 | * Lxml will not be installed where it is not supported (`#3`_). 75 | 76 | 0.2.1 - 23.09.2015 77 | ------------------ 78 | 79 | * Remove encoding declaration for XMLObjectifyParser 80 | 81 | 0.2 - 23.09.2015 82 | ---------------- 83 | 84 | * Add ``parse`` methods for JSONInterface & RegExpInterface (`#8`_). 85 | * Add universal wheel config (`#2`_). 86 | 87 | 0.1 - 22.09.2015 88 | ---------------- 89 | 90 | * First release. 91 | 92 | .. _#37: https://github.com/Stranger6667/pyanyapi/issues/37 93 | .. _#35: https://github.com/Stranger6667/pyanyapi/issues/35 94 | .. _#34: https://github.com/Stranger6667/pyanyapi/issues/34 95 | .. _#32: https://github.com/Stranger6667/pyanyapi/issues/32 96 | .. _#30: https://github.com/Stranger6667/pyanyapi/issues/30 97 | .. _#29: https://github.com/Stranger6667/pyanyapi/issues/29 98 | .. _#28: https://github.com/Stranger6667/pyanyapi/issues/28 99 | .. _#27: https://github.com/Stranger6667/pyanyapi/issues/27 100 | .. _#20: https://github.com/Stranger6667/pyanyapi/issues/20 101 | .. _#19: https://github.com/Stranger6667/pyanyapi/issues/19 102 | .. _#18: https://github.com/Stranger6667/pyanyapi/issues/18 103 | .. _#14: https://github.com/Stranger6667/pyanyapi/issues/14 104 | .. _#11: https://github.com/Stranger6667/pyanyapi/issues/11 105 | .. _#9: https://github.com/Stranger6667/pyanyapi/issues/9 106 | .. _#8: https://github.com/Stranger6667/pyanyapi/issues/8 107 | .. _#7: https://github.com/Stranger6667/pyanyapi/issues/7 108 | .. _#6: https://github.com/Stranger6667/pyanyapi/issues/6 109 | .. _#5: https://github.com/Stranger6667/pyanyapi/issues/5 110 | .. _#4: https://github.com/Stranger6667/pyanyapi/issues/4 111 | .. _#3: https://github.com/Stranger6667/pyanyapi/issues/3 112 | .. _#2: https://github.com/Stranger6667/pyanyapi/issues/2 -------------------------------------------------------------------------------- /docs/complex.rst: -------------------------------------------------------------------------------- 1 | .. _complex: 2 | 3 | Complex content parsing 4 | ======================= 5 | 6 | Combined parsers 7 | ~~~~~~~~~~~~~~~~ 8 | 9 | In situations, when particular content type is unknown before parsing, 10 | you can create combined parser, which allows you to use multiply 11 | different parsers transparently. E.g. some server usually returns JSON, 12 | but in cases of server errors it returns HTML pages with some text. 13 | Then: 14 | 15 | .. 
code-block:: python 16 | 17 | from pyanyapi.parsers import CombinedParser, HTMLParser, JSONParser 18 | 19 | 20 | class Parser(CombinedParser): 21 | parsers = [ 22 | JSONParser({'test': 'test'}), 23 | HTMLParser({'error': 'string(//span)'}) 24 | ] 25 | 26 | >>> parser = Parser() 27 | >>> parser.parse('{"test": "Text"}').test 28 | Text 29 | >>> parser.parse('123').error 30 | 123 31 | 32 | Another example 33 | ~~~~~~~~~~~~~~~ 34 | 35 | Sometimes different content types can be combined inside single string. 36 | Often with AJAX requests. 37 | 38 | .. code:: javascript 39 | 40 | {"content": "Text"} 41 | 42 | You can work with such data in the following way: 43 | 44 | .. code-block:: python 45 | 46 | from pyanyapi.decorators import interface_property 47 | from pyanyapi.parsers import HTMLParser, JSONParser 48 | 49 | 50 | inner_parser = HTMLParser({'text': 'string(.//span/text())'}) 51 | 52 | 53 | class AJAXParser(JSONParser): 54 | settings = {'content': 'content'} 55 | 56 | @interface_property 57 | def text(self): 58 | return inner_parser.parse(self.content).text 59 | 60 | 61 | >>> api = AJAXParser().parse('{"content": "Text"}') 62 | >>> api.text 63 | Text 64 | 65 | Now AJAXParser is bundled in pyanyapi, but it works differently. 66 | But anyway, this example can be helpful for building custom parsers. -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # PyAnyAPI documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Sep 27 12:18:20 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | # 20 | import os 21 | import sys 22 | 23 | sys.path.insert(0, os.path.abspath('..')) 24 | 25 | 26 | import sphinx_rtd_theme 27 | from pyanyapi import __version__ 28 | 29 | # -- General configuration ------------------------------------------------ 30 | 31 | # If your documentation needs a minimal Sphinx version, state it here. 32 | # 33 | # needs_sphinx = '1.4.6' 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | extensions = [ 39 | 'sphinx.ext.autodoc', 40 | 'sphinx.ext.coverage', 41 | 'sphinx.ext.viewcode', 42 | ] 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # The suffix(es) of source filenames. 47 | # You can specify multiple suffix as a list of string: 48 | # 49 | # source_suffix = ['.rst', '.md'] 50 | source_suffix = '.rst' 51 | 52 | # The encoding of source files. 53 | # 54 | # source_encoding = 'utf-8-sig' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # General information about the project. 
60 | project = 'PyAnyAPI' 61 | copyright = '2016, Dmitry Dygalo' 62 | author = 'Dmitry Dygalo' 63 | 64 | # The version info for the project you're documenting, acts as replacement for 65 | # |version| and |release|, also used in various other places throughout the 66 | # built documents. 67 | # 68 | # The short X.Y version. 69 | version = release = __version__ 70 | 71 | # The full version, including alpha/beta/rc tags. 72 | 73 | # The language for content autogenerated by Sphinx. Refer to documentation 74 | # for a list of supported languages. 75 | # 76 | # This is also used if you do content translation via gettext catalogs. 77 | # Usually you set "language" from the command line for these cases. 78 | language = None 79 | 80 | # There are two options for replacing |today|: either, you set today to some 81 | # non-false value, then it is used: 82 | # 83 | # today = '' 84 | # 85 | # Else, today_fmt is used as the format for a strftime call. 86 | # 87 | # today_fmt = '%B %d, %Y' 88 | 89 | # List of patterns, relative to source directory, that match files and 90 | # directories to ignore when looking for source files. 91 | # This patterns also effect to html_static_path and html_extra_path 92 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 93 | 94 | # The reST default role (used for this markup: `text`) to use for all 95 | # documents. 96 | # 97 | # default_role = None 98 | 99 | # If true, '()' will be appended to :func: etc. cross-reference text. 100 | # 101 | # add_function_parentheses = True 102 | 103 | # If true, the current module name will be prepended to all description 104 | # unit titles (such as .. function::). 105 | # 106 | # add_module_names = True 107 | 108 | # If true, sectionauthor and moduleauthor directives will be shown in the 109 | # output. They are ignored by default. 110 | # 111 | # show_authors = False 112 | 113 | # The name of the Pygments (syntax highlighting) style to use. 114 | pygments_style = 'sphinx' 115 | 116 | # A list of ignored prefixes for module index sorting. 117 | # modindex_common_prefix = [] 118 | 119 | # If true, keep warnings as "system message" paragraphs in the built documents. 120 | # keep_warnings = False 121 | 122 | # If true, `todo` and `todoList` produce output, else they produce nothing. 123 | todo_include_todos = False 124 | 125 | 126 | # -- Options for HTML output ---------------------------------------------- 127 | 128 | # The theme to use for HTML and HTML Help pages. See the documentation for 129 | # a list of builtin themes. 130 | # 131 | html_theme = 'sphinx_rtd_theme' 132 | 133 | # Theme options are theme-specific and customize the look and feel of a theme 134 | # further. For a list of options available for each theme, see the 135 | # documentation. 136 | # 137 | # html_theme_options = {} 138 | 139 | # Add any paths that contain custom themes here, relative to this directory. 140 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 141 | 142 | # The name for this set of Sphinx documents. 143 | # " v documentation" by default. 144 | # 145 | # html_title = 'PyAnyAPI v0.6.0' 146 | 147 | # A shorter title for the navigation bar. Default is the same as html_title. 148 | # 149 | # html_short_title = None 150 | 151 | # The name of an image file (relative to this directory) to place at the top 152 | # of the sidebar. 153 | # 154 | # html_logo = None 155 | 156 | # The name of an image file (relative to this directory) to use as a favicon of 157 | # the docs. 
This file should be a Windows icon file (.ico) being 16x16 or 32x32 158 | # pixels large. 159 | # 160 | # html_favicon = None 161 | 162 | # Add any paths that contain custom static files (such as style sheets) here, 163 | # relative to this directory. They are copied after the builtin static files, 164 | # so a file named "default.css" will overwrite the builtin "default.css". 165 | html_static_path = ['_static'] 166 | 167 | # Add any extra paths that contain custom files (such as robots.txt or 168 | # .htaccess) here, relative to this directory. These files are copied 169 | # directly to the root of the documentation. 170 | # 171 | # html_extra_path = [] 172 | 173 | # If not None, a 'Last updated on:' timestamp is inserted at every page 174 | # bottom, using the given strftime format. 175 | # The empty string is equivalent to '%b %d, %Y'. 176 | # 177 | # html_last_updated_fmt = None 178 | 179 | # If true, SmartyPants will be used to convert quotes and dashes to 180 | # typographically correct entities. 181 | # 182 | # html_use_smartypants = True 183 | 184 | # Custom sidebar templates, maps document names to template names. 185 | # 186 | # html_sidebars = {} 187 | 188 | # Additional templates that should be rendered to pages, maps page names to 189 | # template names. 190 | # 191 | # html_additional_pages = {} 192 | 193 | # If false, no module index is generated. 194 | # 195 | # html_domain_indices = True 196 | 197 | # If false, no index is generated. 198 | # 199 | # html_use_index = True 200 | 201 | # If true, the index is split into individual pages for each letter. 202 | # 203 | # html_split_index = False 204 | 205 | # If true, links to the reST sources are added to the pages. 206 | # 207 | # html_show_sourcelink = True 208 | 209 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 210 | # 211 | # html_show_sphinx = True 212 | 213 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 214 | # 215 | # html_show_copyright = True 216 | 217 | # If true, an OpenSearch description file will be output, and all pages will 218 | # contain a tag referring to it. The value of this option must be the 219 | # base URL from which the finished HTML is served. 220 | # 221 | # html_use_opensearch = '' 222 | 223 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 224 | # html_file_suffix = None 225 | 226 | # Language to be used for generating the HTML full-text search index. 227 | # Sphinx supports the following languages: 228 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 229 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' 230 | # 231 | # html_search_language = 'en' 232 | 233 | # A dictionary with options for the search language support, empty by default. 234 | # 'ja' uses this config value. 235 | # 'zh' user can custom change `jieba` dictionary path. 236 | # 237 | # html_search_options = {'type': 'default'} 238 | 239 | # The name of a javascript file (relative to the configuration directory) that 240 | # implements a search results scorer. If empty, the default will be used. 241 | # 242 | # html_search_scorer = 'scorer.js' 243 | 244 | # Output file base name for HTML help builder. 245 | htmlhelp_basename = 'PyAnyAPIdoc' 246 | 247 | # -- Options for LaTeX output --------------------------------------------- 248 | 249 | latex_elements = { 250 | # The paper size ('letterpaper' or 'a4paper'). 251 | # 252 | # 'papersize': 'letterpaper', 253 | 254 | # The font size ('10pt', '11pt' or '12pt'). 
255 | # 256 | # 'pointsize': '10pt', 257 | 258 | # Additional stuff for the LaTeX preamble. 259 | # 260 | # 'preamble': '', 261 | 262 | # Latex figure (float) alignment 263 | # 264 | # 'figure_align': 'htbp', 265 | } 266 | 267 | # Grouping the document tree into LaTeX files. List of tuples 268 | # (source start file, target name, title, 269 | # author, documentclass [howto, manual, or own class]). 270 | latex_documents = [ 271 | (master_doc, 'PyAnyAPI.tex', 'PyAnyAPI Documentation', 272 | 'Dmitry Dygalo', 'manual'), 273 | ] 274 | 275 | # The name of an image file (relative to this directory) to place at the top of 276 | # the title page. 277 | # 278 | # latex_logo = None 279 | 280 | # For "manual" documents, if this is true, then toplevel headings are parts, 281 | # not chapters. 282 | # 283 | # latex_use_parts = False 284 | 285 | # If true, show page references after internal links. 286 | # 287 | # latex_show_pagerefs = False 288 | 289 | # If true, show URL addresses after external links. 290 | # 291 | # latex_show_urls = False 292 | 293 | # Documents to append as an appendix to all manuals. 294 | # 295 | # latex_appendices = [] 296 | 297 | # It false, will not define \strong, \code, itleref, \crossref ... but only 298 | # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added 299 | # packages. 300 | # 301 | # latex_keep_old_macro_names = True 302 | 303 | # If false, no module index is generated. 304 | # 305 | # latex_domain_indices = True 306 | 307 | 308 | # -- Options for manual page output --------------------------------------- 309 | 310 | # One entry per manual page. List of tuples 311 | # (source start file, name, description, authors, manual section). 312 | man_pages = [ 313 | (master_doc, 'pyanyapi', 'PyAnyAPI Documentation', 314 | [author], 1) 315 | ] 316 | 317 | # If true, show URL addresses after external links. 318 | # 319 | # man_show_urls = False 320 | 321 | 322 | # -- Options for Texinfo output ------------------------------------------- 323 | 324 | # Grouping the document tree into Texinfo files. List of tuples 325 | # (source start file, target name, title, author, 326 | # dir menu entry, description, category) 327 | texinfo_documents = [ 328 | (master_doc, 'PyAnyAPI', 'PyAnyAPI Documentation', 329 | author, 'PyAnyAPI', 'One line description of project.', 330 | 'Miscellaneous'), 331 | ] 332 | 333 | # Documents to append as an appendix to all manuals. 334 | # 335 | # texinfo_appendices = [] 336 | 337 | # If false, no module index is generated. 338 | # 339 | # texinfo_domain_indices = True 340 | 341 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 342 | # 343 | # texinfo_show_urls = 'footnote' 344 | 345 | # If true, do not generate a @detailmenu in the "Top" node's menu. 346 | # 347 | # texinfo_no_detailmenu = False 348 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to PyAnyAPI's documentation! 2 | ==================================== 3 | 4 | Contents: 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | usage 10 | parsers 11 | complex 12 | changelog 13 | 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | 22 | -------------------------------------------------------------------------------- /docs/parsers.rst: -------------------------------------------------------------------------------- 1 | .. 
_parsers: 2 | 3 | Parsers 4 | ======= 5 | 6 | HTML & XML 7 | ~~~~~~~~~~ 8 | 9 | For HTML and XML based interfaces XPath 1.0 syntax is used for settings 10 | declaration. Unfortunately XPath 2.0 is not supported by lxml. XML is 11 | about the same as HTMLParser, but uses a different lxml parser internally. 12 | Here is an example of usage with ``requests``: 13 | 14 | .. code-block:: python 15 | 16 | >>> import requests 17 | >>> import pyanyapi 18 | >>> parser = pyanyapi.HTMLParser({'header': 'string(.//h1/text())'}) 19 | >>> response = requests.get('http://example.com') 20 | >>> api = parser.parse(response.text) 21 | >>> api.header 22 | Example Domain 23 | 24 | If you need, you can execute more XPath queries at any time you want: 25 | 26 | .. code-block:: python 27 | 28 | from pyanyapi.parsers import HTMLParser 29 | 30 | 31 | >>> parser = HTMLParser({'header': 'string(.//h1/text())'}) 32 | >>> api = parser.parse('
<html><body><h1>This is</h1><p>test</p></body></html>
') 33 | >>> api.header 34 | This is 35 | >>> api.parse('string(//p)') 36 | test 37 | 38 | XML Objectify 39 | ~~~~~~~~~~~~~ 40 | 41 | Lxml provides interesting feature - objectified interface for XML. It 42 | converts whole XML to Python object. This parser doesn't require any 43 | settings. E.g: 44 | 45 | .. code-block:: python 46 | 47 | from pyanyapi.parsers import XMLObjectifyParser 48 | 49 | 50 | >>> XMLObjectifyParser().parse('123').test 51 | 123 52 | 53 | JSON 54 | ~~~~ 55 | 56 | Settings syntax in based on PostgreSQL statements syntax. 57 | 58 | .. code-block:: python 59 | 60 | from pyanyapi.parsers import JSONParser 61 | 62 | 63 | >>> JSONParser({'id': 'container > id'}).parse('{"container":{"id":"123"}}').id 64 | 123 65 | 66 | Or you can get access to values in lists by index: 67 | 68 | .. code-block:: python 69 | 70 | from pyanyapi.parsers import JSONParser 71 | 72 | 73 | >>> JSONParser({'second': 'container > 1'}).parse('{"container":["first", "second", "third"]}').second 74 | second 75 | 76 | And executes more queries after initial parsing: 77 | 78 | .. code-block:: python 79 | 80 | from pyanyapi.parsers import JSONParser 81 | 82 | 83 | >>> api = JSONParser({'second': 'container > 1'}).parse('{"container":[],"second_container":[123]}') 84 | >>> api.parse('second_container > 0') 85 | 123 86 | 87 | YAML 88 | ~~~~ 89 | Equal to JSON parser, but works with YAML data. 90 | 91 | .. code-block:: python 92 | 93 | from pyanyapi.parsers import YAMLParser 94 | 95 | 96 | >>> YAMLParser({'test': 'container > test'}).parse('container:\n test: "123"').test 97 | 123 98 | 99 | Regular Expressions Interface 100 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 101 | 102 | In case, when data has wrong format or is just very complicated to be parsed 103 | with bundled tools, you can use a parser based on regular expressions. 104 | Settings are based on Python's regular expressions. It is the most powerful 105 | parser, because of its simplicity. 106 | 107 | .. code-block:: python 108 | 109 | from pyanyapi.parsers import RegExpParser 110 | 111 | 112 | >>> RegExpParser({'error_code': 'Error (\d+)'}).parse('Oh no!!! It is Error 100!!!').error_code 113 | 100 114 | 115 | And executes more queries after initial parsing: 116 | 117 | .. code-block:: python 118 | 119 | from pyanyapi.parsers import RegExpParser 120 | 121 | 122 | >>> api = RegExpParser({'digits': '\d+'}).parse('123abc') 123 | >>> api.parse('[a-z]+') 124 | abc 125 | 126 | Also, you can pass flags for regular expressions on parser initialization: 127 | 128 | .. code-block:: python 129 | 130 | from pyanyapi.parsers import RegExpParser 131 | 132 | 133 | >>> RegExpParser({'test': '\d+.\d+'}).parse('123\n234').test 134 | 123 135 | >>> RegExpParser({'test': '\d+.\d+'}, flags=re.DOTALL).parse('123\n234').test 136 | 123 137 | 234 138 | 139 | 140 | CSV Interface 141 | ~~~~~~~~~~~~~ 142 | 143 | Operates with CSV data with simple queries in format 'row_id:column_id'. 144 | 145 | .. code-block:: python 146 | 147 | from pyanyapi.parsers import CSVParser 148 | 149 | 150 | >>> CSVParser({'value': '1:2'}).parse('1,2,3\r\n4,5,6\r\n').value 151 | 6 152 | 153 | Also, you can pass custom kwargs for `csv.reader` on parser initialization: 154 | 155 | .. code-block:: python 156 | 157 | from pyanyapi.parsers import CSVParser 158 | 159 | 160 | >>> CSVParser({'value': '1:2'}, delimiter=';').parse('1;2;3\r\n4;5;6\r\n').value 161 | 6 162 | 163 | AJAX Interface 164 | ~~~~~~~~~~~~~~ 165 | 166 | AJAX is a very popular technology and often use JSON data with HTML values. 
Here is an example: 167 | 168 | .. code-block:: python 169 | 170 | from pyanyapi.parsers import AJAXParser 171 | 172 | 173 | >>> api = AJAXParser({'p': 'content > string(//p)'}).parse('{"content": "
<p>Pcontent</p>
"}') 174 | >>> api.p 175 | Pcontent 176 | 177 | It uses combination of XPath queries and PostgreSQL-based JSON lookups. 178 | Custom queries execution is also available: 179 | 180 | .. code-block:: python 181 | 182 | from pyanyapi.parsers import AJAXParser 183 | 184 | 185 | >>> api = AJAXParser().parse('{"content": "
<p>Pcontent</p><span>123</span>"}') 186 | >>> api.parse('content > string(//span)') 187 | 123 188 | 189 | 190 | Custom Interface 191 | ~~~~~~~~~~~~~~~~ 192 | 193 | You can easily declare your own interface. For that you should define 194 | ``execute_method`` method. And optionally ``perform_parsing``. Here is 195 | an example of a naive CSVInterface, which provides an ability to get the column 196 | value by index. Also you should create a separate parser for that. 197 | 198 | .. code-block:: python 199 | 200 | from pyanyapi.interfaces import BaseInterface 201 | from pyanyapi.parsers import BaseParser 202 | 203 | 204 | class CSVInterface(BaseInterface): 205 | 206 | def perform_parsing(self): 207 | return self.content.split(',') 208 | 209 | def execute_method(self, settings): 210 | return self.parsed_content[settings] 211 | 212 | 213 | class CSVParser(BaseParser): 214 | interface_class = CSVInterface 215 | 216 | 217 | >>> CSVParser({'second': 1}).parse('1,2,3').second 218 | 2 219 | 220 | Extending interfaces 221 | ~~~~~~~~~~~~~~~~~~~~ 222 | 223 | Also content can be parsed with regular Python code. It can be done with 224 | special decorators ``interface_method`` and ``interface_property``. 225 | 226 | Custom method example: 227 | 228 | .. code-block:: python 229 | 230 | from pyanyapi.decorators import interface_method 231 | from pyanyapi.parsers import HTMLParser 232 | 233 | 234 | class ParserWithMethod(HTMLParser): 235 | settings = {'occupation': 'string(.//p/text())'} 236 | 237 | @interface_method 238 | def hello(self, name): 239 | return name + ' is ' + self.occupation 240 | 241 | 242 | >>> api = ParserWithMethod().parse('
<html><body><p>programmer</p></body></html>
') 243 | >>> api.occupation 244 | programmer 245 | 246 | >>> api.hello('John') 247 | John is programmer 248 | 249 | Custom property example: 250 | 251 | .. code-block:: python 252 | 253 | from pyanyapi.decorators import interface_property 254 | from pyanyapi.parsers import HTMLParser 255 | 256 | 257 | class ParserWithProperty(HTMLParser): 258 | settings = {'p': 'string(.//p/text())', 'h1': 'string(.//h1/text())'} 259 | 260 | @interface_property 261 | def test(self): 262 | return self.h1 + ' ' + self.p 263 | 264 | 265 | >>> api = ParserWithProperty().parse('
<html><body><h1>This is</h1><p>test</p></body></html>
') 266 | >>> api.h1 267 | This is 268 | 269 | >>> api.p 270 | test 271 | 272 | >>> api.test 273 | This is test 274 | 275 | Certainly the previous example can be done with more complex XPath 276 | expression, but in general case XPath is not enough. 277 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx_rtd_theme -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | .. _usage: 2 | 3 | Usage 4 | ===== 5 | 6 | The library provides an ability to create API over various content. 7 | Currently there are bundled tools to work with HTML, XML, CSV, JSON and YAML. 8 | Initially it was created to work with ``requests`` library. 9 | 10 | Basic setup 11 | ~~~~~~~~~~~ 12 | 13 | Basic parsers can be declared in the following way: 14 | 15 | .. code-block:: python 16 | 17 | from pyanyapi.parsers import HTMLParser 18 | 19 | 20 | class SimpleParser(HTMLParser): 21 | settings = {'header': 'string(.//h1/text())'} 22 | 23 | 24 | >>> api = SimpleParser().parse('
<html><body><h1>Value</h1></body></html>
') 25 | >>> api.header 26 | Value 27 | 28 | Or it can be configured in runtime: 29 | 30 | .. code-block:: python 31 | 32 | from pyanyapi.parsers import HTMLParser 33 | 34 | 35 | >>> api = HTMLParser({ 36 | 'header': 'string(.//h1/text())' 37 | }).parse('
<html><body><h1>Value</h1></body></html>
') 38 | >>> api.header 39 | Value 40 | 41 | To get all parsing results as a dict there is ``parse_all`` method. 42 | All properties (include defined with ``@interface_property`` decorator) will be returned. 43 | 44 | .. code-block:: python 45 | 46 | from pyanyapi.parsers import JSONParser 47 | 48 | >>> JSONParser({ 49 | 'first': 'container > 0', 50 | 'second': 'container > 1', 51 | 'third': 'container > 2', 52 | }).parse('{"container":["first", "second", "third"]}').parse_all() 53 | { 54 | 'first': 'first', 55 | 'second': 'second', 56 | 'third': 'third', 57 | } 58 | 59 | Complex setup 60 | ~~~~~~~~~~~~~ 61 | 62 | In some cases you may want to apply extra transformations to result 63 | list. Here comes "base-children" setup style. 64 | 65 | .. code-block:: python 66 | 67 | from pyanyapi.parsers import HTMLParser 68 | 69 | 70 | class SimpleParser(HTMLParser): 71 | settings = { 72 | 'test': { 73 | 'base': '//test', 74 | 'children': 'text()|*//text()' 75 | } 76 | } 77 | 78 | 79 | >>> api = SimpleParser().parse('123 234') 80 | >>> api.test 81 | ['123 ', ' 234'] 82 | 83 | There is another option to interact with sub-elements. Sub parsers! 84 | 85 | .. code-block:: python 86 | 87 | from pyanyapi.parsers import HTMLParser 88 | 89 | 90 | class SubParser(HTMLParser): 91 | settings = { 92 | 'href': 'string(//@href)', 93 | 'text': 'string(//text())' 94 | } 95 | 96 | 97 | class Parser(HTMLParser): 98 | settings = { 99 | 'elem': { 100 | 'base': './/a', 101 | 'parser': SubParser 102 | } 103 | } 104 | 105 | >>> api = Parser().parse("test") 106 | >>> api.elem[0].href 107 | #test 108 | >>> api.elem[0].text 109 | test 110 | >>> api.parse_all() 111 | {'elem': [{'href': '#test', 'text': 'test'}]} 112 | 113 | Also you can pass sub parsers as classes or like instances. 114 | 115 | Settings inheritance 116 | ~~~~~~~~~~~~~~~~~~~~ 117 | 118 | Settings attribute is merged from all ancestors of current parser. 119 | 120 | .. code-block:: python 121 | 122 | from pyanyapi.parsers import HTMLParser 123 | 124 | 125 | class ParentParser(HTMLParser): 126 | settings = {'parent': '//p'} 127 | 128 | 129 | class FirstChildParser(ParentParser): 130 | settings = {'parent': '//override'} 131 | 132 | 133 | class SecondChildParser(ParentParser): 134 | settings = {'child': '//h1'} 135 | 136 | 137 | >>> FirstChildParser().settings['parent'] 138 | //override 139 | 140 | >>> SecondChildParser().settings['parent'] 141 | //p 142 | 143 | >>> SecondChildParser().settings['child'] 144 | //h1 145 | 146 | >>> SecondChildParser({'child': '//more'}).settings['child'] 147 | //more 148 | 149 | Results stripping 150 | ~~~~~~~~~~~~~~~~~ 151 | 152 | Parsers can automagically strip trailing whitespaces with ``strip=True`` option. 153 | 154 | .. code-block:: python 155 | 156 | from pyanyapi.parsers import XMLParser 157 | 158 | 159 | >>> settings = {'p': 'string(//p)'} 160 | >>> XMLParser(settings).parse('
<p> Pcontent </p>').p 161 |  Pcontent  162 | >>> XMLParser(settings, strip=True).parse('
<p> Pcontent </p>
').p 163 | Pcontent 164 | -------------------------------------------------------------------------------- /pyanyapi/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Module provides tools for convenient interface creation over various types of data in a declarative way. 4 | """ 5 | 6 | 7 | __version__ = '0.6.1' 8 | -------------------------------------------------------------------------------- /pyanyapi/_compat.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | try: 5 | from lxml import etree, objectify 6 | 7 | HTMLParser = etree.HTMLParser 8 | XMLParser = etree.XMLParser 9 | except ImportError: 10 | etree = None 11 | objectify = None 12 | HTMLParser = None 13 | XMLParser = None 14 | 15 | try: 16 | import ujson as json 17 | except ImportError: 18 | import json 19 | 20 | 21 | try: 22 | string_types = (str, unicode) 23 | except NameError: 24 | string_types = (str, ) 25 | -------------------------------------------------------------------------------- /pyanyapi/decorators.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | def interface_property(method): 5 | """ 6 | Marks method to be included in parsing result as property. 7 | """ 8 | method._interface_property = True 9 | return staticmethod(method) 10 | 11 | 12 | def interface_method(method): 13 | """ 14 | Marks method to be included in parsing result. 15 | """ 16 | method._interface_method = True 17 | return staticmethod(method) 18 | -------------------------------------------------------------------------------- /pyanyapi/exceptions.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | class ResponseParseError(Exception): 5 | """ 6 | Raises when data can not be parsed with specified parser. 7 | """ 8 | 9 | def __init__(self, message, content=None): 10 | super(ResponseParseError, self).__init__(message) 11 | self.content = content 12 | -------------------------------------------------------------------------------- /pyanyapi/helpers.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Functions to dynamically attach attributes to classes. 4 | Most of parsing results are cached because of immutability of input data. 5 | """ 6 | 7 | 8 | class cached_property(object): 9 | """ 10 | Copied from Django. 11 | """ 12 | def __init__(self, func): 13 | self.func = func 14 | 15 | def __get__(self, instance, type=None): 16 | res = instance.__dict__[self.func.__name__] = self.func(instance) 17 | return res 18 | 19 | 20 | def memoize(f): 21 | memo = {} 22 | 23 | def inner(key): 24 | if key not in memo: 25 | memo[key] = f(key) 26 | return memo[key] 27 | 28 | return inner 29 | 30 | 31 | def attach_attribute(target, name, attr): 32 | attr.__name__ = name 33 | attr._attached = True 34 | setattr(target, name, attr) 35 | 36 | 37 | def attach_cached_property(target, name, prop): 38 | method = cached_property(prop) 39 | attach_attribute(target, name, method) 40 | -------------------------------------------------------------------------------- /pyanyapi/interfaces.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Classes to be filled with interface declarations. 
4 | """ 5 | import csv 6 | import re 7 | import sys 8 | 9 | import yaml 10 | 11 | from ._compat import json, etree, objectify, XMLParser, HTMLParser, string_types 12 | from .exceptions import ResponseParseError 13 | from .helpers import memoize 14 | 15 | 16 | DICT_LOOKUP = ' > ' 17 | 18 | 19 | def expand_results(value): 20 | if isinstance(value, list): 21 | return [item.parse_all() for item in value] 22 | return value 23 | 24 | 25 | class BaseInterface(object): 26 | """ 27 | Basic dynamically generated interface. 28 | """ 29 | content = None 30 | empty_result = None 31 | 32 | def __init__(self, content, strip=False): 33 | self.content = content 34 | self.strip = strip 35 | self.parse = memoize(self.parse) 36 | 37 | @classmethod 38 | def init_attr(cls, settings): 39 | 40 | def inner(self): 41 | return cls.execute_method(self, settings) 42 | 43 | return inner 44 | 45 | def execute_method(self, settings): 46 | raise NotImplementedError 47 | 48 | @property 49 | def parsed_content(self): 50 | if not hasattr(self, '_parsed_content'): 51 | self._parsed_content = self.perform_parsing() 52 | return self._parsed_content 53 | 54 | def perform_parsing(self): 55 | raise NotImplementedError 56 | 57 | def parse(self, query): 58 | raise NotImplementedError 59 | 60 | def parse_all(self): 61 | """ 62 | Processes all available properties and returns results as dictionary. 63 | """ 64 | return dict( 65 | (key, expand_results(getattr(self, key, self.empty_result))) 66 | for key, attr in self.__class__.__dict__.items() 67 | if hasattr(attr, '_attached') and type(attr).__name__ == 'cached_property' 68 | ) 69 | 70 | def maybe_strip(self, value): 71 | if self.strip and isinstance(value, string_types): 72 | return value.strip() 73 | return value 74 | 75 | 76 | # Uses as fallback. None - can be obtained from JSON's null, any string also can be, so unique object is a best choice 77 | EMPTY_RESULT = object() 78 | 79 | 80 | class CombinedInterface(BaseInterface): 81 | 82 | def __init__(self, parsers, *args, **kwargs): 83 | self.parsers = parsers 84 | super(CombinedInterface, self).__init__(*args, **kwargs) 85 | 86 | def __getattribute__(self, item): 87 | # Catch self.parsers and dynamically attached attributes 88 | try: 89 | return super(CombinedInterface, self).__getattribute__(item) 90 | except AttributeError: 91 | return self.walk(item) 92 | 93 | def walk(self, item): 94 | """ 95 | Recursively walks through all available parsers. 96 | """ 97 | for parser in self.parsers: 98 | try: 99 | if item not in parser.attributes: 100 | continue 101 | result = getattr(parser.parse(self.content), item, EMPTY_RESULT) 102 | # Ignore empty results in current parser 103 | if result in (EMPTY_RESULT, parser.interface_class.empty_result): 104 | continue 105 | return result 106 | except (AttributeError, ResponseParseError): 107 | pass 108 | 109 | def parse_all(self): 110 | result = super(CombinedInterface, self).parse_all() 111 | for parser in self.parsers: 112 | result.update(parser.parse_all(self.content)) 113 | return result 114 | 115 | 116 | class XPathInterface(BaseInterface): 117 | """ 118 | Uses as base class for HTML/XML-based content. 119 | Use XPath 1.0 syntax, which is compatible with LXML. 120 | Because of lack of support of XPath 2.0 some parts of settings structure 121 | is not intuitive. 
122 | Settings example: 123 | 124 | { 125 | 'errors': { 126 | 'base': "//ul[@class='alerts']/div", 127 | 'children': 'text()|*//text()' 128 | } 129 | } 130 | 131 | 'children' key usually uses for modification of result of 'base' expression 132 | before concatenation. 133 | """ 134 | parser_class = HTMLParser 135 | empty_result = '' 136 | _error_message = 'HTML data can not be parsed.' 137 | 138 | def perform_parsing(self): 139 | try: 140 | return etree.fromstring(self.content, self.parser_class()) 141 | except etree.XMLSyntaxError: 142 | raise ResponseParseError(self._error_message, self.content) 143 | 144 | def execute_method(self, settings): 145 | if isinstance(settings, dict): 146 | result = self.parse(settings['base']) 147 | child_query = settings.get('children') 148 | if child_query: 149 | return [self.maybe_strip(''.join(element.xpath(child_query))) for element in result] 150 | sub_parser = settings.get('parser') 151 | if sub_parser: 152 | return [ 153 | (sub_parser() if callable(sub_parser) else sub_parser).parse(etree.tostring(element)) 154 | for element in result 155 | ] 156 | return result 157 | 158 | return self.parse(settings) 159 | 160 | def parse(self, query): 161 | return self.maybe_strip(self.parsed_content.xpath(query)) 162 | 163 | 164 | class XMLInterface(XPathInterface): 165 | parser_class = XMLParser 166 | _error_message = 'XML data can not be parsed.' 167 | 168 | 169 | class XMLObjectifyInterface(BaseInterface): 170 | """ 171 | Parse XML in the way, that its attributes can be accessed like attributes of python object: 172 | 173 | 123 174 | 175 | From it you can get: 176 | >> obj.test 177 | 123 178 | >> obj.not_test 179 | None 180 | 181 | Also this interface does not require any settings. 182 | """ 183 | _error_message = 'XML data can not be parsed.' 184 | 185 | def __init__(self, content, strip=False): 186 | assert not (strip and hasattr(sys, 'pypy_translation_info') and sys.version_info[0] == 2), \ 187 | 'Stripping is not supported on PyPy' 188 | super(XMLObjectifyInterface, self).__init__(content, strip) 189 | 190 | def perform_parsing(self): 191 | try: 192 | return objectify.fromstring(self.content) 193 | except etree.XMLSyntaxError: 194 | raise ResponseParseError(self._error_message, self.content) 195 | 196 | def __getattribute__(self, item): 197 | try: 198 | return super(XMLObjectifyInterface, self).__getattribute__(item) 199 | except AttributeError: 200 | if item == '_parsed_content': 201 | raise 202 | try: 203 | return self.maybe_strip(self.parsed_content.__getattribute__(item)) 204 | except AttributeError: 205 | return None 206 | 207 | def maybe_strip(self, value): 208 | if self.strip and isinstance(value, objectify.ObjectifiedElement): 209 | if isinstance(value, objectify.StringElement) and value.text is not None: 210 | value = value.text.strip() 211 | else: 212 | for key, inner_value in value.__dict__.items(): 213 | value[key] = self.maybe_strip(inner_value) 214 | return value 215 | 216 | 217 | class DictInterface(BaseInterface): 218 | """ 219 | Interface for python dictionaries. Based on PostgreSQL statements syntax. 
220 | 221 | { 222 | 'external_id': 'container > id' 223 | } 224 | 225 | which will get "123" from {"container":{"id":"123"}} 226 | """ 227 | 228 | def get_from_dict(self, target, query): 229 | if not target: 230 | return target 231 | action_list = query.split(DICT_LOOKUP) 232 | for action in action_list: 233 | if target: 234 | action = action.strip() 235 | if isinstance(target, dict): 236 | target = target.get(action, self.empty_result) 237 | else: 238 | try: 239 | target = target[int(action)] 240 | except (IndexError, TypeError, ValueError): 241 | return self.empty_result 242 | else: 243 | return target 244 | return self.maybe_strip(target) 245 | 246 | def execute_method(self, settings): 247 | if isinstance(settings, dict): 248 | result = self.parse(settings['base']) 249 | child_query = settings.get('children') 250 | if child_query: 251 | return [ 252 | self.get_from_dict(r, child_query) or self.empty_result for r in result 253 | ] if result else self.empty_result 254 | return result 255 | 256 | return self.parse(settings) 257 | 258 | def parse(self, query): 259 | return self.get_from_dict(self.parsed_content, query) 260 | 261 | 262 | class JSONInterface(DictInterface): 263 | _error_message = 'JSON data can not be parsed.' 264 | 265 | def perform_parsing(self): 266 | try: 267 | return json.loads(self.content) 268 | except (ValueError, TypeError): 269 | raise ResponseParseError(self._error_message, self.content) 270 | 271 | 272 | class YAMLInterface(DictInterface): 273 | _error_message = 'YAML data can not be parsed.' 274 | 275 | def perform_parsing(self): 276 | try: 277 | return yaml.safe_load(self.content) 278 | except yaml.error.YAMLError: 279 | raise ResponseParseError(self._error_message, self.content) 280 | 281 | 282 | class AJAXInterface(JSONInterface): 283 | """ 284 | Allows to execute XPath, combined with dictionary-based lookups from DictInterface. 285 | 286 | { 287 | 'p': 'container > string(//p)' 288 | } 289 | 290 | which will get "p_content" from {"container":"
<div><p>p_content</p></div>
"} 291 | """ 292 | inner_interface_class = XPathInterface 293 | 294 | def __init__(self, *args, **kwargs): 295 | self._inner_cache = {} 296 | super(AJAXInterface, self).__init__(*args, **kwargs) 297 | 298 | def get_inner_interface(self, text, json_part): 299 | if json_part not in self._inner_cache: 300 | inner_content = super(AJAXInterface, self).get_from_dict(text, json_part) 301 | self._inner_cache[json_part] = self.inner_interface_class(inner_content, self.strip) 302 | return self._inner_cache[json_part] 303 | 304 | def get_from_dict(self, target, query): 305 | json_part, xpath_part = query.rsplit(DICT_LOOKUP, 1) 306 | inner_interface = self.get_inner_interface(target, json_part) 307 | try: 308 | return inner_interface.parse(xpath_part) 309 | except (etree.XMLSyntaxError, ValueError): 310 | return inner_interface.empty_result 311 | 312 | 313 | class RegExpInterface(BaseInterface): 314 | """ 315 | Parser based on regular expressions. It is the most powerful parser, because of 316 | its simplicity. 317 | Settings example: 318 | 319 | { 320 | "result": "^ok$", 321 | "errors": "^Error \d+$", 322 | } 323 | 324 | So, response will be like 'ok' or 'Error 100'. 325 | """ 326 | 327 | def __init__(self, content, strip=False, flags=0): 328 | self.flags = flags 329 | super(RegExpInterface, self).__init__(content, strip) 330 | 331 | def execute_method(self, settings): 332 | matches = re.findall(settings, self.content, self.flags) 333 | if matches: 334 | return self.maybe_strip(matches[0]) 335 | return self.empty_result 336 | 337 | def parse(self, query): 338 | return self.execute_method(query) 339 | 340 | 341 | class CSVInterface(BaseInterface): 342 | """ 343 | Operates with CSV data with simple queries in format 'row_id:column_id'. 344 | 345 | { 346 | "value": "1:2" 347 | } 348 | 349 | Will get 6 from "1,2,3\r\n4,5,6" 350 | """ 351 | _error_message = 'CSV data can not be parsed.' 352 | 353 | def __init__(self, content, strip=False, **reader_kwargs): 354 | self.reader_kwargs = reader_kwargs 355 | super(CSVInterface, self).__init__(content, strip) 356 | 357 | def perform_parsing(self): 358 | try: 359 | return list(csv.reader(self.content.split(), **self.reader_kwargs)) 360 | except (TypeError, AttributeError): 361 | raise ResponseParseError(self._error_message, self.content) 362 | 363 | def execute_method(self, settings): 364 | row, column = settings.split(':') 365 | try: 366 | return self.parsed_content[int(row)][int(column)] 367 | except (IndexError, TypeError): 368 | return self.empty_result 369 | 370 | def parse(self, query): 371 | return self.execute_method(query) 372 | 373 | 374 | class IndexOfInterface(BaseInterface): 375 | """ 376 | Simple interface that tries to find specified string inside another string, storing boolean values. 377 | Settings example: 378 | { 379 | "has_bar": "bar", 380 | "has_foo": "foo" 381 | } 382 | If content contains "bar" string, interface property "has_bar" will be True. 383 | """ 384 | _error_message = 'Can not perform string search.' 
385 | 386 | def execute_method(self, settings): 387 | try: 388 | return str(settings) in str(self.content) 389 | except (TypeError, ValueError): 390 | raise ResponseParseError(self._error_message, self.content) 391 | 392 | def parse(self, query): 393 | return self.execute_method(query) 394 | -------------------------------------------------------------------------------- /pyanyapi/parsers.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Classes for fabrics of interfaces. 4 | Generates interfaces dynamically from given settings. 5 | """ 6 | from ._compat import etree 7 | from .interfaces import ( 8 | XPathInterface, 9 | XMLInterface, 10 | XMLObjectifyInterface, 11 | JSONInterface, 12 | YAMLInterface, 13 | AJAXInterface, 14 | RegExpInterface, 15 | CSVInterface, 16 | CombinedInterface, 17 | IndexOfInterface, 18 | ) 19 | from .helpers import attach_attribute, attach_cached_property 20 | 21 | 22 | class BaseParser(object): 23 | """ 24 | Fabric for some API-like components, which supposes to provide interface to different types of content. 25 | """ 26 | interface_class = None 27 | strip = False 28 | 29 | def __init__(self, settings=None, strip=None): 30 | if strip is not None: 31 | self.strip = strip 32 | parents_settings = self.get_parents_settings() 33 | if settings: 34 | parents_settings.update(settings) 35 | self.settings = parents_settings 36 | 37 | @property 38 | def attributes(self): 39 | extra_attributes = [] 40 | for name in dir(self): 41 | if name == 'attributes': 42 | continue 43 | attr = getattr(self, name) 44 | if hasattr(attr, '_interface_property') or hasattr(attr, '_interface_method'): 45 | extra_attributes.append(name) 46 | return list(self.settings.keys()) + extra_attributes 47 | 48 | def get_parents_settings(self): 49 | """ 50 | Gather settings from parent classes. It provides some kind of settings inheritance. 51 | """ 52 | parents_settings = {} 53 | for klass in reversed(self.__class__.mro()): 54 | parents_settings.update(getattr(klass, 'settings', {})) 55 | return parents_settings 56 | 57 | def parse(self, content=''): 58 | """ 59 | Generates new class instance with desired attributes. 60 | """ 61 | self.content = self.prepare_content(content) 62 | 63 | class Interface(self.interface_class): 64 | pass 65 | 66 | self.setup_class(Interface) 67 | 68 | init_kwargs = self.get_interface_kwargs() 69 | 70 | return Interface(**init_kwargs) 71 | 72 | def parse_all(self, content=''): 73 | return self.parse(content).parse_all() 74 | 75 | def get_interface_kwargs(self): 76 | return {'content': self.content, 'strip': self.strip} 77 | 78 | def prepare_content(self, content): 79 | """ 80 | Hook to provide way to transform content. 81 | """ 82 | return content 83 | 84 | def setup_class(self, cls): 85 | """ 86 | Attaches dynamic properties & methods. 87 | """ 88 | self.process_settings(cls) 89 | self.process_decorators(cls) 90 | 91 | def process_settings(self, cls): 92 | """ 93 | Generates methods, based on settings. 94 | """ 95 | for name, settings in self.settings.items(): 96 | attr = cls.init_attr(settings) 97 | attach_cached_property(cls, name, attr) 98 | 99 | def process_decorators(self, cls): 100 | """ 101 | Re-attach all attributes, which is decorated with 102 | @interface_property or @interface_method decorators to new class. 
103 | """ 104 | for name in dir(self): 105 | attr = getattr(self, name) 106 | if getattr(attr, '_interface_property', False): 107 | attach_cached_property(cls, name, attr) 108 | elif getattr(attr, '_interface_method', False): 109 | attach_attribute(cls, name, attr) 110 | 111 | def __and__(self, other): 112 | return CombinedParser(self, other) 113 | 114 | 115 | class CombinedParser(BaseParser): 116 | """ 117 | Combines multiple parsers in one. This can also be in different types. 118 | """ 119 | interface_class = CombinedInterface 120 | 121 | def __init__(self, *parsers, **kwargs): 122 | if parsers: 123 | self.parsers = parsers 124 | super(CombinedParser, self).__init__(**kwargs) 125 | 126 | @property 127 | def attributes(self): 128 | return super(CombinedParser, self).attributes + sum([parser.attributes for parser in self.parsers], []) 129 | 130 | def get_interface_kwargs(self): 131 | kwargs = super(CombinedParser, self).get_interface_kwargs() 132 | kwargs['parsers'] = self.parsers 133 | return kwargs 134 | 135 | 136 | class LXMLParser(BaseParser): 137 | 138 | def parse(self, *args, **kwargs): 139 | assert etree, 'Using %s, but lxml is not installed' % self.__class__.__name__ 140 | return super(LXMLParser, self).parse(*args, **kwargs) 141 | 142 | 143 | class HTMLParser(LXMLParser): 144 | interface_class = XPathInterface 145 | 146 | 147 | class XMLParser(LXMLParser): 148 | interface_class = XMLInterface 149 | 150 | def prepare_content(self, content): 151 | if isinstance(content, bytes): 152 | declaration, replacement = b'encoding="UTF-8"', b'' 153 | else: 154 | declaration, replacement = 'encoding="UTF-8"', '' 155 | return content.replace(declaration, replacement).replace(declaration.lower(), replacement) 156 | 157 | 158 | class XMLObjectifyParser(XMLParser): 159 | interface_class = XMLObjectifyInterface 160 | 161 | 162 | class JSONParser(BaseParser): 163 | interface_class = JSONInterface 164 | 165 | 166 | class YAMLParser(BaseParser): 167 | interface_class = YAMLInterface 168 | 169 | 170 | class AJAXParser(LXMLParser): 171 | interface_class = AJAXInterface 172 | 173 | 174 | class RegExpParser(BaseParser): 175 | interface_class = RegExpInterface 176 | 177 | def __init__(self, settings=None, strip=None, flags=0): 178 | self.flags = flags 179 | super(RegExpParser, self).__init__(settings, strip) 180 | 181 | def get_interface_kwargs(self): 182 | kwargs = super(RegExpParser, self).get_interface_kwargs() 183 | kwargs['flags'] = self.flags 184 | return kwargs 185 | 186 | 187 | class CSVParser(BaseParser): 188 | interface_class = CSVInterface 189 | 190 | def __init__(self, settings=None, strip=None, **reader_kwargs): 191 | self.reader_kwargs = reader_kwargs 192 | super(CSVParser, self).__init__(settings, strip) 193 | 194 | def get_interface_kwargs(self): 195 | kwargs = super(CSVParser, self).get_interface_kwargs() 196 | kwargs.update(self.reader_kwargs) 197 | return kwargs 198 | 199 | 200 | class IndexOfParser(BaseParser): 201 | interface_class = IndexOfInterface 202 | 203 | def prepare_content(self, content): 204 | if isinstance(content, bytes): 205 | try: 206 | return content.decode() 207 | except UnicodeDecodeError: # For python 2/3 compatibility 208 | pass 209 | return content 210 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | import platform 4 | import sys 5 | 6 | from setuptools import setup 7 | from setuptools.command.test import test as TestCommand 8 | 9 | import pyanyapi 10 | 11 | 12 | PYPY = hasattr(sys, 'pypy_translation_info') 13 | PYPY3 = PYPY and sys.version_info[0] == 3 14 | JYTHON = platform.system() == 'Java' 15 | 16 | 17 | class PyTest(TestCommand): 18 | user_options = [('pytest-args=', 'a', "Arguments to pass into py.test")] 19 | 20 | def initialize_options(self): 21 | TestCommand.initialize_options(self) 22 | self.pytest_args = [] 23 | 24 | def finalize_options(self): 25 | TestCommand.finalize_options(self) 26 | self.test_args = [] 27 | self.test_suite = True 28 | 29 | def run_tests(self): 30 | import pytest 31 | 32 | errno = pytest.main(self.pytest_args) 33 | sys.exit(errno) 34 | 35 | 36 | requirements = ['PyYAML>=3.11'] 37 | test_requirements = ['pytest>=2.8.0,<3.0.0'] 38 | 39 | 40 | if sys.version_info < (3, 3): 41 | test_requirements.append('mock==1.0.1') 42 | if sys.version_info[:2] == (3, 2): 43 | test_requirements.append('coverage==3.7.1') 44 | 45 | if not JYTHON: 46 | if not PYPY: 47 | requirements.append('ujson') 48 | if not PYPY3: 49 | if PYPY: 50 | requirements.append('lxml<3.5') 51 | else: 52 | requirements.append('lxml') 53 | test_requirements.append('pytest-cov>=1.8') 54 | 55 | 56 | setup( 57 | name='pyanyapi', 58 | url='https://github.com/Stranger6667/pyanyapi', 59 | version=pyanyapi.__version__, 60 | packages=['pyanyapi'], 61 | license='MIT', 62 | author='Dmitry Dygalo', 63 | author_email='dadygalo@gmail.com', 64 | maintainer='Dmitry Dygalo', 65 | maintainer_email='dadygalo@gmail.com', 66 | keywords=['parsing', 'interface', 'xml', 'json', 'csv', 'yaml', 'API', 'XPath', 'regexp'], 67 | description='Tools for convenient interface creation over various types of data in a declarative way.', 68 | classifiers=[ 69 | 'Development Status :: 5 - Production/Stable', 70 | 'Environment :: Console', 71 | 'Intended Audience :: Developers', 72 | 'Operating System :: OS Independent', 73 | 'License :: OSI Approved :: MIT License', 74 | 'Programming Language :: Python', 75 | 'Programming Language :: Python :: 2.6', 76 | 'Programming Language :: Python :: 2.7', 77 | 'Programming Language :: Python :: 3.2', 78 | 'Programming Language :: Python :: 3.3', 79 | 'Programming Language :: Python :: 3.4', 80 | 'Programming Language :: Python :: 3.5', 81 | 'Programming Language :: Python :: Implementation :: CPython', 82 | 'Programming Language :: Python :: Implementation :: PyPy', 83 | 'Programming Language :: Python :: Implementation :: Jython', 84 | 'Topic :: Text Processing :: General', 85 | 'Topic :: Utilities', 86 | ], 87 | cmdclass={'test': PyTest}, 88 | include_package_data=True, 89 | install_requires=requirements, 90 | tests_require=test_requirements 91 | ) 92 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | -------------------------------------------------------------------------------- /tests/_compat.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | 4 | try: 5 | from mock import patch 6 | except ImportError: 7 | from unittest.mock import patch 8 | -------------------------------------------------------------------------------- /tests/conftest.py: 
-------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import platform 3 | import sys 4 | 5 | import pytest 6 | 7 | from pyanyapi.decorators import interface_property, interface_method 8 | from pyanyapi.parsers import JSONParser, RegExpParser, CombinedParser, HTMLParser 9 | 10 | 11 | class EmptyValuesParser(CombinedParser): 12 | parsers = [ 13 | RegExpParser({'test': '\d,\d'}), 14 | JSONParser( 15 | { 16 | 'test': { 17 | 'base': 'container > test', 18 | }, 19 | 'second': { 20 | 'base': 'container > fail > 1', 21 | }, 22 | 'third': { 23 | 'base': 'container', 24 | 'children': '0' 25 | }, 26 | 'null': { 27 | 'base': 'container', 28 | } 29 | } 30 | ) 31 | ] 32 | 33 | @interface_property 34 | def combined(self): 35 | return '123-' + self.null 36 | 37 | 38 | @pytest.fixture 39 | def empty_values_parser(): 40 | return EmptyValuesParser() 41 | 42 | 43 | @pytest.fixture 44 | def dummy_parser(): 45 | 46 | class DummyParser(CombinedParser): 47 | parsers = ( 48 | JSONParser({'success': 'container > test'}), 49 | RegExpParser({'test': 'href=\'(.*)\''}), 50 | ) 51 | 52 | @interface_property 53 | def combined(self): 54 | return '123-' + self.success 55 | 56 | @interface_method 57 | def method(self, value): 58 | return self.success + value 59 | 60 | return DummyParser() 61 | 62 | 63 | class ParentParser(JSONParser): 64 | settings = { 65 | 'parent1': 'test1', 66 | 'parent2': 'test2' 67 | } 68 | 69 | 70 | class ChildParser(ParentParser): 71 | settings = { 72 | 'parent2': 'child_override', 73 | 'child1': 'test3', 74 | 'child2': 'test4' 75 | } 76 | 77 | 78 | class SubParser(HTMLParser): 79 | settings = { 80 | 'href': 'string(//@href)', 81 | 'text': 'string(//text())' 82 | } 83 | 84 | 85 | class SimpleParser(RegExpParser): 86 | settings = { 87 | 'test': '\d+.\d+', 88 | 'test2': '\d+', 89 | 'test3': 'a', 90 | } 91 | 92 | @interface_property 93 | def test4(self): 94 | return self.test2 + '_4' 95 | 96 | @interface_method 97 | def test_5(self, value): 98 | return 'Will not be included' 99 | 100 | 101 | PYPY = hasattr(sys, 'pypy_translation_info') and sys.version_info[0] == 2 102 | PYPY3 = hasattr(sys, 'pypy_translation_info') and sys.version_info[0] == 3 103 | JYTHON = platform.system() == 'Java' 104 | 105 | lxml_is_supported = pytest.mark.skipif(PYPY3 or JYTHON, reason='lxml is not supported') 106 | lxml_is_not_supported = pytest.mark.skipif(not (PYPY3 or JYTHON), reason='Only on if lxml is supported') 107 | not_pypy = pytest.mark.skipif(PYPY, reason='PyPy is not supported') 108 | -------------------------------------------------------------------------------- /tests/test_interfaces.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from pyanyapi.interfaces import IndexOfInterface 3 | 4 | 5 | def test_indexof_interface(): 6 | interface = IndexOfInterface('this is dummy content') 7 | assert interface.parse('dummy') 8 | assert not interface.parse('foo') 9 | -------------------------------------------------------------------------------- /tests/test_parsers.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import re 3 | 4 | import pytest 5 | 6 | from ._compat import patch 7 | from .conftest import ChildParser, SubParser, SimpleParser, lxml_is_supported, lxml_is_not_supported 8 | from pyanyapi.exceptions import ResponseParseError 9 | from pyanyapi.parsers import ( 10 | XMLObjectifyParser, 11 | XMLParser, 12 | JSONParser, 13 | YAMLParser, 14 | 
RegExpParser, 15 | AJAXParser, 16 | CSVParser, 17 | HTMLParser, 18 | IndexOfParser 19 | ) 20 | 21 | 22 | HTML_CONTENT = "<a href='#test'>test</a>" 23 | XML_CONTENT = '''<?xml version="1.0" encoding="UTF-8"?> 24 | <response> 25 | <id>32e9a4a2</id> 26 | <test-mode>1</test-mode> 27 | <status>accept</status> 28 | </response> 29 | ''' 30 | JSON_CONTENT = '{"container":{"test":"value"},"another":"123"}' 31 | YAML_CONTENT = 'container:\n test: "123"' 32 | AJAX_CONTENT = '{"content": "
<div><p>Pcontent</p></div><span>SPANcontent</span>",' \ 33 | '"second_part":"<div><p>second_p</p></div>","third":{"inner":"<div><p>third_p</p></div>
"}}' 34 | MULTILINE_CONTENT = '123\n234' 35 | CSV_CONTENT = '1,2,3\r\n4,5,6\r\n' 36 | CSV_CONTENT_DIFFERENT_DELIMITER = '1;2;3\r\n4;5;6\r\n' 37 | 38 | 39 | @lxml_is_supported 40 | def test_xml_objectify_parser(): 41 | parsed = XMLObjectifyParser().parse('123') 42 | assert parsed.test == 123 43 | assert parsed.not_existing is None 44 | 45 | 46 | @lxml_is_supported 47 | def test_xml_objectify_parser_error(): 48 | parsed = XMLObjectifyParser().parse('123') 49 | with pytest.raises(ResponseParseError): 50 | parsed.test 51 | 52 | 53 | @lxml_is_supported 54 | def test_xml_parser_error(): 55 | parsed = XMLParser({'test': None}).parse('123') 56 | with pytest.raises(ResponseParseError): 57 | parsed.test 58 | 59 | 60 | def test_yaml_parser_error(): 61 | parsed = YAMLParser({'test': 'test'}).parse('||') 62 | with pytest.raises(ResponseParseError): 63 | parsed.test 64 | 65 | 66 | def test_yaml_parser_vulnerability(): 67 | """ 68 | In case of usage of yaml.load `test` value will be equal to 0. 69 | """ 70 | parsed = YAMLParser({'test': 'container > test'}).parse('!!python/object/apply:os.system ["exit 0"]') 71 | with pytest.raises(ResponseParseError): 72 | parsed.test 73 | 74 | 75 | @lxml_is_supported 76 | @pytest.mark.parametrize( 77 | 'settings', ( 78 | {'success': {'base': '//test-mode/text()'}}, 79 | {'success': '//test-mode/text()'}, 80 | ) 81 | ) 82 | def test_xml_parsed(settings): 83 | parsed = XMLParser(settings).parse(XML_CONTENT) 84 | assert parsed.success == ['1'] 85 | assert parsed.parse('string(//id/text())') == '32e9a4a2' 86 | 87 | 88 | @lxml_is_supported 89 | def test_xml_simple_settings(): 90 | assert XMLParser({'id': {'base': 'string(//id/text())'}}).parse(XML_CONTENT).id == '32e9a4a2' 91 | 92 | 93 | def test_json_parsed(): 94 | content = ''' 95 | { 96 | "container": 97 | { 98 | "id": 1138003, 99 | "inner": 100 | [ 101 | { 102 | "end": { 103 | "id": 123 104 | } 105 | } 106 | ] 107 | } 108 | } 109 | ''' 110 | 111 | parser = JSONParser({ 112 | 'success': { 113 | 'base': 'container > inner > 0 > end > id' 114 | } 115 | }) 116 | assert parser.parse(content).success == 123 117 | parser = JSONParser({ 118 | 'success': { 119 | 'base': 'container > inner', 120 | 'children': 'end > id', 121 | } 122 | }) 123 | assert parser.parse(content).success == [123] 124 | 125 | 126 | def test_multiple_parser_join(): 127 | first_parser = RegExpParser({'test': 'href=\'(.*)\''}) 128 | second_parser = JSONParser({'success': 'container > test'}) 129 | for result_parser in ((first_parser & second_parser), (second_parser & first_parser)): 130 | assert result_parser.parse(HTML_CONTENT).test == '#test' 131 | assert result_parser.parse(JSON_CONTENT).success == 'value' 132 | third_parser = JSONParser({ 133 | 'fail': { 134 | 'base': 'container > test', 135 | } 136 | }) 137 | result_parser = first_parser & second_parser & third_parser 138 | assert result_parser.parse(JSON_CONTENT).success == 'value' 139 | 140 | 141 | def test_multiply_parsers_declaration(dummy_parser): 142 | parsed = dummy_parser.parse(JSON_CONTENT) 143 | assert parsed.success == 'value' 144 | assert parsed.combined == '123-value' 145 | assert parsed.method('-123') == 'value-123' 146 | assert parsed.test is None 147 | 148 | parsed = dummy_parser.parse(HTML_CONTENT) 149 | assert parsed.test == '#test' 150 | assert parsed.success is None 151 | 152 | 153 | @pytest.mark.parametrize( 154 | 'content, attr, expected', 155 | ( 156 | ('{"container":{"test":"value"}}', 'test', 'value'), 157 | ('{"container":{"test":"value"}}', 'second', None), 158 | 
('{"container":{"fail":[1]}}', 'second', None), 159 | ('{"container":[[1],[],[3]]}', 'third', [1, None, 3]), 160 | ('{"container":null}', 'null', None), 161 | ('{"container":[1,2]}', 'test', '1,2'), 162 | ) 163 | ) 164 | def test_empty_values(empty_values_parser, content, attr, expected): 165 | parsed = empty_values_parser.parse(content) 166 | assert getattr(parsed, attr) == expected 167 | 168 | 169 | def test_attributes(empty_values_parser): 170 | assert set(empty_values_parser.attributes) == set(['combined', 'test', 'test', 'second', 'null', 'third']) 171 | 172 | 173 | def test_efficient_parsing(empty_values_parser): 174 | with patch.object(empty_values_parser.parsers[0], 'parse') as regexp_parser: 175 | assert empty_values_parser.parse(JSON_CONTENT).second is None 176 | assert not regexp_parser.called 177 | 178 | 179 | @lxml_is_supported 180 | def test_simple_config_xml_parser(): 181 | parsed = XMLParser({'test': 'string(//test/text())'}).parse('123') 182 | assert parsed.test == '123' 183 | 184 | 185 | def test_simple_config_json_parser(): 186 | parsed = JSONParser({'test': 'container > test'}).parse(JSON_CONTENT) 187 | assert parsed.test == 'value' 188 | 189 | 190 | def test_settings_inheritance(): 191 | parser = ChildParser({'child2': 'override'}) 192 | assert parser.settings['child2'] == 'override' 193 | assert parser.settings['child1'] == 'test3' 194 | assert parser.settings['parent2'] == 'child_override' 195 | assert parser.settings['parent1'] == 'test1' 196 | 197 | 198 | @lxml_is_supported 199 | def test_complex_config(): 200 | parsed = XMLParser({'test': {'base': '//test', 'children': 'text()|*//text()'}}).parse( 201 | '123 234' 202 | ) 203 | assert parsed.test == ['123 ', ' 234'] 204 | 205 | 206 | def test_json_parse(): 207 | assert JSONParser({'test': 'container > test'}).parse(JSON_CONTENT).parse('another') == '123' 208 | 209 | 210 | def test_json_value_error_parse(): 211 | assert JSONParser({'test': 'container > test'}).parse('{"container":"1"}').test is None 212 | 213 | 214 | def test_regexp_parse(): 215 | assert RegExpParser({'digits': '\d+'}).parse('123abc').parse('[a-z]+') == 'abc' 216 | 217 | 218 | def test_yaml_parse(): 219 | assert YAMLParser({'test': 'container > test'}).parse(YAML_CONTENT).test == '123' 220 | 221 | 222 | @lxml_is_not_supported 223 | def test_lxml_not_supported(): 224 | with pytest.raises(AssertionError): 225 | XMLParser({'test': '//p'}).parse('') 226 | 227 | 228 | @lxml_is_supported 229 | def test_ajax_parser(): 230 | parsed = AJAXParser({'p': 'content > string(//p)', 'span': 'content > string(//span)'}).parse(AJAX_CONTENT) 231 | assert parsed.p == 'Pcontent' 232 | assert parsed.span == 'SPANcontent' 233 | assert parsed.parse('third > inner > string(//p)') == 'third_p' 234 | 235 | 236 | @lxml_is_supported 237 | def test_ajax_parser_cache(): 238 | parsed = AJAXParser({ 239 | 'p': 'content > string(//p)', 240 | 'span': 'content > string(//span)', 241 | 'second': 'second_part > string(//p)' 242 | }).parse(AJAX_CONTENT) 243 | assert parsed.p == 'Pcontent' 244 | inner_interface = parsed._inner_cache['content'] 245 | with patch.object(inner_interface, 'parse', wraps=inner_interface.parse) as patched: 246 | assert parsed.span == 'SPANcontent' 247 | assert len(parsed._inner_cache) == 1 248 | assert patched.call_count == 1 249 | assert parsed.second == 'second_p' 250 | assert patched.call_count == 1 251 | assert len(parsed._inner_cache) == 2 252 | 253 | 254 | @lxml_is_supported 255 | def test_ajax_parser_invalid_settings(): 256 | parsed = AJAXParser({ 257 | 
'valid': 'third > inner > string(//p)', 258 | 'invalid': 'third > string(//p)', 259 | }).parse(AJAX_CONTENT) 260 | assert parsed.valid == 'third_p' 261 | assert parsed.invalid == '' 262 | 263 | 264 | def test_parse_memoization(): 265 | api = JSONParser().parse(JSON_CONTENT) 266 | with patch.object(api, 'get_from_dict', wraps=api.get_from_dict) as patched: 267 | assert api.parse('container > test') == 'value' 268 | assert patched.call_count == 1 269 | assert api.parse('container > test') == 'value' 270 | assert patched.call_count == 1 271 | 272 | 273 | def test_regexp_settings(): 274 | assert RegExpParser({'test': '\d+.\d+'}).parse(MULTILINE_CONTENT).test == '123' 275 | assert RegExpParser({'test': '\d+.\d+'}, flags=re.DOTALL).parse(MULTILINE_CONTENT).test == '123\n234' 276 | 277 | 278 | def test_parse_all(): 279 | expected = {'test': '123\n234', 'test2': '123', 'test3': None, 'test4': '123_4'} 280 | parser = SimpleParser(flags=re.DOTALL) 281 | assert parser.parse(MULTILINE_CONTENT).parse_all() == expected 282 | assert parser.parse_all(MULTILINE_CONTENT) == expected 283 | 284 | 285 | def test_parse_all_combined_parser(dummy_parser): 286 | assert dummy_parser.parse(JSON_CONTENT).parse_all() == { 287 | 'success': 'value', 288 | 'combined': '123-value', 289 | 'test': None 290 | } 291 | 292 | 293 | def test_parse_csv(): 294 | api = CSVParser({'second': '1:2'}).parse(CSV_CONTENT) 295 | assert api.second == '6' 296 | assert api.parse('0:1') == '2' 297 | assert api.parse('0:6') is None 298 | 299 | 300 | def test_parse_csv_custom_delimiter(): 301 | api = CSVParser({'second': '1:2'}, delimiter=';').parse(CSV_CONTENT_DIFFERENT_DELIMITER) 302 | assert api.second == '6' 303 | assert api.parse('0:1') == '2' 304 | assert api.parse('0:6') is None 305 | 306 | 307 | def test_csv_parser_error(): 308 | parsed = CSVParser({'test': '1:1'}).parse(123) 309 | with pytest.raises(ResponseParseError): 310 | parsed.test 311 | 312 | 313 | @lxml_is_supported 314 | @pytest.mark.parametrize('sub_parser', (SubParser, SubParser())) 315 | def test_children(sub_parser): 316 | 317 | class Parser(HTMLParser): 318 | settings = { 319 | 'elem': { 320 | 'base': './/a', 321 | 'parser': sub_parser 322 | } 323 | } 324 | 325 | api = Parser().parse(HTML_CONTENT) 326 | sub_api = api.elem[0] 327 | assert sub_api.href == '#test' 328 | assert sub_api.text == 'test' 329 | assert api.parse_all() == {'elem': [{'href': '#test', 'text': 'test'}]} 330 | 331 | 332 | class BrokenObject(object): 333 | 334 | def __str__(self): 335 | return None 336 | 337 | 338 | class TestIndexOfParser: 339 | parser = IndexOfParser({ 340 | 'has_bar': 'bár', 341 | 'has_baz': 'báz', 342 | }) 343 | 344 | @pytest.mark.parametrize('content', ('foo-bár', b'foo-b\xc3\xa1r')) 345 | def test_default(self, content): 346 | parsed = self.parser.parse(content) 347 | assert parsed.has_bar 348 | assert not parsed.has_baz 349 | 350 | @pytest.mark.parametrize('attr', parser.settings.keys()) 351 | def test_parsing_error(self, attr): 352 | parsed = self.parser.parse(BrokenObject()) 353 | with pytest.raises(ResponseParseError): 354 | getattr(parsed, attr) 355 | -------------------------------------------------------------------------------- /tests/test_strip.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | from .conftest import lxml_is_supported, not_pypy 3 | from pyanyapi.parsers import RegExpParser, JSONParser, AJAXParser, XMLParser, XMLObjectifyParser 4 | 5 | 6 | JSON_CONTENT = '{"container":" 1 "}' 7 | AJAX_CONTENT = 
'{"content": "<div><p> Pcontent </p></div>"}' 8 | XML_CONTENT = '<div><p> Pcontent </p></div>
' 9 | OBJECTIFY_CONTENT = ''' 10 | abc 11 | bcd 12 | inside 13 | ''' 14 | 15 | 16 | def test_strip_regexp_parser(): 17 | settings = {'all': '.+'} 18 | assert RegExpParser(settings).parse(' 1 ').all == ' 1 ' 19 | assert RegExpParser(settings, strip=True).parse(' 1 ').all == '1' 20 | 21 | 22 | def test_strip_json_parser(): 23 | settings = {'all': 'container'} 24 | assert JSONParser(settings).parse(JSON_CONTENT).all == ' 1 ' 25 | assert JSONParser(settings, strip=True).parse(JSON_CONTENT).all == '1' 26 | 27 | 28 | @lxml_is_supported 29 | def test_strip_ajax_parser(): 30 | settings = {'all': 'content > string(//p)'} 31 | assert AJAXParser(settings).parse(AJAX_CONTENT).all == ' Pcontent ' 32 | assert AJAXParser(settings, strip=True).parse(AJAX_CONTENT).all == 'Pcontent' 33 | 34 | 35 | @lxml_is_supported 36 | def test_strip_xml_parser(): 37 | settings = {'all': 'string(//p)'} 38 | assert XMLParser(settings).parse(XML_CONTENT).all == ' Pcontent ' 39 | assert XMLParser(settings, strip=True).parse(XML_CONTENT).all == 'Pcontent' 40 | 41 | 42 | class CustomParser(RegExpParser): 43 | settings = {'all': '.+'} 44 | strip = True 45 | 46 | 47 | def test_class_override(): 48 | assert CustomParser().parse(' 1 ').all == '1' 49 | assert CustomParser(strip=False).parse(' 1 ').all == ' 1 ' 50 | 51 | 52 | @lxml_is_supported 53 | def test_objectify_strip_default(): 54 | default = XMLObjectifyParser().parse(OBJECTIFY_CONTENT) 55 | assert default.Messages.Message == ' abc ' 56 | assert default.test == ' bcd ' 57 | assert default.first.second.third == ' inside ' 58 | 59 | 60 | @lxml_is_supported 61 | @not_pypy 62 | def test_objectify_strip(): 63 | with_strip = XMLObjectifyParser(strip=True).parse(OBJECTIFY_CONTENT) 64 | assert with_strip.Messages.Message == 'abc' 65 | assert with_strip.test == 'bcd' 66 | assert with_strip.first.second.third == 'inside' 67 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py26, py27, py32, py33, py34, py35, pypy, pypy3 3 | 4 | [testenv] 5 | setenv = 6 | PYTHONPATH = {toxinidir}:{toxinidir}/pyanyapi 7 | whitelist_externals = make 8 | commands = make test --------------------------------------------------------------------------------
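
Taken together, the sources above suggest a compact, declarative usage pattern. The sketch below is assembled from the parser settings and assertions in tests/conftest.py and tests/test_parsers.py; the class name MyParser and the sample content are illustrative only, and docs/usage.rst remains the authoritative reference.

# Minimal usage sketch based on the tests above; MyParser and the sample
# content are invented for illustration, not part of the library.
from pyanyapi.decorators import interface_property
from pyanyapi.parsers import CombinedParser, JSONParser, RegExpParser


class MyParser(CombinedParser):
    parsers = (
        JSONParser({'success': 'container > test'}),  # dictionary-style lookup
        RegExpParser({'test': 'href=\'(.*)\''}),       # regular-expression fallback
    )

    @interface_property
    def combined(self):
        # Decorated methods are exposed on the generated interface as well.
        return '123-' + self.success


parsed = MyParser().parse('{"container":{"test":"value"}}')
assert parsed.success == 'value'
assert parsed.combined == '123-value'
assert parsed.parse_all() == {'success': 'value', 'combined': '123-value', 'test': None}

# Parsers can also be combined ad hoc with the & operator.
joined = RegExpParser({'test': 'href=\'(.*)\''}) & JSONParser({'success': 'container > test'})
assert joined.parse("<a href='#test'>test</a>").test == '#test'

Note that attribute lookups on a combined interface only consult parsers whose settings declare the requested attribute (see test_efficient_parsing), so unrelated parsers are never invoked.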
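
The 'base'/'children' settings form documented on XPathInterface can be exercised the same way. In the sketch below the XML snippet and element names are invented for illustration: 'base' selects one element per result, and 'children' is evaluated against each of those elements, with the resulting text nodes joined into a single string.

# Sketch of the 'base'/'children' settings structure; content is invented.
from pyanyapi.parsers import XMLParser

parser = XMLParser({
    'errors': {
        'base': '//error',               # one entry in the result per matching element
        'children': 'text()|*//text()',  # text of the element and its descendants
    }
})
api = parser.parse(
    '<response>'
    '<error>Bad <field>password</field></error>'
    '<error>Expired token</error>'
    '</response>'
)
assert api.errors == ['Bad password', 'Expired token']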