├── .coveragerc
├── .github
└── FUNDING.yml
├── .gitignore
├── .travis.yml
├── CODE_OF_CONDUCT.md
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.rst
├── docs
├── Makefile
├── changelog.rst
├── complex.rst
├── conf.py
├── index.rst
├── parsers.rst
├── requirements.txt
└── usage.rst
├── pyanyapi
├── __init__.py
├── _compat.py
├── decorators.py
├── exceptions.py
├── helpers.py
├── interfaces.py
└── parsers.py
├── setup.cfg
├── setup.py
├── tests
├── __init__.py
├── _compat.py
├── conftest.py
├── test_interfaces.py
├── test_parsers.py
└── test_strip.py
└── tox.ini
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | branch = true
3 |
4 | [report]
5 | show_missing = true
6 | precision = 2
7 | exclude_lines = raise NotImplementedError
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: Stranger6667
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 |
3 | # C extensions
4 | *.so
5 |
6 | # Packages
7 | *.egg
8 | *.eggs
9 | *.egg-info
10 | dist
11 | build
12 | eggs
13 | parts
14 | bin
15 | var
16 | sdist
17 | develop-eggs
18 | .installed.cfg
19 | lib
20 | lib64
21 | venv*/
22 | pyvenv*/
23 |
24 | # Installer logs
25 | pip-log.txt
26 |
27 | # Unit test / coverage reports
28 | .coverage
29 | coverage.xml
30 | junit.xml
31 | .tox
32 | .coverage.*
33 | htmlcov
34 |
35 | # Translations
36 | *.mo
37 |
38 | .idea
39 |
40 | .DS_Store
41 | *~
42 | .*.sw[po]
43 | .build
44 | .ve
45 | .env
46 | .bootstrap
47 | *.bak
48 | docs/_build
49 | .cache
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - 3.5
4 | matrix:
5 | fast_finish: true
6 | include:
7 | - python: 3.5
8 | env: TOX_ENV=py35
9 | - python: 3.4
10 | env: TOX_ENV=py34
11 | - python: 3.3
12 | env: TOX_ENV=py33
13 | - python: 3.2
14 | env: TOX_ENV=py32
15 | - python: 2.7
16 | env: TOX_ENV=py27
17 | - python: 2.6
18 | env: TOX_ENV=py26
19 | - python: pypy
20 | env: TOX_ENV=pypy
21 | - python: pypy3
22 | env: TOX_ENV=pypy3
23 | - python: 3.5
24 | env: $JYTHON=true
25 | install:
26 | - if [ $TOX_ENV = "py32" ]; then travis_retry pip install "virtualenv<14.0.0" "tox<1.8.0"; fi
27 | - if [ $TOX_ENV = "pypy3" ]; then travis_retry pip install "virtualenv<14.0.0" "tox<1.8.0"; fi
28 | - if [ -z "$JYTHON" ]; then pip install codecov; fi
29 | - if [ "$TOX_ENV" ]; then travis_retry pip install "virtualenv<14.0.0" tox; fi
30 | before_install:
31 | - export JYTHON_URL='http://search.maven.org/remotecontent?filepath=org/python/jython-installer/2.7.0/jython-installer-2.7.0.jar'
32 | - if [ "$JYTHON" ]; then wget $JYTHON_URL -O jython_installer.jar; java -jar jython_installer.jar -s -d $HOME/jython; export PATH=$HOME/jython/bin:$PATH; fi
33 |
34 | script:
35 | - if [ "$JYTHON" ]; then travis_retry jython setup.py test; fi
36 | - if [ "$TOX_ENV" ]; then tox -e $TOX_ENV; fi
37 | after_success:
38 | - codecov
39 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as
6 | contributors and maintainers pledge to making participation in our project and
7 | our community a harassment-free experience for everyone, regardless of age, body
8 | size, disability, ethnicity, sex characteristics, gender identity and expression,
9 | level of experience, education, socio-economic status, nationality, personal
10 | appearance, race, religion, or sexual identity and orientation.
11 |
12 | ## Our Standards
13 |
14 | Examples of behavior that contributes to creating a positive environment
15 | include:
16 |
17 | * Using welcoming and inclusive language
18 | * Being respectful of differing viewpoints and experiences
19 | * Gracefully accepting constructive criticism
20 | * Focusing on what is best for the community
21 | * Showing empathy towards other community members
22 |
23 | Examples of unacceptable behavior by participants include:
24 |
25 | * The use of sexualized language or imagery and unwelcome sexual attention or
26 | advances
27 | * Trolling, insulting/derogatory comments, and personal or political attacks
28 | * Public or private harassment
29 | * Publishing others' private information, such as a physical or electronic
30 | address, without explicit permission
31 | * Other conduct which could reasonably be considered inappropriate in a
32 | professional setting
33 |
34 | ## Our Responsibilities
35 |
36 | Project maintainers are responsible for clarifying the standards of acceptable
37 | behavior and are expected to take appropriate and fair corrective action in
38 | response to any instances of unacceptable behavior.
39 |
40 | Project maintainers have the right and responsibility to remove, edit, or
41 | reject comments, commits, code, wiki edits, issues, and other contributions
42 | that are not aligned to this Code of Conduct, or to ban temporarily or
43 | permanently any contributor for other behaviors that they deem inappropriate,
44 | threatening, offensive, or harmful.
45 |
46 | ## Scope
47 |
48 | This Code of Conduct applies both within project spaces and in public spaces
49 | when an individual is representing the project or its community. Examples of
50 | representing a project or community include using an official project e-mail
51 | address, posting via an official social media account, or acting as an appointed
52 | representative at an online or offline event. Representation of a project may be
53 | further defined and clarified by project maintainers.
54 |
55 | ## Enforcement
56 |
57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
58 | reported by contacting the project team at dadygalo@gmail.com. All
59 | complaints will be reviewed and investigated and will result in a response that
60 | is deemed necessary and appropriate to the circumstances. The project team is
61 | obligated to maintain confidentiality with regard to the reporter of an incident.
62 | Further details of specific enforcement policies may be posted separately.
63 |
64 | Project maintainers who do not follow or enforce the Code of Conduct in good
65 | faith may face temporary or permanent repercussions as determined by other
66 | members of the project's leadership.
67 |
68 | ## Attribution
69 |
70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
72 |
73 | [homepage]: https://www.contributor-covenant.org
74 |
75 | For answers to common questions about this code of conduct, see
76 | https://www.contributor-covenant.org/faq
77 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Dmitry Dygalo
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include CHANGELOG.md
2 | include README.rst
3 |
4 | recursive-include tests *
5 | recursive-exclude * __pycache__
6 | recursive-exclude * *.py[co]
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | help:
2 | @echo "clean - remove all build, test, coverage and Python artifacts"
3 | @echo "clean-build - remove build artifacts"
4 | @echo "clean-pyc - remove Python file artifacts"
5 | @echo "clean-test - remove test and coverage artifacts"
6 | @echo "test - run tests quickly with the default Python"
7 | @echo "test-all - run tests on every Python version with tox"
8 | @echo "coverage - check code coverage quickly with the default Python"
9 | @echo "install - install the package to the active Python's site-packages"
10 |
11 | clean: clean-test clean-build clean-pyc
12 |
13 | clean-build:
14 | rm -fr build/
15 | rm -fr dist/
16 | rm -fr .eggs/
17 | find . -name '*.egg-info' -exec rm -fr {} +
18 | find . -name '*.egg' -exec rm -fr {} +
19 |
20 | clean-pyc:
21 | find . -name '*.pyc' -exec rm -f {} +
22 | find . -name '*.pyo' -exec rm -f {} +
23 | find . -name '*~' -exec rm -f {} +
24 | find . -name '__pycache__' -exec rm -fr {} +
25 |
26 | clean-test:
27 | rm -fr .cache
28 | rm -fr .tox/
29 | rm -f .coverage
30 | rm -fr htmlcov/
31 |
32 | test:
33 | python setup.py test --pytest-args="--cov=pyanyapi --cov-report xml"
34 |
35 | test-all:
36 | tox
37 |
38 | coverage:
39 | coverage run --source pyanyapi setup.py test
40 | coverage report -m
41 | coverage html
42 | open htmlcov/index.html
43 |
44 | install: clean
45 | python setup.py install
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | PyAnyAPI
2 | ========
3 |
4 | Tools for convenient interface creation over various types of data in
5 | a declarative way.
6 |
7 | .. image:: https://travis-ci.org/Stranger6667/pyanyapi.svg?branch=master
8 | :target: https://travis-ci.org/Stranger6667/pyanyapi
9 | :alt: Build Status
10 |
11 | .. image:: https://codecov.io/github/Stranger6667/pyanyapi/coverage.svg?branch=master
12 | :target: https://codecov.io/github/Stranger6667/pyanyapi?branch=master
13 | :alt: Coverage Status
14 |
15 | .. image:: https://readthedocs.org/projects/pyanyapi/badge/?version=latest
16 | :target: http://pyanyapi.readthedocs.io/en/latest/?badge=latest
17 | :alt: Documentation Status
18 |
19 | Installation
20 | ------------
21 |
22 | The current stable release:
23 |
24 | ::
25 |
26 | pip install pyanyapi
27 |
28 | or:
29 |
30 | ::
31 |
32 | easy_install pyanyapi
33 |
34 | or from source:
35 |
36 | ::
37 |
38 | $ sudo python setup.py install
39 |
40 | Usage
41 | -----
42 |
43 | The library provides the ability to create an API over various kinds of content.
44 | Currently there are bundled tools to work with HTML, XML, CSV, JSON and YAML.
45 | Initially it was created to work with ``requests`` library.
46 |
47 | Basic parsers can be declared in the following way:
48 |
49 | .. code-block:: python
50 |
51 | from pyanyapi.parsers import HTMLParser
52 |
53 |
54 | class SimpleParser(HTMLParser):
55 | settings = {'header': 'string(.//h1/text())'}
56 |
57 |
58 | >>> api = SimpleParser().parse('<html><body><h1>Value</h1></body></html>')
59 | >>> api.header
60 | Value
61 |
62 | Documentation
63 | -------------
64 |
65 | You can view documentation online at:
66 |
67 | - https://pyanyapi.readthedocs.io
68 |
69 | Or you can look at the docs/ directory in the repository.
70 |
71 | Python support
72 | --------------
73 |
74 | PyAnyAPI supports Python 2.6, 2.7, 3.2, 3.3, 3.4, 3.5, PyPy and partially PyPy3 and Jython.
75 | Unfortunately ``lxml`` doesn't support PyPy3 and Jython, so HTML & XML parsing is not supported on PyPy3 and Jython.
76 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = _build
9 |
10 | # Internal variables.
11 | PAPEROPT_a4 = -D latex_paper_size=a4
12 | PAPEROPT_letter = -D latex_paper_size=letter
13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
14 | # the i18n builder cannot share the environment and doctrees with the others
15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
16 |
17 | .PHONY: help
18 | help:
19 | @echo "Please use \`make <target>' where <target> is one of"
20 | @echo " html to make standalone HTML files"
21 | @echo " dirhtml to make HTML files named index.html in directories"
22 | @echo " singlehtml to make a single large HTML file"
23 | @echo " pickle to make pickle files"
24 | @echo " json to make JSON files"
25 | @echo " htmlhelp to make HTML files and a HTML help project"
26 | @echo " qthelp to make HTML files and a qthelp project"
27 | @echo " applehelp to make an Apple Help Book"
28 | @echo " devhelp to make HTML files and a Devhelp project"
29 | @echo " epub to make an epub"
30 | @echo " epub3 to make an epub3"
31 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
32 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
33 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
34 | @echo " text to make text files"
35 | @echo " man to make manual pages"
36 | @echo " texinfo to make Texinfo files"
37 | @echo " info to make Texinfo files and run them through makeinfo"
38 | @echo " gettext to make PO message catalogs"
39 | @echo " changes to make an overview of all changed/added/deprecated items"
40 | @echo " xml to make Docutils-native XML files"
41 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
42 | @echo " linkcheck to check all external links for integrity"
43 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
44 | @echo " coverage to run coverage check of the documentation (if enabled)"
45 | @echo " dummy to check syntax errors of document sources"
46 |
47 | .PHONY: clean
48 | clean:
49 | rm -rf $(BUILDDIR)/*
50 |
51 | .PHONY: html
52 | html:
53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
54 | @echo
55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
56 |
57 | .PHONY: dirhtml
58 | dirhtml:
59 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
60 | @echo
61 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
62 |
63 | .PHONY: singlehtml
64 | singlehtml:
65 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
66 | @echo
67 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
68 |
69 | .PHONY: pickle
70 | pickle:
71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
72 | @echo
73 | @echo "Build finished; now you can process the pickle files."
74 |
75 | .PHONY: json
76 | json:
77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
78 | @echo
79 | @echo "Build finished; now you can process the JSON files."
80 |
81 | .PHONY: htmlhelp
82 | htmlhelp:
83 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
84 | @echo
85 | @echo "Build finished; now you can run HTML Help Workshop with the" \
86 | ".hhp project file in $(BUILDDIR)/htmlhelp."
87 |
88 | .PHONY: qthelp
89 | qthelp:
90 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
91 | @echo
92 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
93 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
94 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PyAnyAPI.qhcp"
95 | @echo "To view the help file:"
96 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PyAnyAPI.qhc"
97 |
98 | .PHONY: applehelp
99 | applehelp:
100 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
101 | @echo
102 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
103 | @echo "N.B. You won't be able to view it unless you put it in" \
104 | "~/Library/Documentation/Help or install it in your application" \
105 | "bundle."
106 |
107 | .PHONY: devhelp
108 | devhelp:
109 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
110 | @echo
111 | @echo "Build finished."
112 | @echo "To view the help file:"
113 | @echo "# mkdir -p $$HOME/.local/share/devhelp/PyAnyAPI"
114 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/PyAnyAPI"
115 | @echo "# devhelp"
116 |
117 | .PHONY: epub
118 | epub:
119 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
120 | @echo
121 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
122 |
123 | .PHONY: epub3
124 | epub3:
125 | $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
126 | @echo
127 | @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
128 |
129 | .PHONY: latex
130 | latex:
131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
132 | @echo
133 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
134 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
135 | "(use \`make latexpdf' here to do that automatically)."
136 |
137 | .PHONY: latexpdf
138 | latexpdf:
139 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
140 | @echo "Running LaTeX files through pdflatex..."
141 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
142 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
143 |
144 | .PHONY: latexpdfja
145 | latexpdfja:
146 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
147 | @echo "Running LaTeX files through platex and dvipdfmx..."
148 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
149 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
150 |
151 | .PHONY: text
152 | text:
153 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
154 | @echo
155 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
156 |
157 | .PHONY: man
158 | man:
159 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
160 | @echo
161 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
162 |
163 | .PHONY: texinfo
164 | texinfo:
165 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
166 | @echo
167 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
168 | @echo "Run \`make' in that directory to run these through makeinfo" \
169 | "(use \`make info' here to do that automatically)."
170 |
171 | .PHONY: info
172 | info:
173 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
174 | @echo "Running Texinfo files through makeinfo..."
175 | make -C $(BUILDDIR)/texinfo info
176 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
177 |
178 | .PHONY: gettext
179 | gettext:
180 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
181 | @echo
182 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
183 |
184 | .PHONY: changes
185 | changes:
186 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
187 | @echo
188 | @echo "The overview file is in $(BUILDDIR)/changes."
189 |
190 | .PHONY: linkcheck
191 | linkcheck:
192 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
193 | @echo
194 | @echo "Link check complete; look for any errors in the above output " \
195 | "or in $(BUILDDIR)/linkcheck/output.txt."
196 |
197 | .PHONY: doctest
198 | doctest:
199 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
200 | @echo "Testing of doctests in the sources finished, look at the " \
201 | "results in $(BUILDDIR)/doctest/output.txt."
202 |
203 | .PHONY: coverage
204 | coverage:
205 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
206 | @echo "Testing of coverage in the sources finished, look at the " \
207 | "results in $(BUILDDIR)/coverage/python.txt."
208 |
209 | .PHONY: xml
210 | xml:
211 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
212 | @echo
213 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
214 |
215 | .PHONY: pseudoxml
216 | pseudoxml:
217 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
218 | @echo
219 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
220 |
221 | .PHONY: dummy
222 | dummy:
223 | $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
224 | @echo
225 | @echo "Build finished. Dummy builder generates no files."
226 |
--------------------------------------------------------------------------------
/docs/changelog.rst:
--------------------------------------------------------------------------------
1 | .. _changelog:
2 |
3 | Changelog
4 | =========
5 |
6 | 0.6.0 - 09.08.2016
7 | ------------------
8 |
9 | * IndexOf parser.
10 |
11 | 0.5.8 - 14.07.2016
12 | ------------------
13 |
14 | * Fixed XML content parsing for bytes input.
15 |
16 | 0.5.7 - 28.01.2016
17 | ------------------
18 |
19 | * Added ``parse_all`` call on subparsers (`#37`_).
20 |
21 | 0.5.6 - 24.11.2015
22 | ------------------
23 |
24 | * Fixed ``super`` call in exception.
25 |
26 | 0.5.5 - 23.11.2015
27 | ------------------
28 |
29 | * Add content to exceptions in case of parsing errors (`#35`_).
30 |
31 | 0.5.4 - 15.11.2015
32 | ------------------
33 |
34 | * Fixed ``lxml`` installation on PyPy (`#34`_).
35 | * Add support for subparsers (`#32`_).
36 |
37 | 0.5.3 - 30.10.2015
38 | ------------------
39 |
40 | * Disable stripping in XMLObjectifyParser on PyPy (`#30`_).
41 |
42 | 0.5.2 - 20.10.2015
43 | ------------------
44 |
45 | * Fix incorrect stripping in XMLObjectifyParser (`#29`_).
46 |
47 | 0.5.1 - 20.10.2015
48 | ------------------
49 |
50 | * Ability to override ``strip`` attribute at class level (`#27`_).
51 | * Fix ``strip`` in XMLObjectifyParser (`#28`_).
52 |
53 | 0.5 - 05.10.2015
54 | ----------------
55 |
56 | * Add ``parse_all`` to parse all settings (`#20`_).
57 | * Settings for regular expressions (`#19`_).
58 | * Add ``strip`` option to strip trailing whitespaces (`#14`_).
59 | * Add CSVParser (`#11`_).
60 |
61 | 0.4 - 29.09.2015
62 | ----------------
63 |
64 | * Add YAMLParser (`#5`_).
65 | * Add AJAXParser (`#9`_).
66 | * ``parse`` calls memoization (`#18`_).
67 |
68 | 0.3 - 24.09.2015
69 | ----------------
70 |
71 | * Add partial support for PyPy3 (`#7`_).
72 | * Add partial support for Jython (`#6`_).
73 | * Add ujson as dependency where it is possible (`#4`_).
74 | * Lxml will not be installed where it is not supported (`#3`_).
75 |
76 | 0.2.1 - 23.09.2015
77 | ------------------
78 |
79 | * Remove encoding declaration for XMLObjectifyParser
80 |
81 | 0.2 - 23.09.2015
82 | ----------------
83 |
84 | * Add ``parse`` methods for JSONInterface & RegExpInterface (`#8`_).
85 | * Add universal wheel config (`#2`_).
86 |
87 | 0.1 - 22.09.2015
88 | ----------------
89 |
90 | * First release.
91 |
92 | .. _#37: https://github.com/Stranger6667/pyanyapi/issues/37
93 | .. _#35: https://github.com/Stranger6667/pyanyapi/issues/35
94 | .. _#34: https://github.com/Stranger6667/pyanyapi/issues/34
95 | .. _#32: https://github.com/Stranger6667/pyanyapi/issues/32
96 | .. _#30: https://github.com/Stranger6667/pyanyapi/issues/30
97 | .. _#29: https://github.com/Stranger6667/pyanyapi/issues/29
98 | .. _#28: https://github.com/Stranger6667/pyanyapi/issues/28
99 | .. _#27: https://github.com/Stranger6667/pyanyapi/issues/27
100 | .. _#20: https://github.com/Stranger6667/pyanyapi/issues/20
101 | .. _#19: https://github.com/Stranger6667/pyanyapi/issues/19
102 | .. _#18: https://github.com/Stranger6667/pyanyapi/issues/18
103 | .. _#14: https://github.com/Stranger6667/pyanyapi/issues/14
104 | .. _#11: https://github.com/Stranger6667/pyanyapi/issues/11
105 | .. _#9: https://github.com/Stranger6667/pyanyapi/issues/9
106 | .. _#8: https://github.com/Stranger6667/pyanyapi/issues/8
107 | .. _#7: https://github.com/Stranger6667/pyanyapi/issues/7
108 | .. _#6: https://github.com/Stranger6667/pyanyapi/issues/6
109 | .. _#5: https://github.com/Stranger6667/pyanyapi/issues/5
110 | .. _#4: https://github.com/Stranger6667/pyanyapi/issues/4
111 | .. _#3: https://github.com/Stranger6667/pyanyapi/issues/3
112 | .. _#2: https://github.com/Stranger6667/pyanyapi/issues/2
--------------------------------------------------------------------------------
/docs/complex.rst:
--------------------------------------------------------------------------------
1 | .. _complex:
2 |
3 | Complex content parsing
4 | =======================
5 |
6 | Combined parsers
7 | ~~~~~~~~~~~~~~~~
8 |
9 | In situations where the content type is not known before parsing,
10 | you can create a combined parser, which allows you to use multiple
11 | different parsers transparently. E.g. some server usually returns JSON,
12 | but in case of server errors it returns HTML pages with some text.
13 | Then:
14 |
15 | .. code-block:: python
16 |
17 | from pyanyapi.parsers import CombinedParser, HTMLParser, JSONParser
18 |
19 |
20 | class Parser(CombinedParser):
21 | parsers = [
22 | JSONParser({'test': 'test'}),
23 | HTMLParser({'error': 'string(//span)'})
24 | ]
25 |
26 | >>> parser = Parser()
27 | >>> parser.parse('{"test": "Text"}').test
28 | Text
29 | >>> parser.parse('<body><span>123</span></body>').error
30 | 123
31 |
32 | Another example
33 | ~~~~~~~~~~~~~~~
34 |
35 | Sometimes different content types can be combined inside a single string,
36 | often in responses to AJAX requests.
37 |
38 | .. code:: javascript
39 |
40 | {"content": "<span>Text</span>"}
41 |
42 | You can work with such data in the following way:
43 |
44 | .. code-block:: python
45 |
46 | from pyanyapi.decorators import interface_property
47 | from pyanyapi.parsers import HTMLParser, JSONParser
48 |
49 |
50 | inner_parser = HTMLParser({'text': 'string(.//span/text())'})
51 |
52 |
53 | class AJAXParser(JSONParser):
54 | settings = {'content': 'content'}
55 |
56 | @interface_property
57 | def text(self):
58 | return inner_parser.parse(self.content).text
59 |
60 |
61 | >>> api = AJAXParser().parse('{"content": "<span>Text</span>"}')
62 | >>> api.text
63 | Text
64 |
65 | Now AJAXParser is bundled in pyanyapi, but it works differently.
66 | But anyway, this example can be helpful for building custom parsers.
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | #
4 | # PyAnyAPI documentation build configuration file, created by
5 | # sphinx-quickstart on Tue Sep 27 12:18:20 2016.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | # If extensions (or modules to document with autodoc) are in another directory,
17 | # add these directories to sys.path here. If the directory is relative to the
18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
19 | #
20 | import os
21 | import sys
22 |
23 | sys.path.insert(0, os.path.abspath('..'))
24 |
25 |
26 | import sphinx_rtd_theme
27 | from pyanyapi import __version__
28 |
29 | # -- General configuration ------------------------------------------------
30 |
31 | # If your documentation needs a minimal Sphinx version, state it here.
32 | #
33 | # needs_sphinx = '1.4.6'
34 |
35 | # Add any Sphinx extension module names here, as strings. They can be
36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
37 | # ones.
38 | extensions = [
39 | 'sphinx.ext.autodoc',
40 | 'sphinx.ext.coverage',
41 | 'sphinx.ext.viewcode',
42 | ]
43 | # Add any paths that contain templates here, relative to this directory.
44 | templates_path = ['_templates']
45 |
46 | # The suffix(es) of source filenames.
47 | # You can specify multiple suffix as a list of string:
48 | #
49 | # source_suffix = ['.rst', '.md']
50 | source_suffix = '.rst'
51 |
52 | # The encoding of source files.
53 | #
54 | # source_encoding = 'utf-8-sig'
55 |
56 | # The master toctree document.
57 | master_doc = 'index'
58 |
59 | # General information about the project.
60 | project = 'PyAnyAPI'
61 | copyright = '2016, Dmitry Dygalo'
62 | author = 'Dmitry Dygalo'
63 |
64 | # The version info for the project you're documenting, acts as replacement for
65 | # |version| and |release|, also used in various other places throughout the
66 | # built documents.
67 | #
68 | # The short X.Y version.
69 | version = release = __version__
70 |
71 | # The full version, including alpha/beta/rc tags.
72 |
73 | # The language for content autogenerated by Sphinx. Refer to documentation
74 | # for a list of supported languages.
75 | #
76 | # This is also used if you do content translation via gettext catalogs.
77 | # Usually you set "language" from the command line for these cases.
78 | language = None
79 |
80 | # There are two options for replacing |today|: either, you set today to some
81 | # non-false value, then it is used:
82 | #
83 | # today = ''
84 | #
85 | # Else, today_fmt is used as the format for a strftime call.
86 | #
87 | # today_fmt = '%B %d, %Y'
88 |
89 | # List of patterns, relative to source directory, that match files and
90 | # directories to ignore when looking for source files.
91 | # These patterns also affect html_static_path and html_extra_path
92 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
93 |
94 | # The reST default role (used for this markup: `text`) to use for all
95 | # documents.
96 | #
97 | # default_role = None
98 |
99 | # If true, '()' will be appended to :func: etc. cross-reference text.
100 | #
101 | # add_function_parentheses = True
102 |
103 | # If true, the current module name will be prepended to all description
104 | # unit titles (such as .. function::).
105 | #
106 | # add_module_names = True
107 |
108 | # If true, sectionauthor and moduleauthor directives will be shown in the
109 | # output. They are ignored by default.
110 | #
111 | # show_authors = False
112 |
113 | # The name of the Pygments (syntax highlighting) style to use.
114 | pygments_style = 'sphinx'
115 |
116 | # A list of ignored prefixes for module index sorting.
117 | # modindex_common_prefix = []
118 |
119 | # If true, keep warnings as "system message" paragraphs in the built documents.
120 | # keep_warnings = False
121 |
122 | # If true, `todo` and `todoList` produce output, else they produce nothing.
123 | todo_include_todos = False
124 |
125 |
126 | # -- Options for HTML output ----------------------------------------------
127 |
128 | # The theme to use for HTML and HTML Help pages. See the documentation for
129 | # a list of builtin themes.
130 | #
131 | html_theme = 'sphinx_rtd_theme'
132 |
133 | # Theme options are theme-specific and customize the look and feel of a theme
134 | # further. For a list of options available for each theme, see the
135 | # documentation.
136 | #
137 | # html_theme_options = {}
138 |
139 | # Add any paths that contain custom themes here, relative to this directory.
140 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
141 |
142 | # The name for this set of Sphinx documents.
143 | # "<project> v<release> documentation" by default.
144 | #
145 | # html_title = 'PyAnyAPI v0.6.0'
146 |
147 | # A shorter title for the navigation bar. Default is the same as html_title.
148 | #
149 | # html_short_title = None
150 |
151 | # The name of an image file (relative to this directory) to place at the top
152 | # of the sidebar.
153 | #
154 | # html_logo = None
155 |
156 | # The name of an image file (relative to this directory) to use as a favicon of
157 | # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
158 | # pixels large.
159 | #
160 | # html_favicon = None
161 |
162 | # Add any paths that contain custom static files (such as style sheets) here,
163 | # relative to this directory. They are copied after the builtin static files,
164 | # so a file named "default.css" will overwrite the builtin "default.css".
165 | html_static_path = ['_static']
166 |
167 | # Add any extra paths that contain custom files (such as robots.txt or
168 | # .htaccess) here, relative to this directory. These files are copied
169 | # directly to the root of the documentation.
170 | #
171 | # html_extra_path = []
172 |
173 | # If not None, a 'Last updated on:' timestamp is inserted at every page
174 | # bottom, using the given strftime format.
175 | # The empty string is equivalent to '%b %d, %Y'.
176 | #
177 | # html_last_updated_fmt = None
178 |
179 | # If true, SmartyPants will be used to convert quotes and dashes to
180 | # typographically correct entities.
181 | #
182 | # html_use_smartypants = True
183 |
184 | # Custom sidebar templates, maps document names to template names.
185 | #
186 | # html_sidebars = {}
187 |
188 | # Additional templates that should be rendered to pages, maps page names to
189 | # template names.
190 | #
191 | # html_additional_pages = {}
192 |
193 | # If false, no module index is generated.
194 | #
195 | # html_domain_indices = True
196 |
197 | # If false, no index is generated.
198 | #
199 | # html_use_index = True
200 |
201 | # If true, the index is split into individual pages for each letter.
202 | #
203 | # html_split_index = False
204 |
205 | # If true, links to the reST sources are added to the pages.
206 | #
207 | # html_show_sourcelink = True
208 |
209 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
210 | #
211 | # html_show_sphinx = True
212 |
213 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
214 | #
215 | # html_show_copyright = True
216 |
217 | # If true, an OpenSearch description file will be output, and all pages will
218 | # contain a <link> tag referring to it. The value of this option must be the
219 | # base URL from which the finished HTML is served.
220 | #
221 | # html_use_opensearch = ''
222 |
223 | # This is the file name suffix for HTML files (e.g. ".xhtml").
224 | # html_file_suffix = None
225 |
226 | # Language to be used for generating the HTML full-text search index.
227 | # Sphinx supports the following languages:
228 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
229 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
230 | #
231 | # html_search_language = 'en'
232 |
233 | # A dictionary with options for the search language support, empty by default.
234 | # 'ja' uses this config value.
235 | # 'zh' user can custom change `jieba` dictionary path.
236 | #
237 | # html_search_options = {'type': 'default'}
238 |
239 | # The name of a javascript file (relative to the configuration directory) that
240 | # implements a search results scorer. If empty, the default will be used.
241 | #
242 | # html_search_scorer = 'scorer.js'
243 |
244 | # Output file base name for HTML help builder.
245 | htmlhelp_basename = 'PyAnyAPIdoc'
246 |
247 | # -- Options for LaTeX output ---------------------------------------------
248 |
249 | latex_elements = {
250 | # The paper size ('letterpaper' or 'a4paper').
251 | #
252 | # 'papersize': 'letterpaper',
253 |
254 | # The font size ('10pt', '11pt' or '12pt').
255 | #
256 | # 'pointsize': '10pt',
257 |
258 | # Additional stuff for the LaTeX preamble.
259 | #
260 | # 'preamble': '',
261 |
262 | # Latex figure (float) alignment
263 | #
264 | # 'figure_align': 'htbp',
265 | }
266 |
267 | # Grouping the document tree into LaTeX files. List of tuples
268 | # (source start file, target name, title,
269 | # author, documentclass [howto, manual, or own class]).
270 | latex_documents = [
271 | (master_doc, 'PyAnyAPI.tex', 'PyAnyAPI Documentation',
272 | 'Dmitry Dygalo', 'manual'),
273 | ]
274 |
275 | # The name of an image file (relative to this directory) to place at the top of
276 | # the title page.
277 | #
278 | # latex_logo = None
279 |
280 | # For "manual" documents, if this is true, then toplevel headings are parts,
281 | # not chapters.
282 | #
283 | # latex_use_parts = False
284 |
285 | # If true, show page references after internal links.
286 | #
287 | # latex_show_pagerefs = False
288 |
289 | # If true, show URL addresses after external links.
290 | #
291 | # latex_show_urls = False
292 |
293 | # Documents to append as an appendix to all manuals.
294 | #
295 | # latex_appendices = []
296 |
297 | # If false, will not define \strong, \code, \titleref, \crossref ... but only
298 | # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added
299 | # packages.
300 | #
301 | # latex_keep_old_macro_names = True
302 |
303 | # If false, no module index is generated.
304 | #
305 | # latex_domain_indices = True
306 |
307 |
308 | # -- Options for manual page output ---------------------------------------
309 |
310 | # One entry per manual page. List of tuples
311 | # (source start file, name, description, authors, manual section).
312 | man_pages = [
313 | (master_doc, 'pyanyapi', 'PyAnyAPI Documentation',
314 | [author], 1)
315 | ]
316 |
317 | # If true, show URL addresses after external links.
318 | #
319 | # man_show_urls = False
320 |
321 |
322 | # -- Options for Texinfo output -------------------------------------------
323 |
324 | # Grouping the document tree into Texinfo files. List of tuples
325 | # (source start file, target name, title, author,
326 | # dir menu entry, description, category)
327 | texinfo_documents = [
328 | (master_doc, 'PyAnyAPI', 'PyAnyAPI Documentation',
329 | author, 'PyAnyAPI', 'One line description of project.',
330 | 'Miscellaneous'),
331 | ]
332 |
333 | # Documents to append as an appendix to all manuals.
334 | #
335 | # texinfo_appendices = []
336 |
337 | # If false, no module index is generated.
338 | #
339 | # texinfo_domain_indices = True
340 |
341 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
342 | #
343 | # texinfo_show_urls = 'footnote'
344 |
345 | # If true, do not generate a @detailmenu in the "Top" node's menu.
346 | #
347 | # texinfo_no_detailmenu = False
348 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | Welcome to PyAnyAPI's documentation!
2 | ====================================
3 |
4 | Contents:
5 |
6 | .. toctree::
7 | :maxdepth: 2
8 |
9 | usage
10 | parsers
11 | complex
12 | changelog
13 |
14 |
15 | Indices and tables
16 | ==================
17 |
18 | * :ref:`genindex`
19 | * :ref:`modindex`
20 | * :ref:`search`
21 |
22 |
--------------------------------------------------------------------------------
/docs/parsers.rst:
--------------------------------------------------------------------------------
1 | .. _parsers:
2 |
3 | Parsers
4 | =======
5 |
6 | HTML & XML
7 | ~~~~~~~~~~
8 |
9 | For HTML and XML based interfaces XPath 1.0 syntax is used for settings
10 | declaration. Unfortunately XPath 2.0 is not supported by lxml. XMLParser is
11 | about the same as HTMLParser, but uses a different lxml parser internally.
12 | Here is an example of usage with ``requests``:
13 |
14 | .. code-block:: python
15 |
16 | >>> import requests
17 | >>> import pyanyapi
18 | >>> parser = pyanyapi.HTMLParser({'header': 'string(.//h1/text())'})
19 | >>> response = requests.get('http://example.com')
20 | >>> api = parser.parse(response.text)
21 | >>> api.header
22 | Example Domain
23 |
24 | If needed, you can execute more XPath queries at any time:
25 |
26 | .. code-block:: python
27 |
28 | from pyanyapi.parsers import HTMLParser
29 |
30 |
31 | >>> parser = HTMLParser({'header': 'string(.//h1/text())'})
32 | >>> api = parser.parse('<html><body><h1>This is</h1><p>test</p></body></html>')
33 | >>> api.header
34 | This is
35 | >>> api.parse('string(//p)')
36 | test
37 |
38 | XML Objectify
39 | ~~~~~~~~~~~~~
40 |
41 | lxml provides an interesting feature: an objectified interface for XML. It
42 | converts the whole XML document to a Python object. This parser doesn't require any
43 | settings. E.g.:
44 |
45 | .. code-block:: python
46 |
47 | from pyanyapi.parsers import XMLObjectifyParser
48 |
49 |
50 | >>> XMLObjectifyParser().parse('<xml><test>123</test></xml>').test
51 | 123
52 |
53 | JSON
54 | ~~~~
55 |
56 | Settings syntax is based on PostgreSQL statements syntax.
57 |
58 | .. code-block:: python
59 |
60 | from pyanyapi.parsers import JSONParser
61 |
62 |
63 | >>> JSONParser({'id': 'container > id'}).parse('{"container":{"id":"123"}}').id
64 | 123
65 |
66 | Or you can get access to values in lists by index:
67 |
68 | .. code-block:: python
69 |
70 | from pyanyapi.parsers import JSONParser
71 |
72 |
73 | >>> JSONParser({'second': 'container > 1'}).parse('{"container":["first", "second", "third"]}').second
74 | second
75 |
76 | You can also execute more queries after the initial parsing:
77 |
78 | .. code-block:: python
79 |
80 | from pyanyapi.parsers import JSONParser
81 |
82 |
83 | >>> api = JSONParser({'second': 'container > 1'}).parse('{"container":[],"second_container":[123]}')
84 | >>> api.parse('second_container > 0')
85 | 123
86 |
87 | YAML
88 | ~~~~
89 | Equivalent to the JSON parser, but works with YAML data.
90 |
91 | .. code-block:: python
92 |
93 | from pyanyapi.parsers import YAMLParser
94 |
95 |
96 | >>> YAMLParser({'test': 'container > test'}).parse('container:\n test: "123"').test
97 | 123
98 |
99 | Regular Expressions Interface
100 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101 |
102 | When data has the wrong format or is just too complicated to be parsed
103 | with the bundled tools, you can use a parser based on regular expressions.
104 | Settings are based on Python's regular expressions. It is the most powerful
105 | parser, because of its simplicity.
106 |
107 | .. code-block:: python
108 |
109 | from pyanyapi.parsers import RegExpParser
110 |
111 |
112 | >>> RegExpParser({'error_code': 'Error (\d+)'}).parse('Oh no!!! It is Error 100!!!').error_code
113 | 100
114 |
115 | You can also execute more queries after the initial parsing:
116 |
117 | .. code-block:: python
118 |
119 | from pyanyapi.parsers import RegExpParser
120 |
121 |
122 | >>> api = RegExpParser({'digits': '\d+'}).parse('123abc')
123 | >>> api.parse('[a-z]+')
124 | abc
125 |
126 | Also, you can pass flags for regular expressions on parser initialization:
127 |
128 | .. code-block:: python
129 |
130 | from pyanyapi.parsers import RegExpParser
131 |
132 |
133 | >>> RegExpParser({'test': '\d+.\d+'}).parse('123\n234').test
134 | 123
135 | >>> RegExpParser({'test': '\d+.\d+'}, flags=re.DOTALL).parse('123\n234').test
136 | 123
137 | 234
138 |
139 |
140 | CSV Interface
141 | ~~~~~~~~~~~~~
142 |
143 | Operates with CSV data with simple queries in format 'row_id:column_id'.
144 |
145 | .. code-block:: python
146 |
147 | from pyanyapi.parsers import CSVParser
148 |
149 |
150 | >>> CSVParser({'value': '1:2'}).parse('1,2,3\r\n4,5,6\r\n').value
151 | 6
152 |
153 | Also, you can pass custom kwargs for `csv.reader` on parser initialization:
154 |
155 | .. code-block:: python
156 |
157 | from pyanyapi.parsers import CSVParser
158 |
159 |
160 | >>> CSVParser({'value': '1:2'}, delimiter=';').parse('1;2;3\r\n4;5;6\r\n').value
161 | 6
162 |
163 | AJAX Interface
164 | ~~~~~~~~~~~~~~
165 |
166 | AJAX is a very popular technology and often uses JSON data with HTML values. Here is an example:
167 |
168 | .. code-block:: python
169 |
170 | from pyanyapi.parsers import AJAXParser
171 |
172 |
173 | >>> api = AJAXParser({'p': 'content > string(//p)'}).parse('{"content": "<p>Pcontent</p>"}')
174 | >>> api.p
175 | Pcontent
176 |
177 | It uses a combination of XPath queries and PostgreSQL-based JSON lookups.
178 | Custom queries execution is also available:
179 |
180 | .. code-block:: python
181 |
182 | from pyanyapi.parsers import AJAXParser
183 |
184 |
185 | >>> api = AJAXParser().parse('{"content": "<p>Pcontent</p><span>123</span>"}')
186 | >>> api.parse('content > string(//span)')
187 | 123
188 |
189 |
190 | Custom Interface
191 | ~~~~~~~~~~~~~~~~
192 |
193 | You can easily declare your own interface. For that you should define the
194 | ``execute_method`` method and, optionally, ``perform_parsing``. Here is
195 | an example of a naive CSVInterface, which provides the ability to get a column
196 | value by index. You should also create a separate parser for it.
197 |
198 | .. code-block:: python
199 |
200 | from pyanyapi.interfaces import BaseInterface
201 | from pyanyapi.parsers import BaseParser
202 |
203 |
204 | class CSVInterface(BaseInterface):
205 |
206 | def perform_parsing(self):
207 | return self.content.split(',')
208 |
209 | def execute_method(self, settings):
210 | return self.parsed_content[settings]
211 |
212 |
213 | class CSVParser(BaseParser):
214 | interface_class = CSVInterface
215 |
216 |
217 | >>> CSVParser({'second': 1}).parse('1,2,3').second
218 | 2
219 |
220 | Extending interfaces
221 | ~~~~~~~~~~~~~~~~~~~~
222 |
223 | Content can also be parsed with regular Python code. It can be done with the
224 | special decorators ``interface_method`` and ``interface_property``.
225 |
226 | Custom method example:
227 |
228 | .. code-block:: python
229 |
230 | from pyanyapi.decorators import interface_method
231 | from pyanyapi.parsers import HTMLParser
232 |
233 |
234 | class ParserWithMethod(HTMLParser):
235 | settings = {'occupation': 'string(.//p/text())'}
236 |
237 | @interface_method
238 | def hello(self, name):
239 | return name + ' is ' + self.occupation
240 |
241 |
242 | >>> api = ParserWithMethod().parse('
')
266 | >>> api.h1
267 | This is
268 |
269 | >>> api.p
270 | test
271 |
272 | >>> api.test
273 | This is test
274 |
275 | Certainly, the previous example can be done with a more complex XPath
276 | expression, but in the general case XPath is not enough.
277 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx_rtd_theme
--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 | .. _usage:
2 |
3 | Usage
4 | =====
5 |
6 | The library provides the ability to create an API over various content.
7 | Currently there are bundled tools to work with HTML, XML, CSV, JSON and YAML.
8 | Initially it was created to work with the ``requests`` library.
9 |
10 | Basic setup
11 | ~~~~~~~~~~~
12 |
13 | Basic parsers can be declared in the following way:
14 |
15 | .. code-block:: python
16 |
17 | from pyanyapi.parsers import HTMLParser
18 |
19 |
20 | class SimpleParser(HTMLParser):
21 | settings = {'header': 'string(.//h1/text())'}
22 |
23 |
24 | >>> api = SimpleParser().parse('<html><body><h1>Value</h1></body></html>')
25 | >>> api.header
26 | Value
27 |
28 | Or it can be configured in runtime:
29 |
30 | .. code-block:: python
31 |
32 | from pyanyapi.parsers import HTMLParser
33 |
34 |
35 | >>> api = HTMLParser({
36 | 'header': 'string(.//h1/text())'
37 | }).parse('<html><body><h1>Value</h1></body></html>')
38 | >>> api.header
39 | Value
40 |
41 | To get all parsing results as a dict, use the ``parse_all`` method.
42 | All properties (including those defined with the ``@interface_property`` decorator) will be returned.
43 |
44 | .. code-block:: python
45 |
46 | from pyanyapi.parsers import JSONParser
47 |
48 | >>> JSONParser({
49 | 'first': 'container > 0',
50 | 'second': 'container > 1',
51 | 'third': 'container > 2',
52 | }).parse('{"container":["first", "second", "third"]}').parse_all()
53 | {
54 | 'first': 'first',
55 | 'second': 'second',
56 | 'third': 'third',
57 | }
58 |
59 | Complex setup
60 | ~~~~~~~~~~~~~
61 |
62 | In some cases you may want to apply extra transformations to the result
63 | list. This is what the "base-children" setup style is for.
64 |
65 | .. code-block:: python
66 |
67 | from pyanyapi.parsers import HTMLParser
68 |
69 |
70 | class SimpleParser(HTMLParser):
71 | settings = {
72 | 'test': {
73 | 'base': '//test',
74 | 'children': 'text()|*//text()'
75 | }
76 | }
77 |
78 |
79 | >>> api = SimpleParser().parse('<html><body><test>123 </test><test><inside> 234</inside></test></body></html>')
80 | >>> api.test
81 | ['123 ', ' 234']
82 |
83 | There is another option to interact with sub-elements. Sub parsers!
84 |
85 | .. code-block:: python
86 |
87 | from pyanyapi.parsers import HTMLParser
88 |
89 |
90 | class SubParser(HTMLParser):
91 | settings = {
92 | 'href': 'string(//@href)',
93 | 'text': 'string(//text())'
94 | }
95 |
96 |
97 | class Parser(HTMLParser):
98 | settings = {
99 | 'elem': {
100 | 'base': './/a',
101 | 'parser': SubParser
102 | }
103 | }
104 |
105 | >>> api = Parser().parse("<html><body><a href='#test'>test</a></body></html>")
106 | >>> api.elem[0].href
107 | #test
108 | >>> api.elem[0].text
109 | test
110 | >>> api.parse_all()
111 | {'elem': [{'href': '#test', 'text': 'test'}]}
112 |
113 | Sub parsers can be passed either as classes or as instances.
114 |
115 | Settings inheritance
116 | ~~~~~~~~~~~~~~~~~~~~
117 |
118 | The ``settings`` attribute is merged from all ancestors of the current parser.
119 |
120 | .. code-block:: python
121 |
122 | from pyanyapi.parsers import HTMLParser
123 |
124 |
125 | class ParentParser(HTMLParser):
126 | settings = {'parent': '//p'}
127 |
128 |
129 | class FirstChildParser(ParentParser):
130 | settings = {'parent': '//override'}
131 |
132 |
133 | class SecondChildParser(ParentParser):
134 | settings = {'child': '//h1'}
135 |
136 |
137 | >>> FirstChildParser().settings['parent']
138 | //override
139 |
140 | >>> SecondChildParser().settings['parent']
141 | //p
142 |
143 | >>> SecondChildParser().settings['child']
144 | //h1
145 |
146 | >>> SecondChildParser({'child': '//more'}).settings['child']
147 | //more
148 |
149 | Results stripping
150 | ~~~~~~~~~~~~~~~~~
151 |
152 | Parsers can automagically strip trailing whitespace with the ``strip=True`` option.
153 |
154 | .. code-block:: python
155 |
156 | from pyanyapi.parsers import XMLParser
157 |
158 |
159 | >>> settings = {'p': 'string(//p)'}
160 | >>> XMLParser(settings).parse('