├── .gitattributes
├── .gitignore
├── .travis.yml
├── COPYING.txt
├── MANIFEST.in
├── README.md
├── requirements.txt
├── setup.py
├── tests
└── unit
│ └── test_xpyth.py
├── tox.ini
└── xpyth
└── __init__.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 |
7 | # Standard to msysgit
8 | *.doc diff=astextplain
9 | *.DOC diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot diff=astextplain
13 | *.DOT diff=astextplain
14 | *.pdf diff=astextplain
15 | *.PDF diff=astextplain
16 | *.rtf diff=astextplain
17 | *.RTF diff=astextplain
18 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 |
25 | # PyInstaller
26 | # Usually these files are written by a python script from a template
27 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
28 | *.manifest
29 | *.spec
30 |
31 | # Installer logs
32 | pip-log.txt
33 | pip-delete-this-directory.txt
34 |
35 | # Unit test / coverage reports
36 | htmlcov/
37 | .tox/
38 | .coverage
39 | .cache
40 | nosetests.xml
41 | coverage.xml
42 |
43 | # Translations
44 | *.mo
45 | *.pot
46 |
47 | # Django stuff:
48 | *.log
49 |
50 | # Sphinx documentation
51 | docs/_build/
52 |
53 | # PyBuilder
54 | target/
55 |
56 | # =========================
57 | # Operating System Files
58 | # =========================
59 |
60 | # OSX
61 | # =========================
62 |
63 | .DS_Store
64 | .AppleDouble
65 | .LSOverride
66 |
67 | # Thumbnails
68 | ._*
69 |
70 | # Files that might appear on external disk
71 | .Spotlight-V100
72 | .Trashes
73 |
74 | # Directories potentially created on remote AFP share
75 | .AppleDB
76 | .AppleDesktop
77 | Network Trash Folder
78 | Temporary Items
79 | .apdisk
80 |
81 | # Windows
82 | # =========================
83 |
84 | # Windows image file caches
85 | Thumbs.db
86 | ehthumbs.db
87 |
88 | # Folder config file
89 | Desktop.ini
90 |
91 | # Recycle Bin used on file shares
92 | $RECYCLE.BIN/
93 |
94 | # Windows Installer files
95 | *.cab
96 | *.msi
97 | *.msm
98 | *.msp
99 |
100 | # Windows shortcuts
101 | *.lnk
102 |
103 | xpyth_env/
104 | xpyth.pyproj
105 | *.sln
106 | xpyth.v12.suo
107 | .idea/
108 | .pytest_cache/
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: false
2 | language: python
3 | python:
4 | - "2.7"
5 | - "3.6"
6 | install: pip install tox-travis
7 | script: tox
8 |
--------------------------------------------------------------------------------
/COPYING.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Hodgdon Chase Stevens
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include COPYING.txt
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # xpyth
2 |
3 | [Build Status](https://travis-ci.org/hchasestevens/xpyth)
4 | [PyPI version](https://badge.fury.io/py/xpyth)
5 | 
6 |
7 | A module for querying the DOM tree and writing XPath expressions using native Python syntax.
8 |
9 | Example usage
10 | -------------
11 | ```python
12 | >>> from xpyth import xpath, DOM, X
13 |
14 | >>> xpath(X for X in DOM if X.name == 'main')
15 | "//*[@name='main']"
16 |
17 | >>> xpath(span for div in DOM for span in div if div.id == 'main')
18 | "//div[@id='main']//span"
19 |
20 | >>> xpath(a for a in DOM if '.com' not in a.href)
21 | "//a[not(contains(@href, '.com'))]"
22 |
23 | >>> xpath(a.href for a in DOM if any(p for p in a.ancestors if p.id))
24 | "//a[./ancestor::p[@id]]/@href"
25 |
26 | >>> xpath(X.data-bind for X in DOM if X.data-bind == '1')
27 | "//*[@data-bind='1']/@data-bind"
28 |
29 | >>> xpath(
30 | ... form.action
31 | ... for form in DOM
32 | ... if all(
33 | ... input
34 | ... for input in form.children
35 | ... if input.value == 'a'
36 | ... )
37 | ... )
38 | "//form[not(./input[not(@value='a')])]/@action"
39 |
40 | >>> allowed_ids = list('abc')
41 | >>> xpath(X for X in DOM if X.id in allowed_ids)
42 | "//*[@id='a' or @id='b' or @id='c']"
43 | ```
44 |
45 | Motivation
46 | ----------
47 |
48 | XPath is the de facto standard in querying XML and HTML documents. In Python (and most other languages), XPath expressions are represented as strings; this not only constitutes a potential security threat, but also means that developers are denied standard text-editor and IDE features such as syntax highlighting and autocomplete when writing XPaths. Furthermore, having to become familiar with XPath (or CSS selectors) presents a barrier to entry for developers who want to interact with the web.
49 |
50 | [Great inroads](https://msdn.microsoft.com/en-us/library/bb397933.aspx) have been made in various programming languages in allowing the use of native list-comprehension-like syntax to generate SQL queries. __xpyth__ piggybacks off one such effort, [Pony](http://ponyorm.com/), to extend this functionality to XPath. __Now anyone familiar with Python comprehension syntax can query XML/HTML documents quickly and easily__. Moreover, __xpyth__ integrates with the popular [lxml](http://lxml.de/) library to enable developers to go beyond the querying capabilities of XPath (when necessary).
51 |
52 | Installation
53 | ------------
54 |
55 | ```bash
56 | pip install xpyth
57 | ```
58 |
59 |
60 | Use with lxml
61 | -------------
62 |
63 | __xpyth__ supports querying lxml ```ElementTree```s using the ```query``` function. For example, given a document
64 | ```html
65 |
66 |
67 |
Google
68 |
Not Google
69 |
Lorem ipsum
70 |
no numbers here
71 |
123
72 |
73 |
77 |
78 | ```
79 | accessible as the ```ElementTree``` ```tree```, the following can be executed:
80 | ```python
81 | >>> len(query(a for a in tree))
82 | 4
83 | >>> query(a for a in tree if 'Not Google' not in a.text)[0].attrib.get('href')
84 | "http://www.google.com"
85 | >>> next(
86 | ... node
87 | ... for node in
88 | ... query(
89 | ... p
90 | ... for p in
91 | ... tree
92 | ... if p.id
93 | ... )
94 | ... if re.match(r'\D+', node.attrib.get('id'))
95 | ... ).text
96 | "123"
97 | ```
98 |
99 | Known Issues
100 | ------------
101 |
102 | * HTML tag names that contain special characters (dashes) cannot be selected, as they violate Python's generator comprehension syntax. HTML attributes containing dashes, e.g. ``data-bind``, work normally.
103 | * The use of ```all``` is quite buggy, e.g. the following return incorrect expressions:
104 |
105 | ```python
106 | >>> xpath(X for X in DOM if all(p.id in ('a', 'b') for p in X))
107 | "//*[not(.//p/@id='a' or //p/@id='b')]" # expected "//*[not(.//p[./@id!='a' and ./@id!='b'])]"
108 | >>> xpath(X for X in DOM if all('x' in p.id for p in X))
109 | "//*[not(.contains(@id, //p))]" # expected "//*[not(.//p[not(contains(@id, 'x'))])]"
110 | ```
111 |
112 | Contacts
113 | --------
114 |
115 | * Name: [H. Chase Stevens](http://www.chasestevens.com)
116 | * Twitter: [@hchasestevens](https://twitter.com/hchasestevens)
117 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | lxml==3.4.2
2 | pony==0.6.1
3 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
# Test-only dependency, shared by ``tests_require`` and the ``dev`` extra.
_PYTEST_REQUIREMENT = 'pytest>=3.1.2'

# Packages needed at runtime.
_RUNTIME_REQUIREMENTS = [
    'lxml>=4.1.1',
    'pony>=0.7.3',
]

# Trove classifiers published with the distribution.
_CLASSIFIERS = [
    'Development Status :: 2 - Pre-Alpha',
    'Programming Language :: Python',
    'Programming Language :: Python :: 2',
    'Programming Language :: Python :: 2.7',
    'Programming Language :: Python :: 3',
    'Programming Language :: Python :: 3.6',
    'Intended Audience :: Developers',
    'Operating System :: OS Independent',
    'License :: OSI Approved :: MIT License',
    'Natural Language :: English',
    'Topic :: Internet :: WWW/HTTP',
    'Topic :: Software Development :: Code Generators',
]

setup(
    name='xpyth',
    packages=['xpyth'],
    version='0.2.0',
    description='Generate XPath expressions from Python comprehensions',
    license='MIT',
    author='H. Chase Stevens',
    author_email='chase@chasestevens.com',
    url='https://github.com/hchasestevens/xpyth',
    install_requires=_RUNTIME_REQUIREMENTS,
    tests_require=[_PYTEST_REQUIREMENT],
    extras_require={'dev': [_PYTEST_REQUIREMENT]},
    keywords='xpath xml html',
    classifiers=_CLASSIFIERS,
)
--------------------------------------------------------------------------------
/tests/unit/test_xpyth.py:
--------------------------------------------------------------------------------
1 | """Unit tests for xpyth."""
2 |
3 | import re
4 |
5 | import pytest
6 |
7 | from lxml import etree
8 | from pony.orm.decompiling import Decompiler
9 |
10 | from xpyth import xpath, DOM, X, query
11 |
12 |
def test_iter_insertion():
    """Check that DOM itself becomes the generator's hidden ``.0`` iterator."""
    generator = (div for div in DOM)
    implicit_iterator = generator.gi_frame.f_locals['.0']
    assert implicit_iterator is DOM
16 |
17 |
@pytest.mark.parametrize('comprehension,expected_expression', (
    ((div for div in DOM), '//div'),
    ((span for div in DOM for span in div), '//div//span'),
    ((span.cls for div in DOM for span in div), '//div//span/@class'),
    ((span.text for span in DOM), '//span/text()'),
    ((span for span in DOM if span.name == 'main'), "//span[@name='main']"),
    ((div for span in DOM if span.name == 'main' for div in span), "//span[@name='main']//div"),
    ((div for span in DOM for div in span if span.name == 'main'), "//span[@name='main']//div"),
    ((div for span in DOM if span.name == 'main' for div in span if div.cls == 'row'), "//span[@name='main']//div[@class='row']"),
    ((div for span in DOM for div in span if div.cls == 'row' and span.name == 'main'), "//span[@name='main']//div[@class='row']"),  # tricky case - need to dissect And
    ((a for a in DOM if a.href == 'http://www.google.com' and a.name == 'goog'), "//a[@href='http://www.google.com' and @name='goog']"),
    ((a for a in DOM if '.com' in a.href), "//a[contains(@href, '.com')]"),
    ((a for a in DOM if '.com' not in a.href), "//a[not(contains(@href, '.com'))]"),
    ((a for a in DOM if not '.com' in a.href), "//a[not(contains(@href, '.com'))]"),
    ((div for div in DOM if div.id != 'main'), "//div[@id!='main']"),
    ((div for div in DOM if not div.id == 'main'), "//div[not(@id='main')]"),
    ((X for X in DOM if X.name == 'main'), "//*[@name='main']"),
    ((span for div in DOM for X in div.following_siblings for span in X.children), '//div/following-sibling::*/span'),
    ((a.href for a in DOM if any(p for p in a.following_siblings)), '//a[./following-sibling::p]/@href'),
    ((a.href for a in DOM if any(p for p in a.following_siblings if p.id)), '//a[./following-sibling::p[@id]]/@href'),
    ((X for X in DOM if any(p for p in DOM)), '//*[//p]'),
    ((span for div in DOM for span in div if div.id in ('main', 'other')), "//div[@id='main' or @id='other']//span"),
    ((X for X in DOM if X.name in ('a', 'b', 'c')), "//*[@name='a' or @name='b' or @name='c']"),
    ((X for X in DOM if all(p for p in X if p.id == 'a')), "//*[not(.//p[not(@id='a')])]"),
    ((X for X in DOM if all(p for p in DOM if p.id == 'a')), "//*[not(//p[not(@id='a')])]"),
    ((X for X in DOM if any(p.id == 'a' for p in X)), "//*[.//p/@id='a']"),
    ((X for X in DOM if all(not p.id == 'a' for p in X)), "//*[not(.//p/@id!='a')]"),
    ((X for X in DOM if all(not p.id != 'a' for p in X)), "//*[not(.//p/@id='a')]"),
    ((X for X in DOM if len(td for td in X.following_siblings) == 0), "//*[count(./following-sibling::td)=0]"),
    ((td.text for td in DOM if td.cls == 'wideonly' and len(td for td in td.following_siblings) == 0), "//td[@class='wideonly' and count(./following-sibling::td)=0]/text()"),
    ((X for X in DOM if X.data-bind == 'a'), "//*[@data-bind='a']"),
    ((X.data-bind for X in DOM), "//*/@data-bind"),

    # Known-broken ``all`` translations.  ``pytest.param`` with ``marks`` is the
    # supported way to skip a single parameter set; wrapping the raw value tuple
    # in ``pytest.mark.skip(...)`` stopped working in pytest 4.
    pytest.param(
        (form.action for form in DOM if all(input.name == 'a' for input in form.children)),
        "//form[not(./input/@name!='a')]/@action",
        marks=pytest.mark.skip(reason='known issue: `all` over a boolean predicate'),
    ),
    pytest.param(
        (X for X in DOM if all(p.id in ('a', 'b') for p in X)),
        "//*[not(.//p[./@id!='a' and ./@id!='b'])]",
        marks=pytest.mark.skip(reason='known issue: `all` over a membership test'),
    ),
    pytest.param(
        (X for X in DOM if all('x' in p.id for p in X)),
        "//*[not(.//p[not(contains(@id, 'x'))])]",  # Gives //*[not(.contains(@id, //p))]
        marks=pytest.mark.skip(reason='known issue: `all` over a substring test'),
    ),

    # TODO: position (e.g. xpath(a for a in (a for a in DOM)[:20]) ???)
    # TODO: position (e.g. xpath(a for X in DOM for a in X[20:]) ???)
))
def test_expression_generation(comprehension, expected_expression):
    """Ensure comprehensions are transformed into expected XPath expressions.

    On a mismatch the decompiled AST of the comprehension is printed before
    the assertion error is re-raised, to help diagnose the translation.
    """
    try:
        expr = xpath(comprehension)
        assert expr == expected_expression
    except AssertionError:
        ast = Decompiler(comprehension.gi_code).ast
        print(ast)
        print()
        raise
68 |
69 |
def test_context():
    """Ensure names from the enclosing scope are resolved correctly."""
    allowed_values = ['a', 'b', 'c']
    generated = xpath(X for X in DOM if X.name in allowed_values)
    assert generated == "//*[@name='a' or @name='b' or @name='c']"
76 |
77 |
78 | def test_lxml():
79 | """Ensure lxml compatibility."""
80 | tree = etree.fromstring('''
81 |
82 |
83 |
Google
84 |
Not Google
85 |
Lorem ipsum
86 |
no numbers here
87 |
123
88 |
89 |
93 |
94 | ''')
95 | assert len(query(a for a in tree)) == 4
96 | assert query(a for a in tree if 'Not Google' in a.text)[0].attrib.get('href') != 'http://www.google.com'
97 | assert query(a for a in tree if 'Not Google' not in a.text)[0].attrib.get('href') == 'http://www.google.com'
98 | assert next(
99 | node
100 | for node in
101 | query(
102 | p
103 | for p in
104 | tree
105 | if node.id
106 | )
107 | if re.match(r'\D+', node.attrib.get('id'))
108 | ).text == '123'
109 | assert query( # switch between xpyth and regular comprehensions
110 | a
111 | for a in
112 | next(
113 | node
114 | for node in
115 | query(
116 | div
117 | for div in
118 | tree
119 | )
120 | if re.match(r'\d+', node.attrib.get('id'))
121 | )
122 | if 'google' in a.href
123 | )[0].text == 'Google Charity'
124 | assert set(query(
125 | a.href
126 | for a in
127 | tree
128 | if any(
129 | p
130 | for p in
131 | a.following_siblings
132 | )
133 | )) == {'http://www.google.com', 'http://www.chasestevens.com'}
134 | assert set(query(
135 | a.href
136 | for a in
137 | tree
138 | if not any(
139 | p
140 | for p in
141 | a.following_siblings
142 | )
143 | )) == {'http://www.google.org', 'http://www.chasestevens.org'}
144 | assert set(query(
145 | a.href
146 | for a in
147 | tree
148 | if not any(
149 | p
150 | for p in
151 | a.following_siblings
152 | )
153 | and any(
154 | p
155 | for p in
156 | a.following_siblings
157 | )
158 | )) == set()
159 | assert set(query(
160 | a.href
161 | for a in
162 | tree
163 | if any(
164 | p
165 | for p in
166 | tree
167 | )
168 | )) == {'http://www.google.com', 'http://www.chasestevens.com', 'http://www.google.org', 'http://www.chasestevens.org'}
169 | assert not query(
170 | a.href
171 | for a in
172 | tree
173 | if not any(
174 | p
175 | for p in
176 | tree
177 | )
178 | )
179 |
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | skip_missing_interpreters = True
3 | envlist = py27,py36
4 |
5 | [testenv]
6 | extras = dev
7 | commands = py.test ./tests
8 |
--------------------------------------------------------------------------------
/xpyth/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | from functools import reduce
3 | except ImportError:
4 | pass
5 |
6 | from pony.orm.decompiling import Decompiler
7 | from pony.thirdparty.compiler.ast import *
8 | from lxml import etree
9 |
10 | import ctypes
11 | import collections
12 | import functools
13 |
14 |
# Public names of the package.
__all__ = 'DOM X xpath query'.split()
__author__ = 'H. Chase Stevens'


# When true, every registered subtree handler prints its own name and its
# result after it runs.
DEBUG = False
20 |
21 |
class _DOM(object):
    """Placeholder iterable used as the source of root-level comprehensions.

    The object is its own iterator and every step yields the object itself,
    so iteration never raises ``StopIteration``.
    """

    def __iter__(self):
        """Return this object as its own iterator."""
        return self

    def __next__(self):
        """Return this object again."""
        return self
    next = __next__  # same method under the Python 2 iterator-protocol name


# Module-level instance; ``xpath`` compares a generator's hidden ``.0``
# iterator against it.
DOM = _DOM()
32 |
33 |
class X:
    '''Wildcard placeholder whose attributes exist to give autocomplete suggestions.

    Every attribute is simply ``None``.
    '''
    text = None
    ancestors = None
    ancestors_or_self = None
    children = None
    descendants = None
    descendants_or_self = None
    following = None
    followings = None
    following_siblings = None
    parent = None
    parents = None
    preceding = None
    precedings = None
    preceding_siblings = None
    self = None
51 |
52 |
def xpath(g):
    """Returns XPath expression corresponding to generator.

    ``g`` must be a generator expression whose outermost iterable is ``DOM``.
    Its code object is decompiled to an AST, translated to an XPath string,
    and the string is compiled with lxml to verify its syntax.

    Raises:
        AssertionError: if the outermost iterable of ``g`` is not ``DOM``.
        etree.XPathSyntaxError: if the generated expression is not valid
            XPath; the exception carries the offending expression.
    """
    frame = g.gi_frame
    # Raised explicitly rather than via ``assert`` so the check survives
    # ``python -O``; identity is what is meant, not equality.
    if frame.f_locals['.0'] is not DOM:
        raise AssertionError("Only root-level expressions are supported.")
    ast = Decompiler(g.gi_code).ast
    # Build the lookup context in a fresh dict.  Updating ``f_globals`` in
    # place would write the generator's locals (including the hidden ``.0``)
    # into the caller's real module namespace.
    context = dict(frame.f_globals)
    context.update(frame.f_locals)  # locals shadow globals
    expression = _handle_genexpr(ast, context)
    try:
        etree.XPath(expression)  # Verify syntax
    except etree.XPathSyntaxError:
        raise etree.XPathSyntaxError(expression)
    return expression
66 |
67 |
def query(g):
    """Queries a DOM tree (lxml Element).

    ``g`` is a generator expression whose outermost iterable is the tree to
    search.  The generator itself is never run: its hidden ``.0`` iterator is
    used to find the tree, is then replaced with ``DOM`` so that ``xpath`` can
    translate the generator, and the resulting expression (prefixed with
    ``.``) is evaluated against the tree.

    Returns whatever the tree's XPath method returns, or ``[]`` when the
    hidden iterator yields nothing.

    Raises:
        NotImplementedError: if the tree object has none of the known XPath
            method names.
    """
    try:
        # Take the first item produced by the hidden iterator and step back up
        # to its parent, which is used as the tree to query.
        dom = next(g.gi_frame.f_locals['.0']).getparent()  # lxml  # TODO: change for selenium etc.
    except StopIteration:
        return []  # copying what lxml does

    # Magic to convert our generator into a DOM-generator (http://pydev.blogspot.co.uk/2014/02/changing-locals-of-frame-frameflocals.html)
    g.gi_frame.f_locals['.0'] = DOM
    # Push the modified f_locals mapping back into the frame's fast locals.
    ctypes.pythonapi.PyFrame_LocalsToFast(ctypes.py_object(g.gi_frame), ctypes.c_int(0))

    # Leading '.' turns the generated '//...' expression into './/...'.
    expression = '.' + xpath(g)

    method_names = (
        'xpath',  # lxml ElementTree
        'findall',  # xml ElementTree
        'find_elements_by_xpath',  # selenium WebDriver/WebElement
    )
    # Use the first of these methods that the tree object actually has.
    for method_name in method_names:
        try:
            xpath_method = getattr(dom, method_name)
            break
        except AttributeError:
            pass
    else:
        # Loop finished without ``break``: no supported method was found.
        raise NotImplementedError(dom.__class__.__name__)

    return xpath_method(expression)
96 |
97 |
# Python-side attribute names that are emitted under a different attribute
# name (looked up in ``_handle_getattr``).
_ATTR_REPLACEMENTS = {
    'cls': 'class',
    '__class__': 'class',
}

# Attributes that are not emitted with the default '@{}' template
# (looked up in ``_handle_getattr``).
_ATTR_FORMAT_OVERRIDES = {
    'text': '{}()',
}

# Comparison operators that are renamed before formatting
# (looked up in ``_handle_compare``).
_COMPARE_OP_REPLACEMENTS = {
    '==': '=',
    'in': 'contains',
}

# Operators that are not emitted with the default '{}{}{}' template.
# ``_handle_compare`` formats with (left, op, right), so {0} is the left
# operand, {1} the operator and {2} the right operand.
_COMPARE_OP_FORMAT_OVERRIDES = {
    'contains': '{1}({2}, {0})',
    'not in': 'not(contains({2}, {0}))',
}

# Operator -> negated operator; used by ``_handle_genexprinner`` when the
# generator yields ``not <comparison>``.
_COMPARE_OP_OPPOSITES = {
    '==': '!=',
    'in': 'not in',
    '>': '<=',
    '<': '>=',
}
# Add the reverse direction of every pair above ...
_COMPARE_OP_OPPOSITES.update({v: k for k, v in _COMPARE_OP_OPPOSITES.items()})
# ... plus the already-translated equality operator.
_COMPARE_OP_OPPOSITES['='] = '!='

# Separator emitted in front of a step when the ``for`` clause iterates over
# an attribute of another node (e.g. ``for p in a.following_siblings``);
# ``_handle_genexprfor`` falls back to '//' for anything not listed here.
_GENEXPRFOR_GETATTR_SEP_OVERRIDES = {
    'ancestors': '/ancestor::',
    'ancestors_or_self': '/ancestor-or-self::',
    'children': '/',
    'descendants': '/descendant::',
    'descendants_or_self': '/descendant-or-self::',
    'following': '/following::',
    'followings': '/following::',
    'following_siblings': '/following-sibling::',
    'parent': '/parent::',
    'parents': '/parent::',
    'preceding': '/preceding::',
    'precedings': '/preceding::',
    'preceding_siblings': '/preceding-sibling::',
    'self': '/self::',
}
142 |
143 |
def _root_level(genexpr, frame_locals):
    """Tell whether the first ``for`` clause of ``genexpr`` iterates a root.

    Returns a true value when the clause's source is the plain name ``DOM``
    or ``.0``, or a name bound in ``frame_locals`` to an lxml element.
    Returns ``None`` when the source is not a plain name at all.
    """
    first_source = genexpr.code.quals[0].getChildren()[1]
    if first_source.__class__ != Name:
        return None
    source_name = first_source.name
    if source_name in ('DOM', '.0'):
        return True
    return isinstance(frame_locals.get(source_name), etree._Element)
150 |
151 |
def _get_highest_src(if_, ranked_srcs):
    """Find the earliest-ranked loop variable referenced under ``if_``.

    ``ranked_srcs`` is a list of names; a lower index means a higher rank.
    Returns a list holding at most one name.  A bare name node is returned
    as-is, without checking it against ``ranked_srcs``; for compound nodes
    only names present in ``ranked_srcs`` are considered.
    """
    node_class = if_.__class__

    # A conditional wrapper: look straight through to the tested expression.
    if node_class == GenExprIf:
        return _get_highest_src(if_.test, ranked_srcs)

    if node_class in (Name, AssName):
        return [if_.name]

    # Leaves that are not AST nodes (e.g. plain strings) reference nothing.
    if not hasattr(if_, 'getChildren'):
        return []

    candidates = []
    for child in if_.getChildren():
        for src in _get_highest_src(child, ranked_srcs):
            if src in ranked_srcs:
                candidates.append(src)

    if not candidates:
        return []
    return [min(candidates, key=ranked_srcs.index)]
174 |
175 |
def _subtree_handler_factory():
    """Create a handler registry and return ``(register_decorator, dispatch)``.

    The two returned callables share one private mapping from AST node class
    to handler.
    """
    registry = {}

    def _subtree_handler(*ntypes, **kwargs):
        """Decorator factory registering a handler for each class in ``ntypes``.

        With ``supply_ast=True`` the handler receives the node itself;
        otherwise it receives the node's children.
        """
        pass_whole_node = kwargs.get('supply_ast', False)

        def decorator(f):
            @functools.wraps(f)
            def wrapper(ast_subtree, frame_locals, relative=False):
                # Children are always fetched, even when the node is passed on.
                children = ast_subtree.getChildren()
                if pass_whole_node:
                    payload = ast_subtree
                else:
                    payload = children
                result = f(payload, frame_locals, relative)
                if DEBUG:
                    print(f.__name__)
                    print(result)
                    print()
                return result

            registry.update((ntype, wrapper) for ntype in ntypes)
            return wrapper

        return decorator

    def _dispatch(subtree):
        """Choose appropriate subtree handler for subtree type"""
        ntype = subtree.__class__
        try:
            handler = registry[ntype]
        except KeyError:
            raise NotImplementedError(ntype.__name__)
        # Bind the subtree now; the caller supplies frame_locals / relative.
        return functools.partial(handler, subtree)

    return _subtree_handler, _dispatch

_subtree_handler, _dispatch = _subtree_handler_factory()
207 |
208 |
@_subtree_handler(GenExpr)
def _handle_genexpr(children, frame_locals, relative):
    """Translate a generator expression node via its single inner node.

    The result is prefixed with '.' when ``relative`` is true.
    """
    (inner,) = children
    assert inner.__class__ == GenExprInner  # TODO: remove
    prefix = '.' if relative else ''
    return prefix + _handle_genexprinner(inner, frame_locals)
215 |
216 |
@_subtree_handler(GenExprInner)
def _handle_genexprinner(children, frame_locals, relative):
    """Translate the body of a generator expression into an XPath path.

    ``children[0]`` is the yielded expression and the remaining children are
    the ``for`` clauses.  If the yielded expression is a comparison (optionally
    negated), the tree is rewritten into a comparison whose left side is the
    generator and dispatched again.  Otherwise conditions are redistributed
    among the ``for`` clauses (mutating them in place), each clause is turned
    into a path step, and the yielded attribute, if any, is appended.

    Raises:
        NotImplementedError: if the yielded expression is an ``and``/``or``.
    """
    name = children[0]
    fors = children[1:]
    rel = '.' if relative else ''

    # Rearrange tree if returning booleans, not nodes (all, any)
    return_type = name.__class__
    if return_type in (Compare, Not, And, Or):
        if return_type in (And, Or):
            raise NotImplementedError("Conjunction and disjunction not supported as return type of generator.")
        if return_type == Not:
            # ``not (a <op> b)``: unwrap and flip the operator instead.
            name = name.expr
            assert name.__class__ == Compare
            ops = name.ops
            if ops:
                (op, val), = ops
                ops = [(_COMPARE_OP_OPPOSITES[op], val)]
        else:
            ops = name.ops
        # Move the comparison outside: (<lhs> for ...) <op> <rhs>
        new_tree = Compare(
            GenExprInner(
                name.expr,
                fors
            ),
            ops
        )
        return rel + _dispatch(new_tree)(frame_locals)  # TODO: replace with Compare, since we know this

    # Rearrange ifs
    # NOTE(review): the ``if for_.__class__`` filter below looks like it is
    # always true, so it probably filters nothing - confirm intent.
    for_srcs = {for_.assign.name: for_ for for_ in fors if for_.__class__}
    ranked_srcs = (for_.getChildren()[1] for for_ in fors)
    # Name of each clause's source, in clause order; for an attribute source
    # (e.g. ``a.following_siblings``) the name of the object it is read from.
    ranked_src_names = [
        src.getChildren()[0].name
        if src.__class__ == Getattr
        else src.name
        for src in
        ranked_srcs
    ]
    for for_ in fors:
        for_src = for_.assign.name

        # decompose Ands
        ifs = for_.ifs[:]  # iterate over a copy; for_.ifs is modified below
        for if_ in ifs:
            try:
                test = if_.test
            except AttributeError:  # e.g. Not has no test attr
                continue
            if isinstance(test, And):
                for_.ifs.remove(if_)
                for_.ifs.extend([GenExprIf(node) for node in test.nodes])

        # shuffle conditionals around so that they test the appropriate level
        ifs = for_.ifs[:]
        for if_ in ifs:
            highest_src = _get_highest_src(if_, ranked_src_names)
            if not highest_src:
                continue
            highest_src, = highest_src
            if highest_src != for_src:
                # The condition belongs to a different clause: move it there.
                for_srcs[highest_src].ifs.append(if_)
                try:
                    for_.ifs.remove(if_)
                except ValueError:  # we constructed this conditional artificially
                    pass

        # conjoin any loose conditionals
        if len(for_.ifs) > 1:
            for_.ifs = [reduce(lambda x, y: And([x, y]), for_.ifs)]

    assert all(for_.__class__ == GenExprFor for for_ in fors)  # TODO: remove
    fors = ''.join([_handle_genexprfor(for_, frame_locals) for for_ in fors])
    if return_type in (Getattr, Sub):
        # Yielding an attribute: append it as a final step.
        return '{}/{}'.format(fors, _dispatch(name)(frame_locals))
    return fors
293 |
294 |
@_subtree_handler(Name, AssName, supply_ast=True)
def _handle_name(ast_subtree, frame_locals, relative=False):
    """Translate a name node into a node test.

    ``.0`` becomes '.', ``X`` becomes '*', and any other name is used as-is.
    """
    special_names = {'.0': '.', 'X': '*'}
    identifier = ast_subtree.name
    return special_names.get(identifier, identifier)
303 |
304 |
@_subtree_handler(GenExprFor)
def _handle_genexprfor(children, frame_locals, relative):
    """Translate one ``for`` clause into a path step.

    The step is a separator followed by the loop variable's node test and,
    when the clause has conditions, a predicate built from the first one.
    """
    target, source = children[:2]
    conditions = children[2:]

    # slashes are contingent on src
    if isinstance(source, Getattr):
        separator = _GENEXPRFOR_GETATTR_SEP_OVERRIDES.get(source.attrname, '//')
    else:
        separator = '//'

    # TODO: determine type of name
    step = '{}{}'.format(separator, _dispatch(target)(frame_locals))
    if conditions:
        # TODO: determine type of conds
        predicate = _dispatch(conditions[0])(frame_locals)  # 0?
        step = '{}[{}]'.format(step, predicate)
    return step
317 |
318 |
@_subtree_handler(Getattr)
def _handle_getattr(children, frame_locals, relative):
    """Translate an attribute access into an attribute step.

    Only the attribute name is used; the object it is read from is ignored.
    """
    _owner, attribute = children
    xpath_name = _ATTR_REPLACEMENTS.get(attribute, attribute)
    # this might need to be context-sensitive... Almost assuredly, actually
    # consider: .//div/@class, .//div[./@class='x']
    template = _ATTR_FORMAT_OVERRIDES.get(xpath_name, '@{}')
    return template.format(xpath_name)
326 |
327 |
@_subtree_handler(GenExprIf)
def _handle_genexprif(children, frame_locals, relative):
    """Translate an ``if`` clause by translating its single tested expression.

    Raises:
        NotImplementedError: if the clause does not have exactly one child.
    """
    if len(children) != 1:
        raise NotImplementedError(children)
    (condition,) = children
    return _dispatch(condition)(frame_locals)  # TODO: see if child type is consistent
334 |
335 |
@_subtree_handler(Compare)
def _handle_compare(children, frame_locals, relative):
    """Translate a single binary comparison into an XPath predicate.

    ``children`` must be ``(left, operator, right)``.  ``x in <collection>``
    (a constant collection, or a name bound to an iterable in
    ``frame_locals``) expands to ``x=v1 or x=v2 ...``; an empty collection
    yields ``false()``.  Other operators go through the replacement and
    format tables.

    Raises:
        NotImplementedError: for anything other than one binary comparison.
    """
    # ``collections.Iterable`` was removed in Python 3.10; the ABC lives in
    # ``collections.abc``.  The fallback keeps Python 2.7 working.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    rel = '.' if relative else ''

    if len(children) != 3:
        raise NotImplementedError(children)

    n1, op, n2 = children
    if n2.__class__ == Name:
        # Special case - drag in from outer scope if we're checking inclusion of value in iterable
        local = frame_locals.get(n2.name)
        if isinstance(local, Iterable) and op == 'in':
            n2 = Const(local)
    if op == 'in' and n2.__class__ == Const and not isinstance(n2.value, str):
        # Special case - checking whether value is in iterable
        comparisons = [Compare(n1, ('==', Const(val))) for val in n2.value]
        if not comparisons:
            # Membership in an empty collection can never hold; an empty
            # predicate would be invalid XPath.
            return 'false()'
        return rel + _handle_or(Or(comparisons), frame_locals)
    op = _COMPARE_OP_REPLACEMENTS.get(op, op)
    format_str = _COMPARE_OP_FORMAT_OVERRIDES.get(op, '{}{}{}')
    return format_str.format(rel + _dispatch(n1)(frame_locals), op, rel + _dispatch(n2)(frame_locals))
355 |
356 |
@_subtree_handler(Const, supply_ast=True)
def _handle_const(ast_subtree, frame_locals, relative=False):
    """Translate a constant into an XPath literal.

    Non-string values are emitted with ``repr``.  Strings are emitted as
    XPath string literals: XPath has no escape sequences, so the text is
    wrapped in whichever quote character it does not contain, and a string
    containing both kinds is assembled with ``concat()``.
    """
    value = ast_subtree.value
    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3.
    if not isinstance(value, (str, type(u''))):
        return repr(value)
    if "'" not in value:
        return "'" + value + "'"
    if '"' not in value:
        return '"' + value + '"'
    # Both quote kinds present: single-quote each fragment between the
    # apostrophes and glue them together with a double-quoted apostrophe.
    pieces = ["'" + piece + "'" for piece in value.split("'")]
    return 'concat(' + ', "\'", '.join(pieces) + ')'
360 |
361 |
@_subtree_handler(And)
def _handle_and(children, frame_locals, relative):
    """Translate a conjunction by joining its translated operands with ' and '.

    Each operand is prefixed with '.' when ``relative`` is true.
    """
    prefix = '.' if relative else ''
    operands = []
    for child in children:
        operands.append(prefix + _dispatch(child)(frame_locals))
    return ' and '.join(operands)
366 |
367 |
@_subtree_handler(Or)
def _handle_or(children, frame_locals, relative):
    """Translate a disjunction by joining its translated operands with ' or '.

    Each operand is prefixed with '.' when ``relative`` is true.
    """
    prefix = '.' if relative else ''
    operands = []
    for child in children:
        operands.append(prefix + _dispatch(child)(frame_locals))
    return ' or '.join(operands)
372 |
373 |
@_subtree_handler(Not)
def _handle_not(children, frame_locals, relative):
    """Translate a negation by wrapping its translated operand in ``not(...)``.

    The operand is prefixed with '.' when ``relative`` is true.
    """
    (operand,) = children
    prefix = '.' if relative else ''
    negated = prefix + _dispatch(operand)(frame_locals)
    return 'not({})'.format(negated)
379 |
380 |
@_subtree_handler(Sub)
def _handle_sub(children, frame_locals, relative):
    """Translate a subtraction by joining its translated operands with '-'.

    This is how dashed attribute names such as ``X.data-bind`` are emitted.
    """
    parts = [_dispatch(child)(frame_locals) for child in children]
    return '-'.join(parts)
384 |
385 |
@_subtree_handler(CallFunc)
def _handle_callfunc(children, frame_locals, relative):
    """Translate a call to ``any``, ``len`` or ``all`` over a generator.

    ``any(...)`` becomes the nested path, ``len(...)`` becomes
    ``count(<path>)``, and ``all(...)`` is rewritten as "no element fails the
    condition".  The nested path is relative unless ``_root_level`` reports
    that it iterates a root.

    Raises:
        NotImplementedError: for any other callee.
    """
    prefix = '.' if relative else ''
    callee = children[0]
    if not isinstance(callee, Name):
        raise NotImplementedError(children)

    func_name = callee.name

    def nested_is_relative():
        return not _root_level(children[1], frame_locals)

    if func_name == 'any':
        return prefix + _dispatch(children[1])(frame_locals, nested_is_relative())

    if func_name == 'len':
        counted = prefix + _dispatch(children[1])(frame_locals, nested_is_relative())
        return 'count({})'.format(counted)

    if func_name == 'all':
        # Need to change (\all x. P) to (\not \exists x. \not P)
        genexprinner = children[1].getChildren()[0]
        assert genexprinner.__class__ == GenExprInner
        name, genexprfor = genexprinner.getChildren()
        gef_assname, gef_name = genexprfor.getChildren()[:2]
        gef_ifs = genexprfor.ifs
        # Only the first condition of the clause is negated.
        negated_ifs = [Not(gef_ifs[0])] if gef_ifs else []
        inverted_for = GenExprFor(gef_assname, gef_name, negated_ifs)
        new_tree = Not(GenExpr(GenExprInner(name, [inverted_for])))
        return prefix + _handle_not(new_tree, frame_locals, nested_is_relative())

    raise NotImplementedError(children)
417 |
--------------------------------------------------------------------------------