├── tests
    ├── __init__.py
    ├── test_html.py
    └── test_xml.py
├── xq
    ├── __init__.py
    └── __main__.py
├── setup.py
├── README.rst
└── .gitignore


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/xq/__init__.py:
--------------------------------------------------------------------------------
1 | VERSION = '0.0.4'  # Package version; the README's build steps say to increment it before a release.
2 | NAME = 'xq'  # Distribution name, also used as the command-line program name.
3 | DESCRIPTION = 'Like jq but for XML and XPath.'  # One-line summary used in the package metadata and the CLI help.
4 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | from codecs import open
 3 | 
 4 | from xq import VERSION, NAME, DESCRIPTION
 5 | 
 6 | with open('README.rst', encoding='utf-8') as f:
 7 |     long_description = f.read()
 8 | 
 9 | setup(
10 |     name=NAME,
11 |     description=DESCRIPTION,
12 |     long_description=long_description,
13 |     keywords='xml xpath text',
14 |     version=VERSION,
15 |     license='MPL 2.0',
16 | 
17 |     author='Ben Jeffrey',
18 |     author_email='mail@benjeffrey.net',
19 |     url='https://github.com/jeffbr13/xq',
20 | 
21 |     classifiers=[
22 |         'Development Status :: 3 - Alpha',
23 |         'Programming Language :: Python :: 3',
24 |         'Intended Audience :: Developers',
25 |         'Environment :: Console',
26 |         'Topic :: Terminals',
27 |         'Topic :: Text Processing :: Markup :: XML',
28 |         'Topic :: Utilities',
29 |         'License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)',
30 |     ],
31 | 
32 |     packages=find_packages(),
33 | 
34 |     install_requires=[
35 |         'lxml',
36 |         'pygments'
37 |     ],
38 | 
39 |     entry_points={
40 |         'console_scripts': [
41 |             'xq=xq.__main__:main',
42 |         ],
43 |     },
44 | )
45 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | xq
 2 | ==
 3 | 
 4 | Apply XPath expressions to XML (and HTML) documents, much like ``jq`` applies its filter expressions to JSON.
 5 | 
 6 | 
 7 | Installation
 8 | ------------
 9 | 
10 | Install with ``pip``::
11 | 
12 |     pip install xq
13 | 
14 | Or download the repo and install via ``setuptools``::
15 | 
16 |     python setup.py install
17 | 
18 | 
19 | Usage
20 | -----
21 | 
22 | Extract download URLs from an RSS feed::
23 | 
24 |     http get 'http://br-rss.jeffbr13.net/rss/channels/1/' | xq '//item/enclosure/@url'
25 | 
26 | 
27 | Extract all links from an HTML page footer::
28 | 
29 |     http get 'http://br-rss.jeffbr13.net/' | xq '//footer//a/@href'
30 | 
31 | 
32 | Test
33 | ----
34 | 
35 | Run ``unittest`` in the root directory to autodetect and run tests::
36 | 
37 |     python -m unittest
38 | 
39 | 
40 | Build
41 | -----
42 | 
43 | Increment ``xq.VERSION`` and run the following three commands
44 | to create a `source distribution <https://packaging.python.org/tutorials/distributing-packages/#source-distributions>`_,
45 | create a `universal wheel <https://packaging.python.org/tutorials/distributing-packages/#universal-wheels>`_,
46 | and `upload to PyPI <https://packaging.python.org/tutorials/distributing-packages/#upload-your-distributions>`_ ::
47 | 
48 |     python setup.py sdist
49 |     python setup.py bdist_wheel --universal
50 |     twine upload dist/*
51 | 
52 | 
53 | See Also
54 | --------
55 | 
56 | - `jq <https://github.com/stedolan/jq>`_
57 | - `hq <https://github.com/rbwinslow/hq>`_
58 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by .ignore support plugin (hsz.mobi)
  2 | ### Python template
  3 | # Byte-compiled / optimized / DLL files
  4 | __pycache__/
  5 | *.py[cod]
  6 | *$py.class
  7 | 
  8 | # C extensions
  9 | *.so
 10 | 
 11 | # Distribution / packaging
 12 | .Python
 13 | build/
 14 | develop-eggs/
 15 | dist/
 16 | downloads/
 17 | eggs/
 18 | .eggs/
 19 | lib/
 20 | lib64/
 21 | parts/
 22 | sdist/
 23 | var/
 24 | wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | .hypothesis/
 50 | 
 51 | # Translations
 52 | *.mo
 53 | *.pot
 54 | 
 55 | # Django stuff:
 56 | *.log
 57 | .static_storage/
 58 | .media/
 59 | local_settings.py
 60 | 
 61 | # Flask stuff:
 62 | instance/
 63 | .webassets-cache
 64 | 
 65 | # Scrapy stuff:
 66 | .scrapy
 67 | 
 68 | # Sphinx documentation
 69 | docs/_build/
 70 | 
 71 | # PyBuilder
 72 | target/
 73 | 
 74 | # Jupyter Notebook
 75 | .ipynb_checkpoints
 76 | 
 77 | # pyenv
 78 | .python-version
 79 | 
 80 | # celery beat schedule file
 81 | celerybeat-schedule
 82 | 
 83 | # SageMath parsed files
 84 | *.sage.py
 85 | 
 86 | # Environments
 87 | .env
 88 | .venv
 89 | env/
 90 | venv/
 91 | ENV/
 92 | env.bak/
 93 | venv.bak/
 94 | 
 95 | # Spyder project settings
 96 | .spyderproject
 97 | .spyproject
 98 | 
 99 | # Rope project settings
100 | .ropeproject
101 | 
102 | # mkdocs documentation
103 | /site
104 | 
105 | # mypy
106 | .mypy_cache/
107 | 
108 | 


--------------------------------------------------------------------------------
/xq/__main__.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import sys
 3 | from typing import Union
 4 | 
 5 | from lxml import etree
 6 | from lxml.builder import E
 7 | from pygments import highlight
 8 | from pygments.formatters.other import NullFormatter
 9 | from pygments.formatters.terminal import TerminalFormatter
10 | from pygments.lexers.html import XmlLexer
11 | 
12 | from . import NAME, DESCRIPTION, VERSION
13 | 
14 | 
15 | def wrap_in_results(elements: [Union[etree._Element, etree._ElementUnicodeResult]]) -> etree._Element:
16 |     results = E.results()
17 |     for el in elements:
18 |         results.append(E.result(el))
19 |     return results
20 | 
21 | 
22 | def apply_xpath(infile, xpath_query=None, colorize=False):
23 |     try:
24 |         parsed = etree.parse(infile, etree.XMLParser(remove_blank_text=True))
25 |     except etree.XMLSyntaxError:
26 |         parsed = etree.parse(infile, etree.HTMLParser(remove_blank_text=True))
27 | 
28 |     if xpath_query:
29 |         matches = parsed.xpath(xpath_query)
30 |         results = wrap_in_results(matches)
31 |         output = etree.tostring(results, pretty_print=True)
32 |     else:
33 |         output = etree.tostring(parsed, pretty_print=True)
34 | 
35 |     formatter = TerminalFormatter() if colorize else NullFormatter()
36 |     return highlight(output, XmlLexer(), formatter)
37 | 
38 | 
39 | def main():
40 |     parser = argparse.ArgumentParser(
41 |         prog=NAME,
42 |         description=DESCRIPTION,
43 |     )
44 |     parser.add_argument('-v', '--version', action='version', version='%(prog)s ' + VERSION)
45 |     parser.add_argument(
46 |         'xpath_query', nargs='?', type=str,
47 |         help='XPath query to apply to XML document.'
48 |     )
49 |     parser.add_argument(
50 |         'file', nargs='?', type=argparse.FileType('r'), default=sys.stdin,
51 |         help='XML file to process. Defaults to STDIN.',
52 |     )
53 |     args = parser.parse_args()
54 |     sys.stdout.write(apply_xpath(args.file, args.xpath_query, sys.stdout.isatty()))
55 | 
56 | 
57 | if __name__ == '__main__':
58 |     main()
59 | 


--------------------------------------------------------------------------------
/tests/test_html.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from io import StringIO
 3 | from lxml import etree
 4 | from lxml.builder import E
 5 | 
 6 | from xq.__main__ import apply_xpath
 7 | 
 8 | SAMPLE_HTML = """
 9 | <!DOCTYPE html>
10 | <html>
11 | <head>
12 |     <title>Page Title</title>
13 |     <meta charset="utf-8">
14 |     <link rel="alternate" type="application/rss+xml" href="/rss/" />
15 |     <link rel="shortcut icon" href="/favicon.ico" />
16 | </head>
17 | <body>
18 |     <header>
19 |         <h1>Heading 1</h1>
20 |     </header>
21 |     <main>
22 |         <p>Paragraph 1 with a <a href="/url/1">link</a>.</p>
23 |         <h1>Heading 2</h1>
24 |         <p>Paragraph 2.</p>
25 |     </main>
26 |     <hr>
27 |     <footer>
28 |         <p><a href="/url/2">🛠</a></p>
29 |     </footer>
30 | </body>
31 | </html>
32 | """
33 | 
34 | 
35 | class TestHtmlXpathExpressions(unittest.TestCase):
36 |     def setUp(self):
37 |         self.test_input = StringIO(SAMPLE_HTML)
38 | 
39 |     def tearDown(self):
40 |         self.test_input.close()
41 | 
42 |     def test_extract_elements(self):
43 |         expected_output = ("<results>\n"
44 |                            "  <result><p>Paragraph 1 with a <a href=\"/url/1\">link</a>.</p>\n"
45 |                            "        </result>\n"
46 |                            "  <result><p>Paragraph 2.</p>\n"
47 |                            "    </result>\n"
48 |                            "  <result><p><a href=\"/url/2\">&#128736;</a></p>\n"
49 |                            "    </result>\n"
50 |                            "</results>\n")
51 | 
52 |         self.assertEqual(
53 |             apply_xpath(self.test_input, '//p', colorize=False),
54 |             expected_output
55 |         )
56 | 
57 |     def test_extract_attributes(self):
58 |         expected_output = etree.tounicode(
59 |             E.results(
60 |                 E.result('/url/1'),
61 |                 E.result('/url/2'),
62 |             ),
63 |             pretty_print=True
64 |         )
65 |         self.assertEqual(
66 |             apply_xpath(self.test_input, '//a/@href', colorize=False),
67 |             expected_output
68 |         )
69 | 
70 |     def test_extract_text(self):
71 |         expected_output = etree.tounicode(
72 |             E.results(
73 |                 E.result('Heading 1'),
74 |                 E.result('Heading 2'),
75 |             ),
76 |             pretty_print=True
77 |         )
78 |         self.assertEqual(
79 |             apply_xpath(self.test_input, '//h1/text()', colorize=False),
80 |             expected_output
81 |         )
82 | 


--------------------------------------------------------------------------------
/tests/test_xml.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | from io import StringIO
  3 | from lxml import etree
  4 | 
  5 | from lxml.builder import E
  6 | 
  7 | from xq.__main__ import apply_xpath
  8 | 
  9 | 
 10 | SAMPLE_XML = """
 11 | <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
 12 |     <channel>
 13 |         <atom:link rel="self" type="application/rss+xml" href="http://example.com/rss"/>
 14 |         <language>en-gb</language>
 15 | 
 16 |         <title>Example RSS feed</title>
 17 |         <description>This is a podcast RSS feed</description>
 18 |         <itunes:summary>This is a podcast RSS feed</itunes:summary>
 19 |         <itunes:image href="http://example.com/rss.png"/>
 20 |         <itunes:category text="Music"/>
 21 |         <itunes:explicit>yes</itunes:explicit>
 22 |         <copyright>Example</copyright>
 23 | 
 24 |         <item>
 25 |             <guid>http://example.com/rss/1</guid>
 26 |             <title>Episode 1</title>
 27 |             <description>This is the first podcast episode.</description>
 28 |             <itunes:summary>This is the first podcast episode</itunes:summary>
 29 |             <pubDate>Wed, 13 Sep 2017 10:49:56 +0000</pubDate>
 30 |             <itunes:duration>2:00:00</itunes:duration>
 31 |             <enclosure url="http://example.com/rss/1/download" length="287686422" type="audio/mp3"/>
 32 |         </item>
 33 | 
 34 |         <item>
 35 |             <guid>http://example.com/rss/2</guid>
 36 |             <title>Episode 2</title>
 37 |             <description>This is the second podcast episode.</description>
 38 |             <itunes:summary>This is the second podcast episode</itunes:summary>
 39 |             <pubDate>Mon, 11 Sep 2017 11:48:21 +0000</pubDate>
 40 |             <itunes:duration>1:00:00</itunes:duration>
 41 |             <enclosure url="http://example.com/rss/2/download" length="169357320" type="audio/mp3"/>
 42 |         </item>
 43 | 
 44 |         <item>
 45 |             <guid>http://example.com/rss/3</guid>
 46 |             <title>Episode 3</title>
 47 |             <description>This is the third podcast episode.</description>
 48 |             <itunes:summary>This is the third podcast episode</itunes:summary>
 49 |             <pubDate>Thu, 24 Aug 2017 00:50:10 +0000</pubDate>
 50 |             <itunes:duration>1:03:00</itunes:duration>
 51 |             <enclosure url="http://example.com/rss/3/download" length="151042044" type="audio/mp3"/>
 52 |         </item>
 53 |     </channel>
 54 | </rss>
 55 | """
 56 | 
 57 | 
 58 | class TestXmlXpathExpressions(unittest.TestCase):
 59 | 
 60 |     def setUp(self):
 61 |         self.test_input = StringIO(SAMPLE_XML)
 62 | 
 63 |     def tearDown(self):
 64 |         self.test_input.close()
 65 | 
 66 |     def test_extract_elements(self):
 67 |         expected_output = etree.tounicode(
 68 |             E.results(
 69 |                 E.result(
 70 |                     E.title('Episode 1')
 71 |                 ),
 72 |                 E.result(
 73 |                     E.title('Episode 2')
 74 |                 ),
 75 |                 E.result(
 76 |                     E.title('Episode 3')
 77 |                 ),
 78 |             ),
 79 |             pretty_print=True
 80 |         )
 81 |         self.assertEqual(expected_output, apply_xpath(self.test_input, './channel/item/title', colorize=False))
 82 | 
 83 |     def test_extract_single_attribute(self):
 84 |         expected_output = etree.tounicode(
 85 |             E.results(
 86 |                 E.result('http://example.com/rss/2/download'),
 87 |             ),
 88 |             pretty_print=True
 89 |         )
 90 |         self.assertEqual(expected_output, apply_xpath(self.test_input, './channel/item[2]/enclosure/@url', colorize=False))
 91 | 
 92 |     def test_extract_text(self):
 93 |         expected_output = etree.tounicode(
 94 |             E.results(
 95 |                 E.result('Episode 1'),
 96 |                 E.result('Episode 2'),
 97 |                 E.result('Episode 3'),
 98 |             ),
 99 |             pretty_print=True
100 |         )
101 |         self.assertEqual(expected_output, apply_xpath(self.test_input, './channel/item/title/text()', colorize=False))
102 | 


--------------------------------------------------------------------------------