├── .editorconfig
├── .git-blame-ignore-revs
├── .github
    └── workflows
    │   ├── checks.yml
    │   ├── publish.yml
    │   ├── tests-macos.yml
    │   ├── tests-ubuntu.yml
    │   └── tests-windows.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yml
├── AUTHORS
├── CHANGES
├── LICENSE
├── README.rst
├── cssselect
    ├── __init__.py
    ├── parser.py
    ├── py.typed
    └── xpath.py
├── docs
    ├── conf.py
    ├── conftest.py
    ├── index.rst
    └── requirements.txt
├── pyproject.toml
├── tests
    ├── __init__.py
    └── test_cssselect.py
└── tox.ini


/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | charset = utf-8
 5 | indent_style = space
 6 | indent_size = 4
 7 | insert_final_newline = true
 8 | end_of_line = lf
 9 | 
10 | [*.{yml,yaml}]
11 | indent_size = 2
12 | 


--------------------------------------------------------------------------------
/.git-blame-ignore-revs:
--------------------------------------------------------------------------------
1 | # applying pre-commit hooks to the project
2 | e91101b37f82558db84a6b8ee9a6dba1fd2ae0bb


--------------------------------------------------------------------------------
/.github/workflows/checks.yml:
--------------------------------------------------------------------------------
 1 | name: Checks
 2 | on: [push, pull_request]
 3 | 
 4 | jobs:
 5 |   checks:
 6 |     runs-on: ubuntu-latest
 7 |     strategy:
 8 |       fail-fast: false
 9 |       matrix:
10 |         include:
11 |         - python-version: "3.13"  # quoted: a bare 3.10 would load as the float 3.1
12 |           env:
13 |             TOXENV: pylint
14 |         - python-version: "3.13"  # Keep in sync with .readthedocs.yml
15 |           env:
16 |             TOXENV: docs
17 |         - python-version: "3.13"
18 |           env:
19 |             TOXENV: typing
20 |         - python-version: "3.13"
21 |           env:
22 |             TOXENV: twinecheck
23 | 
24 |     steps:
25 |     - uses: actions/checkout@v4
26 | 
27 |     - name: Set up Python ${{ matrix.python-version }}
28 |       uses: actions/setup-python@v5
29 |       with:
30 |         python-version: ${{ matrix.python-version }}
31 | 
32 |     - name: Run check
33 |       env: ${{ matrix.env }}
34 |       run: |
35 |         pip install -U pip
36 |         pip install -U tox
37 |         tox
38 | 
39 |   pre-commit:
40 |     runs-on: ubuntu-latest
41 |     steps:
42 |     - uses: actions/checkout@v4
43 |     - uses: pre-commit/action@v3.0.1
44 | 


--------------------------------------------------------------------------------
/.github/workflows/publish.yml:
--------------------------------------------------------------------------------
 1 | name: Publish
 2 | on:
 3 |   push:
 4 |     tags:
 5 |       - 'v[0-9]+.[0-9]+.[0-9]+'
 6 | 
 7 | jobs:
 8 |   publish:
 9 |     runs-on: ubuntu-latest
10 | 
11 |     environment:
12 |       name: pypi
13 |       url: https://pypi.org/p/cssselect
14 | 
15 |     permissions:
16 |       id-token: write
17 | 
18 |     steps:
19 |     - uses: actions/checkout@v4
20 | 
21 |     - name: Set up Python
22 |       uses: actions/setup-python@v5
23 |       with:
24 |         python-version: "3.13"  # quoted so YAML keeps the version a string
25 | 
26 |     - name: Build
27 |       run: |
28 |         python -m pip install --upgrade build
29 |         python -m build
30 | 
31 |     - name: Publish to PyPI
32 |       uses: pypa/gh-action-pypi-publish@release/v1
33 | 


--------------------------------------------------------------------------------
/.github/workflows/tests-macos.yml:
--------------------------------------------------------------------------------
 1 | name: macOS
 2 | on: [push, pull_request]
 3 | 
 4 | jobs:
 5 |   tests:
 6 |     runs-on: macos-latest
 7 |     strategy:
 8 |       fail-fast: false
 9 |       matrix:
10 |         python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
11 | 
12 |     steps:
13 |     - uses: actions/checkout@v4
14 | 
15 |     - name: Set up Python ${{ matrix.python-version }}
16 |       uses: actions/setup-python@v5
17 |       with:
18 |         python-version: ${{ matrix.python-version }}
19 | 
20 |     - name: Run tests
21 |       run: |
22 |         pip install -U pip
23 |         pip install -U tox
24 |         tox -e py
25 | 
26 |     - name: Upload coverage report
27 |       uses: codecov/codecov-action@v5
28 | 


--------------------------------------------------------------------------------
/.github/workflows/tests-ubuntu.yml:
--------------------------------------------------------------------------------
 1 | name: Ubuntu
 2 | on: [push, pull_request]
 3 | 
 4 | jobs:
 5 |   tests:
 6 |     runs-on: ubuntu-latest
 7 |     strategy:
 8 |       fail-fast: false
 9 |       matrix:
10 |         python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "pypy3.10"]
11 | 
12 |     steps:
13 |     - uses: actions/checkout@v4
14 | 
15 |     - name: Set up Python ${{ matrix.python-version }}
16 |       uses: actions/setup-python@v5
17 |       with:
18 |         python-version: ${{ matrix.python-version }}
19 | 
20 |     - name: Run tests
21 |       run: |
22 |         pip install -U pip
23 |         pip install -U tox
24 |         tox -e py
25 | 
26 |     - name: Upload coverage report
27 |       uses: codecov/codecov-action@v5
28 | 


--------------------------------------------------------------------------------
/.github/workflows/tests-windows.yml:
--------------------------------------------------------------------------------
 1 | name: Windows
 2 | on: [push, pull_request]
 3 | 
 4 | jobs:
 5 |   tests:
 6 |     runs-on: windows-latest
 7 |     strategy:
 8 |       fail-fast: false
 9 |       matrix:
10 |         python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
11 | 
12 |     steps:
13 |     - uses: actions/checkout@v4
14 | 
15 |     - name: Set up Python ${{ matrix.python-version }}
16 |       uses: actions/setup-python@v5
17 |       with:
18 |         python-version: ${{ matrix.python-version }}
19 | 
20 |     - name: Run tests
21 |       run: |
22 |         pip install -U pip
23 |         pip install -U tox
24 |         tox -e py
25 | 
26 |     - name: Upload coverage report
27 |       uses: codecov/codecov-action@v5
28 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | *.egg-info
 3 | /.tox
 4 | /MANIFEST
 5 | /dist
 6 | /docs/_build
 7 | /.coverage
 8 | .idea
 9 | htmlcov/
10 | coverage.xml
11 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/astral-sh/ruff-pre-commit
3 |   rev: v0.11.2
4 |   hooks:
5 |     - id: ruff
6 |       args: [ --fix ]
7 |     - id: ruff-format
8 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | version: 2
 2 | formats: all
 3 | sphinx:
 4 |   configuration: docs/conf.py
 5 |   fail_on_warning: true
 6 | build:
 7 |   os: ubuntu-24.04
 8 |   tools:
 9 |     # For available versions, see:
10 |     # https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python
11 |     python: "3.13"  # Keep in sync with .github/workflows/checks.yml
12 | python:
13 |   install:
14 |     - requirements: docs/requirements.txt
15 |     - path: .
16 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
 1 | Daniel Graña
 2 | Ian Bicking
 3 | James Salter
 4 | Laurence Rowe
 5 | Mikhail Korobov
 6 | Nik Nyby
 7 | Paul Tremberth
 8 | Simon Potter
 9 | Simon Sapin
10 | Stefan Behnel
11 | Thomas Grainger
12 | Varialus
13 | Arthur Darcet
14 | 


--------------------------------------------------------------------------------
/CHANGES:
--------------------------------------------------------------------------------
  1 | Changelog
  2 | =========
  3 | 
  4 | Version 1.3.0
  5 | -------------
  6 | 
  7 | Released on 2025-03-10.
  8 | 
  9 | *   Dropped support for Python 3.7-3.8, added support for Python 3.12-3.13 and
 10 |     PyPy 3.10.
 11 | 
 12 | *   Removed ``_unicode_safe_getattr()``, deprecated in 1.2.0.
 13 | 
 14 | *   Added ``pre-commit`` and formatted the code with ``ruff``.
 15 | 
 16 | *   Many CI additions and improvements.
 17 | 
 18 | 
 19 | Version 1.2.0
 20 | -------------
 21 | 
 22 | Released on 2022-10-27.
 23 | 
 24 | *   Drop support for Python 2.7, 3.4-3.6, add support for Python 3.7-3.11.
 25 | 
 26 | *   Add type annotations (PEP 484 and PEP 561).
 27 | 
 28 | *   More features from the CSS Selectors Level 4:
 29 | 
 30 |     *   The ``:is()`` pseudo-class.
 31 | 
 32 |     *   The ``:where()`` pseudo-class.
 33 | 
 34 |     *   The ``:has()`` pseudo-class, with some limitations.
 35 | 
 36 | *   Fix parsing ``:scope`` after a comma.
 37 | 
 38 | *   Add parentheses to fix condition precedence in some cases.
 39 | 
 40 | *   Private API changes related to the removal of the Python 2 support:
 41 | 
 42 |     * Remove ``_unicode`` and ``_unichr`` aliases from ``cssselect.parser``.
 43 | 
 44 |     * Remove ``_basestring`` and ``_unicode`` aliases from ``cssselect.xpath``.
 45 | 
 46 |     * Deprecate ``cssselect.xpath._unicode_safe_getattr()`` and change it to just
 47 |       call ``getattr()``.
 48 | 
 49 | *   Include tests in the PyPI tarball.
 50 | 
 51 | *   Many CI additions and improvements.
 52 | 
 53 | *   Improve the test coverage.
 54 | 
 55 | 
 56 | Version 1.1.0
 57 | -------------
 58 | 
 59 | Released on 2019-08-09.
 60 | 
 61 | *   Support for the ``:scope`` selector, which allows accessing immediate
 62 |     children of a selector.
 63 | 
 64 | *   Support for the ``|E`` syntax for type selectors without a namespace.
 65 | 
 66 | *   A new selector method, ``canonical``, returns the CSS expression of the
 67 |     selector, as a string.
 68 | 
 69 | 
 70 | Version 1.0.3
 71 | -------------
 72 | 
 73 | Released on 2017-12-27.
 74 | 
 75 | * Fix artifact uploads to pypi
 76 | 
 77 | 
 78 | Version 1.0.2
 79 | -------------
 80 | 
 81 | Released on 2017-12-26.
 82 | 
 83 | * Drop support for Python 2.6 and Python 3.3.
 84 | * Fix deprecation warning in Python 3.6.
 85 | * Minor cleanups.
 86 | 
 87 | 
 88 | Version 1.0.1
 89 | -------------
 90 | 
 91 | Released on 2017-01-10.
 92 | 
 93 | * Add support for Python 3.6.
 94 | * Documentation hosted `on Read the Docs <https://cssselect.readthedocs.io/>`_
 95 | 
 96 | 
 97 | Version 1.0.0
 98 | -------------
 99 | 
100 | Released on 2016-10-21.
101 | 
102 | * Add code coverage reports.
103 | * Fix ``:nth-*(an+b)`` pseudo-classes selectors.
104 |   (except ``*:nth-child()`` which looks untranslatable to XPath 1.0.)
105 | 
106 | 
107 | Version 0.9.2
108 | -------------
109 | 
110 | Released on 2016-06-15.
111 | 
112 | * Distribute as universal wheel.
113 | * Add support for Python 3.3, 3.4 and 3.5.
114 | * Drop support for Python 2.5 as testing is getting difficult.
115 | * Improve tests on pseudo-elements.
116 | 
117 | 
118 | Version 0.9.1
119 | -------------
120 | 
121 | Released on 2013-10-17.
122 | 
123 | * **Backward incompatible change from 0.9**:
124 |   :meth:`~GenericTranslator.selector_to_xpath` defaults to
125 |   ignoring pseudo-elements,
126 |   as it did in 0.8 and previous versions.
127 |   (:meth:`~GenericTranslator.css_to_xpath` doesn’t change.)
128 | * Drop official support for Python 2.4 and 3.1,
129 |   as testing was becoming difficult.
130 |   Nothing will break overnight,
131 |   but future releases may or may not work on these versions.
132 |   Older releases will remain available on PyPI.
133 | 
134 | 
135 | Version 0.9
136 | -----------
137 | 
138 | Released on 2013-10-11.
139 | 
140 | Add parser support for :attr:`functional
141 | pseudo-elements <Selector.pseudo_element>`.
142 | 
143 | *Update:*
144 | This version accidentally introduced a **backward incompatible** change:
145 | :meth:`~GenericTranslator.selector_to_xpath` defaults to
146 | rejecting pseudo-elements instead of ignoring them.
147 | 
148 | 
149 | Version 0.8
150 | -----------
151 | 
152 | Released on 2013-03-15.
153 | 
154 | Improvements:
155 | 
156 | * `#22 <https://github.com/SimonSapin/cssselect/issues/22>`_
157 |   Let extended translators override what XPathExpr class is used
158 | * `#19 <https://github.com/SimonSapin/cssselect/issues/19>`_
159 |   Use the built-in ``lang()`` XPath function
160 |   for implementing the ``:lang()`` pseudo-class
161 |   with XML documents.
162 |   This is probably faster than ``ancestor-or-self::``.
163 | 
164 | Bug fixes:
165 | 
166 | * `#14 <https://github.com/SimonSapin/cssselect/issues/14>`_
167 |   Fix non-ASCII pseudo-classes. (Invalid selector instead of crash.)
168 | * `#20 <https://github.com/SimonSapin/cssselect/issues/20>`_
169 |   As per the spec, elements containing only whitespace are not considered empty
170 |   for the ``:empty`` pseudo-class.
171 | 
172 | 
173 | Version 0.7.1
174 | -------------
175 | 
176 | Released on 2012-06-14. Code name *remember-to-test-with-tox*.
177 | 
178 | 0.7 broke the parser in Python 2.4 and 2.5; the tests in 2.x.
179 | Now all is well again.
180 | 
181 | Also, pseudo-elements are now correctly made lower-case. (They are supposed
182 | to be case-insensitive.)
183 | 
184 | 
185 | Version 0.7
186 | -----------
187 | 
188 | Released on 2012-06-14.
189 | 
190 | Bug fix release: see #2, #7 and #10 on GitHub.
191 | 
192 | * The tokenizer and parser have been rewritten to be much closer to the
193 |   specified grammar. In particular, non-ASCII characters and backslash-escapes
194 |   are now handled correctly.
195 | * Special characters are protected in the output so that generated XPath
196 |   expressions should always be valid.
197 | * The ``~=``, ``^=`` and ``*=`` attribute operators now correctly never match
198 |   when used with an empty string.
199 | 
200 | 
201 | Version 0.6.1
202 | -------------
203 | 
204 | Released on 2012-04-25.
205 | 
206 | Make sure that internal token objects do not "leak" into the public API and
207 | :attr:`Selector.pseudo_element` is a unicode string.
208 | 
209 | 
210 | Version 0.6
211 | -----------
212 | 
213 | Released on 2012-04-24.
214 | 
215 | * In ``setup.py`` use setuptools/distribute if available, but fall back
216 |   on distutils.
217 | * Implement the ``:lang()`` pseudo-class, although it is only based on
218 |   ``xml:lang`` or ``lang`` attributes. If the document language is known from
219 |   some other meta-data (like a ``Content-Language`` HTTP header or ``<meta>``
220 |   element), a workaround is to set a lang attribute on the root element.
221 | 
222 | 
223 | Version 0.5
224 | -----------
225 | 
226 | Released on 2012-04-20.
227 | 
228 | * Fix case sensitivity issues.
229 | * Implement :class:`HTMLTranslator` based on the `HTML5 specification`_
230 |   rather than guessing; add the ``xhtml`` parameter.
231 | * Several bug fixes and better test coverage.
232 | 
233 | .. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors
234 | 
235 | 
236 | Version 0.4
237 | -----------
238 | 
239 | Released on 2012-04-18.
240 | 
241 | * Add proper support for pseudo-elements
242 | * Add specificity calculation
243 | * Expose the :func:`parse` function and the parsed :class:`Selector` objects
244 |   in the API.
245 | * Add the :meth:`~GenericTranslator.selector_to_xpath` method.
246 | 
247 | 
248 | Version 0.3
249 | -----------
250 | 
251 | Released on 2012-04-17.
252 | 
253 | * Fix many parsing bugs.
254 | * Rename the ``Translator`` class to :class:`GenericTranslator`
255 | * There, implement ``:target``, ``:hover``, ``:focus``, ``:active``
256 |   ``:checked``, ``:enabled``, ``:disabled``, ``:link`` and ``:visited``
257 |   as never matching.
258 | * Make a new HTML-specific ``HTMLTranslator`` subclass. There, implement
259 |   ``:checked``, ``:enabled``, ``:disabled``, ``:link`` and ``:visited``
260 |   as appropriate for HTML, with all links  "not visited".
261 | * Remove the ``css_to_xpath`` function. The translator classes
262 |   are the new API.
263 | * Add support for ``:contains()`` back, but case-sensitive. lxml will
264 |   override it to be case-insensitive for backward-compatibility.
265 | 
266 | Discussion is open if anyone is interested in implementing e.g. ``:target``
267 | or ``:visited`` differently, but they can always do it in a ``Translator``
268 | subclass.
269 | 
270 | 
271 | Version 0.2
272 | -----------
273 | 
274 | Released on 2012-04-16.
275 | 
276 | * Remove the ``CSSSelector`` class. (The ``css_to_xpath()`` function is now
277 |   the main API.)
278 | * Remove support for the ``:contains()`` pseudo-class.
279 | 
280 | These changes allow cssselect to be used without lxml. (Hey, this was
281 | the whole point of this project.) The tests still require lxml, though.
282 | The removed parts are expected to stay in lxml for backward-compatibility.
283 | 
284 | ``:contains()`` only existed in an `early draft
285 | <http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors>`_
286 | of the Selectors specification, and was removed before Level 3 stabilized.
287 | Internally, it used a custom XPath extension function which can be
288 | difficult to express outside of lxml.
289 | 
290 | 
291 | * Separate the XPath translation from the parsed objects into a new
292 |   ``Translator`` class.
293 | 
294 | Subclasses of ``Translator`` can be made to change the way that some selector
295 | (e.g. a pseudo-class) is implemented.
296 | 
297 | 
298 | Version 0.1
299 | -----------
300 | 
301 | Released on 2012-04-13.
302 | 
303 | Extract lxml.cssselect from the rest of lxml and make it a stand-alone project.
304 | 
305 | Commit ``ea53ceaf7e44ba4fbb5c818ae31370932f47774e`` was taken on 2012-04-11
306 | from the 'master' branch of lxml’s git repository. This is somewhere
307 | between versions 2.3.4 and 2.4.
308 | 
309 | The commit history has been rewritten to:
310 | 
311 | * Remove lxml files unrelated to cssselect
312 | * Import the early history from the 'html' branch in the old SVN repository
313 | * Fix author names in commits from SVN
314 | 
315 | This project has its own import name, tests and documentation. But the
316 | code itself is unchanged and still depends on lxml.
317 | 
318 | 
319 | Earlier history
320 | ---------------
321 | 
322 | Search for *cssselect* in `lxml’s changelog
323 | <https://github.com/lxml/lxml/blob/master/CHANGES.txt>`_
324 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2007-2012 Ian Bicking and contributors. See AUTHORS
 2 | for more details.
 3 | 
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are
 8 | met:
 9 | 
10 | 1. Redistributions of source code must retain the above copyright
11 | notice, this list of conditions and the following disclaimer.
12 | 
13 | 2. Redistributions in binary form must reproduce the above copyright
14 | notice, this list of conditions and the following disclaimer in
15 | the documentation and/or other materials provided with the
16 | distribution.
17 | 
18 | 3. Neither the name of Ian Bicking nor the names of its contributors may
19 | be used to endorse or promote products derived from this software
20 | without specific prior written permission.
21 | 
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IAN BICKING OR
26 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | ===================================
 3 | cssselect: CSS Selectors for Python
 4 | ===================================
 5 | 
 6 | .. image:: https://img.shields.io/pypi/v/cssselect.svg
 7 |    :target: https://pypi.python.org/pypi/cssselect
 8 |    :alt: PyPI Version
 9 | 
10 | .. image:: https://img.shields.io/pypi/pyversions/cssselect.svg
11 |    :target: https://pypi.python.org/pypi/cssselect
12 |    :alt: Supported Python Versions
13 | 
14 | .. image:: https://github.com/scrapy/cssselect/actions/workflows/tests-ubuntu.yml/badge.svg
15 |    :target: https://github.com/scrapy/cssselect/actions/workflows/tests-ubuntu.yml
16 |    :alt: Tests
17 | 
18 | .. image:: https://img.shields.io/codecov/c/github/scrapy/cssselect/master.svg
19 |    :target: https://codecov.io/github/scrapy/cssselect?branch=master
20 |    :alt: Coverage report
21 | 
22 | **cssselect** is a BSD-licensed Python library to parse `CSS3 selectors`_ and
23 | translate them to `XPath 1.0`_ expressions.
24 | 
25 | `XPath 1.0`_ expressions can be used in lxml_ or another XPath engine to find
26 | the matching elements in an XML or HTML document.
27 | 
28 | Find the cssselect online documentation at https://cssselect.readthedocs.io.
29 | 
30 | Quick facts:
31 | 
32 | * Source, issues and pull requests `on GitHub
33 |   <https://github.com/scrapy/cssselect>`_
34 | * Releases `on PyPI <https://pypi.org/project/cssselect/>`_
35 | * Install with ``pip install cssselect``
36 | 
37 | 
38 | .. _CSS3 selectors: https://www.w3.org/TR/selectors-3/
39 | .. _XPath 1.0: https://www.w3.org/TR/xpath/all/
40 | .. _lxml: https://lxml.de/
41 | 


--------------------------------------------------------------------------------
/cssselect/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | CSS Selectors based on XPath
 3 | ============================
 4 | 
 5 | This module supports selecting XML/HTML elements based on CSS selectors.
 6 | See the `GenericTranslator` and `HTMLTranslator` classes for details.
 7 | 
 8 | 
 9 | :copyright: (c) 2007-2012 Ian Bicking and contributors.
10 | See AUTHORS for more details.
11 | :license: BSD, see LICENSE for more details.
12 | 
13 | """
14 | 
15 | from cssselect.parser import (
16 |     FunctionalPseudoElement,
17 |     Selector,
18 |     SelectorError,
19 |     SelectorSyntaxError,
20 |     parse,
21 | )
22 | from cssselect.xpath import ExpressionError, GenericTranslator, HTMLTranslator
23 | 
24 | __all__ = (
25 |     "ExpressionError",
26 |     "FunctionalPseudoElement",
27 |     "GenericTranslator",
28 |     "HTMLTranslator",
29 |     "Selector",
30 |     "SelectorError",
31 |     "SelectorSyntaxError",
32 |     "parse",
33 | )
34 | 
35 | VERSION = "1.3.0"
36 | __version__ = VERSION
37 | 


--------------------------------------------------------------------------------
/cssselect/parser.py:
--------------------------------------------------------------------------------
   1 | """
   2 | cssselect.parser
   3 | ================
   4 | 
   5 | Tokenizer, parser and parsed objects for CSS selectors.
   6 | 
   7 | 
   8 | :copyright: (c) 2007-2012 Ian Bicking and contributors.
   9 | See AUTHORS for more details.
  10 | :license: BSD, see LICENSE for more details.
  11 | 
  12 | """
  13 | 
  14 | from __future__ import annotations
  15 | 
  16 | import operator
  17 | import re
  18 | import sys
  19 | from typing import TYPE_CHECKING, Literal, Optional, Protocol, Union, cast, overload
  20 | 
  21 | if TYPE_CHECKING:
  22 |     from collections.abc import Iterable, Iterator, Sequence
  23 | 
  24 |     # typing.Self requires Python 3.11
  25 |     from typing_extensions import Self
  26 | 
  27 | 
  28 | def ascii_lower(string: str) -> str:
  29 |     """Lower-case, but only in the ASCII range."""
  30 |     return string.encode("utf8").lower().decode("utf8")  # bytes.lower() is ASCII-only
  31 | 
  32 | 
  33 | class SelectorError(Exception):
  34 |     """Common parent for :class:`SelectorSyntaxError` and
  35 |     :class:`ExpressionError`.
  36 | 
  37 |     You can just use ``except SelectorError:`` when calling
  38 |     :meth:`~GenericTranslator.css_to_xpath` and handle both exceptions types.
  39 | 
  40 |     """
  41 | 
  42 | 
  43 | class SelectorSyntaxError(SelectorError, SyntaxError):
  44 |     """Parsing a selector that does not match the grammar."""
  45 | 
  46 | 
  47 | #### Parsed objects
  48 | 
  49 | Tree = Union[
  50 |     "Element",
  51 |     "Hash",
  52 |     "Class",
  53 |     "Function",
  54 |     "Pseudo",
  55 |     "Attrib",
  56 |     "Negation",
  57 |     "Relation",
  58 |     "Matching",
  59 |     "SpecificityAdjustment",
  60 |     "CombinedSelector",
  61 | ]
  62 | PseudoElement = Union["FunctionalPseudoElement", str]
  63 | 
  64 | 
  65 | class Selector:
  66 |     """
  67 |     Represents a parsed selector.
  68 | 
  69 |     :meth:`~GenericTranslator.selector_to_xpath` accepts this object,
  70 |     but ignores :attr:`pseudo_element`. It is the user’s responsibility
  71 |     to account for pseudo-elements and reject selectors with unknown
  72 |     or unsupported pseudo-elements.
  73 | 
  74 |     """
  75 | 
  76 |     def __init__(self, tree: Tree, pseudo_element: PseudoElement | None = None) -> None:
  77 |         self.parsed_tree = tree
  78 |         if pseudo_element is not None and not isinstance(
  79 |             pseudo_element, FunctionalPseudoElement
  80 |         ):
  81 |             pseudo_element = ascii_lower(pseudo_element)  # plain identifiers are stored lower-cased
  82 |         #: A :class:`FunctionalPseudoElement`,
  83 |         #: or the identifier for the pseudo-element as a string,
  84 |         #: or ``None``.
  85 |         #:
  86 |         #: +-------------------------+----------------+--------------------------------+
  87 |         #: |                         | Selector       | Pseudo-element                 |
  88 |         #: +=========================+================+================================+
  89 |         #: | CSS3 syntax             | ``a::before``  | ``'before'``                   |
  90 |         #: +-------------------------+----------------+--------------------------------+
  91 |         #: | Older syntax            | ``a:before``   | ``'before'``                   |
  92 |         #: +-------------------------+----------------+--------------------------------+
  93 |         #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'``                   |
  94 |         #: | not in Selectors3       |                |                                |
  95 |         #: +-------------------------+----------------+--------------------------------+
  96 |         #: | Invalid pseudo-class    | ``li:marker``  | ``None``                       |
  97 |         #: +-------------------------+----------------+--------------------------------+
  98 |         #: | Functional              | ``a::foo(2)``  | ``FunctionalPseudoElement(…)`` |
  99 |         #: +-------------------------+----------------+--------------------------------+
 100 |         #:
 101 |         #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
 102 |         self.pseudo_element = pseudo_element
 103 | 
 104 |     def __repr__(self) -> str:
 105 |         if isinstance(self.pseudo_element, FunctionalPseudoElement):
 106 |             pseudo_element = repr(self.pseudo_element)
 107 |         elif self.pseudo_element:
 108 |             pseudo_element = f"::{self.pseudo_element}"
 109 |         else:
 110 |             pseudo_element = ""
 111 |         return f"{self.__class__.__name__}[{self.parsed_tree!r}{pseudo_element}]"
 112 | 
 113 |     def canonical(self) -> str:
 114 |         """Return a CSS representation for this selector (a string)."""
 115 |         if isinstance(self.pseudo_element, FunctionalPseudoElement):
 116 |             pseudo_element = f"::{self.pseudo_element.canonical()}"
 117 |         elif self.pseudo_element:
 118 |             pseudo_element = f"::{self.pseudo_element}"
 119 |         else:
 120 |             pseudo_element = ""
 121 |         res = f"{self.parsed_tree.canonical()}{pseudo_element}"
 122 |         if len(res) > 1:  # keep a result that is a single character as-is
 123 |             res = res.lstrip("*")
 124 |         return res
 125 | 
 126 |     def specificity(self) -> tuple[int, int, int]:
 127 |         """Return the specificity_ of this selector as a tuple of 3 integers.
 128 | 
 129 |         .. _specificity: http://www.w3.org/TR/selectors/#specificity
 130 | 
 131 |         """
 132 |         a, b, c = self.parsed_tree.specificity()
 133 |         if self.pseudo_element:
 134 |             c += 1  # a pseudo-element adds 1 to the last component
 135 |         return a, b, c
 136 | 
 137 | 
 138 | class Class:
 139 |     """
 140 |     Represents ``selector.class_name``: a class name applied to ``selector``.
 141 |     """
 142 | 
 143 |     def __init__(self, selector: Tree, class_name: str) -> None:
 144 |         self.selector = selector
 145 |         self.class_name = class_name
 146 | 
 147 |     def __repr__(self) -> str:
 148 |         return f"{self.__class__.__name__}[{self.selector!r}.{self.class_name}]"
 149 | 
 150 |     def canonical(self) -> str:
 151 |         return f"{self.selector.canonical()}.{self.class_name}"
 152 | 
 153 |     def specificity(self) -> tuple[int, int, int]:
 154 |         a, b, c = self.selector.specificity()
 155 |         b += 1  # the class adds 1 to the middle component
 156 |         return a, b, c
 157 | 
 158 | 
 159 | class FunctionalPseudoElement:
 160 |     """
 161 |     Represents selector::name(arguments)
 162 | 
 163 |     .. attribute:: name
 164 | 
 165 |         The name (identifier) of the pseudo-element, as a string.
 166 | 
 167 |     .. attribute:: arguments
 168 | 
 169 |         The arguments of the pseudo-element, as a list of tokens.
 170 | 
 171 |         **Note:** tokens are not part of the public API,
 172 |         and may change between cssselect versions.
 173 |         Use at your own risk.
 174 | 
 175 |     """
 176 | 
 177 |     def __init__(self, name: str, arguments: Sequence[Token]) -> None:
 178 |         self.name = ascii_lower(name)
 179 |         self.arguments = arguments
 180 | 
 181 |     def __repr__(self) -> str:
 182 |         token_values = [token.value for token in self.arguments]
 183 |         return f"{self.__class__.__name__}[::{self.name}({token_values!r})]"
 184 | 
 185 |     def argument_types(self) -> list[str]:
 186 |         return [token.type for token in self.arguments]
 187 | 
 188 |     def canonical(self) -> str:
 189 |         args = "".join(token.css() for token in self.arguments)
 190 |         return f"{self.name}({args})"
 191 | 
 192 | 
 193 | class Function:
 194 |     """
 195 |     Represents ``selector:name(expr)``; the name is stored ASCII-lower-cased.
 196 |     """
 197 | 
 198 |     def __init__(self, selector: Tree, name: str, arguments: Sequence[Token]) -> None:
 199 |         self.selector = selector
 200 |         self.name = ascii_lower(name)
 201 |         self.arguments = arguments
 202 | 
 203 |     def __repr__(self) -> str:
 204 |         token_values = [token.value for token in self.arguments]
 205 |         return f"{self.__class__.__name__}[{self.selector!r}:{self.name}({token_values!r})]"
 206 | 
 207 |     def argument_types(self) -> list[str]:
 208 |         return [token.type for token in self.arguments]
 209 | 
 210 |     def canonical(self) -> str:
 211 |         args = "".join(token.css() for token in self.arguments)
 212 |         return f"{self.selector.canonical()}:{self.name}({args})"
 213 | 
 214 |     def specificity(self) -> tuple[int, int, int]:
 215 |         a, b, c = self.selector.specificity()
 216 |         b += 1  # the functional pseudo-class adds 1 to the middle component
 217 |         return a, b, c
 218 | 
 219 | 
 220 | class Pseudo:
 221 |     """
 222 |     Represents selector:ident
 223 |     """
 224 | 
 225 |     def __init__(self, selector: Tree, ident: str) -> None:
 226 |         self.selector = selector
 227 |         self.ident = ascii_lower(ident)
 228 | 
 229 |     def __repr__(self) -> str:
 230 |         return f"{self.__class__.__name__}[{self.selector!r}:{self.ident}]"
 231 | 
 232 |     def canonical(self) -> str:
 233 |         return f"{self.selector.canonical()}:{self.ident}"
 234 | 
 235 |     def specificity(self) -> tuple[int, int, int]:
 236 |         a, b, c = self.selector.specificity()
 237 |         b += 1
 238 |         return a, b, c
 239 | 
 240 | 
 241 | class Negation:
 242 |     """
 243 |     Represents selector:not(subselector)
 244 |     """
 245 | 
 246 |     def __init__(self, selector: Tree, subselector: Tree) -> None:
 247 |         self.selector = selector
 248 |         self.subselector = subselector
 249 | 
 250 |     def __repr__(self) -> str:
 251 |         return f"{self.__class__.__name__}[{self.selector!r}:not({self.subselector!r})]"
 252 | 
 253 |     def canonical(self) -> str:
 254 |         subsel = self.subselector.canonical()
 255 |         if len(subsel) > 1:
 256 |             subsel = subsel.lstrip("*")
 257 |         return f"{self.selector.canonical()}:not({subsel})"
 258 | 
 259 |     def specificity(self) -> tuple[int, int, int]:
 260 |         a1, b1, c1 = self.selector.specificity()
 261 |         a2, b2, c2 = self.subselector.specificity()
 262 |         return a1 + a2, b1 + b2, c1 + c2
 263 | 
 264 | 
 265 | class Relation:
 266 |     """
 267 |     Represents selector:has(subselector)
 268 |     """
 269 | 
 270 |     def __init__(self, selector: Tree, combinator: Token, subselector: Selector):
 271 |         self.selector = selector
 272 |         self.combinator = combinator
 273 |         self.subselector = subselector
 274 | 
 275 |     def __repr__(self) -> str:
 276 |         return f"{self.__class__.__name__}[{self.selector!r}:has({self.subselector!r})]"
 277 | 
 278 |     def canonical(self) -> str:
 279 |         try:
 280 |             subsel = self.subselector[0].canonical()  # type: ignore[index]
 281 |         except TypeError:
 282 |             subsel = self.subselector.canonical()
 283 |         if len(subsel) > 1:
 284 |             subsel = subsel.lstrip("*")
 285 |         return f"{self.selector.canonical()}:has({subsel})"
 286 | 
 287 |     def specificity(self) -> tuple[int, int, int]:
 288 |         a1, b1, c1 = self.selector.specificity()
 289 |         try:
 290 |             a2, b2, c2 = self.subselector[-1].specificity()  # type: ignore[index]
 291 |         except TypeError:
 292 |             a2, b2, c2 = self.subselector.specificity()
 293 |         return a1 + a2, b1 + b2, c1 + c2
 294 | 
 295 | 
 296 | class Matching:
 297 |     """
 298 |     Represents selector:is(selector_list)
 299 |     """
 300 | 
 301 |     def __init__(self, selector: Tree, selector_list: Iterable[Tree]):
 302 |         self.selector = selector
 303 |         self.selector_list = selector_list
 304 | 
 305 |     def __repr__(self) -> str:
 306 |         args_str = ", ".join(repr(s) for s in self.selector_list)
 307 |         return f"{self.__class__.__name__}[{self.selector!r}:is({args_str})]"
 308 | 
 309 |     def canonical(self) -> str:
 310 |         selector_arguments = []
 311 |         for s in self.selector_list:
 312 |             selarg = s.canonical()
 313 |             selector_arguments.append(selarg.lstrip("*"))
 314 |         args_str = ", ".join(str(s) for s in selector_arguments)
 315 |         return f"{self.selector.canonical()}:is({args_str})"
 316 | 
 317 |     def specificity(self) -> tuple[int, int, int]:
 318 |         return max(x.specificity() for x in self.selector_list)
 319 | 
 320 | 
 321 | class SpecificityAdjustment:
 322 |     """
 323 |     Represents selector:where(selector_list)
 324 |     Same as selector:is(selector_list), but its specificity is always 0
 325 |     """
 326 | 
 327 |     def __init__(self, selector: Tree, selector_list: list[Tree]):
 328 |         self.selector = selector
 329 |         self.selector_list = selector_list
 330 | 
 331 |     def __repr__(self) -> str:
 332 |         args_str = ", ".join(repr(s) for s in self.selector_list)
 333 |         return f"{self.__class__.__name__}[{self.selector!r}:where({args_str})]"
 334 | 
 335 |     def canonical(self) -> str:
 336 |         selector_arguments = []
 337 |         for s in self.selector_list:
 338 |             selarg = s.canonical()
 339 |             selector_arguments.append(selarg.lstrip("*"))
 340 |         args_str = ", ".join(str(s) for s in selector_arguments)
 341 |         return f"{self.selector.canonical()}:where({args_str})"
 342 | 
 343 |     def specificity(self) -> tuple[int, int, int]:
 344 |         return 0, 0, 0
 345 | 
 346 | 
 347 | class Attrib:
 348 |     """
 349 |     Represents selector[namespace|attrib operator value]
 350 |     """
 351 | 
 352 |     @overload
 353 |     def __init__(
 354 |         self,
 355 |         selector: Tree,
 356 |         namespace: str | None,
 357 |         attrib: str,
 358 |         operator: Literal["exists"],
 359 |         value: None,
 360 |     ) -> None: ...
 361 | 
 362 |     @overload
 363 |     def __init__(
 364 |         self,
 365 |         selector: Tree,
 366 |         namespace: str | None,
 367 |         attrib: str,
 368 |         operator: str,
 369 |         value: Token,
 370 |     ) -> None: ...
 371 | 
 372 |     def __init__(
 373 |         self,
 374 |         selector: Tree,
 375 |         namespace: str | None,
 376 |         attrib: str,
 377 |         operator: str,
 378 |         value: Token | None,
 379 |     ) -> None:
 380 |         self.selector = selector
 381 |         self.namespace = namespace
 382 |         self.attrib = attrib
 383 |         self.operator = operator
 384 |         self.value = value
 385 | 
 386 |     def __repr__(self) -> str:
 387 |         attrib = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib
 388 |         if self.operator == "exists":
 389 |             return f"{self.__class__.__name__}[{self.selector!r}[{attrib}]]"
 390 |         assert self.value is not None
 391 |         return f"{self.__class__.__name__}[{self.selector!r}[{attrib} {self.operator} {self.value.value!r}]]"
 392 | 
 393 |     def canonical(self) -> str:
 394 |         attrib = f"{self.namespace}|{self.attrib}" if self.namespace else self.attrib
 395 | 
 396 |         if self.operator == "exists":
 397 |             op = attrib
 398 |         else:
 399 |             assert self.value is not None
 400 |             op = f"{attrib}{self.operator}{self.value.css()}"
 401 | 
 402 |         return f"{self.selector.canonical()}[{op}]"
 403 | 
 404 |     def specificity(self) -> tuple[int, int, int]:
 405 |         a, b, c = self.selector.specificity()
 406 |         b += 1
 407 |         return a, b, c
 408 | 
 409 | 
 410 | class Element:
 411 |     """
 412 |     Represents namespace|element
 413 | 
 414 |     `None` is for the universal selector '*'
 415 | 
 416 |     """
 417 | 
 418 |     def __init__(
 419 |         self, namespace: str | None = None, element: str | None = None
 420 |     ) -> None:
 421 |         self.namespace = namespace
 422 |         self.element = element
 423 | 
 424 |     def __repr__(self) -> str:
 425 |         return f"{self.__class__.__name__}[{self.canonical()}]"
 426 | 
 427 |     def canonical(self) -> str:
 428 |         element = self.element or "*"
 429 |         if self.namespace:
 430 |             element = f"{self.namespace}|{element}"
 431 |         return element
 432 | 
 433 |     def specificity(self) -> tuple[int, int, int]:
 434 |         if self.element:
 435 |             return 0, 0, 1
 436 |         return 0, 0, 0
 437 | 
 438 | 
 439 | class Hash:
 440 |     """
 441 |     Represents selector#id
 442 |     """
 443 | 
 444 |     def __init__(self, selector: Tree, id: str) -> None:
 445 |         self.selector = selector
 446 |         self.id = id
 447 | 
 448 |     def __repr__(self) -> str:
 449 |         return f"{self.__class__.__name__}[{self.selector!r}#{self.id}]"
 450 | 
 451 |     def canonical(self) -> str:
 452 |         return f"{self.selector.canonical()}#{self.id}"
 453 | 
 454 |     def specificity(self) -> tuple[int, int, int]:
 455 |         a, b, c = self.selector.specificity()
 456 |         a += 1
 457 |         return a, b, c
 458 | 
 459 | 
 460 | class CombinedSelector:
 461 |     def __init__(self, selector: Tree, combinator: str, subselector: Tree) -> None:
 462 |         assert selector is not None
 463 |         self.selector = selector
 464 |         self.combinator = combinator
 465 |         self.subselector = subselector
 466 | 
 467 |     def __repr__(self) -> str:
 468 |         comb = "<followed>" if self.combinator == " " else self.combinator
 469 |         return (
 470 |             f"{self.__class__.__name__}[{self.selector!r} {comb} {self.subselector!r}]"
 471 |         )
 472 | 
 473 |     def canonical(self) -> str:
 474 |         subsel = self.subselector.canonical()
 475 |         if len(subsel) > 1:
 476 |             subsel = subsel.lstrip("*")
 477 |         return f"{self.selector.canonical()} {self.combinator} {subsel}"
 478 | 
 479 |     def specificity(self) -> tuple[int, int, int]:
 480 |         a1, b1, c1 = self.selector.specificity()
 481 |         a2, b2, c2 = self.subselector.specificity()
 482 |         return a1 + a2, b1 + b2, c1 + c2
 483 | 
 484 | 
 485 | #### Parser
 486 | 
# Fast-path patterns used by parse() before falling back to the tokenizer.
# Each one matches a whole input string, allowing surrounding whitespace.

# A bare element name made of ASCII letters only: "foo"
_el_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$")

# An id selector with an optional element name: "foo#bar" or "#bar"
_id_re = re.compile(r"^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$")

# A class selector with an optional element name: "foo.bar" or ".bar"
_class_re = re.compile(
    r"^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$"
)
 497 | 
 498 | 
 499 | def parse(css: str) -> list[Selector]:
 500 |     """Parse a CSS *group of selectors*.
 501 | 
 502 |     If you don't care about pseudo-elements or selector specificity,
 503 |     you can skip this and use :meth:`~GenericTranslator.css_to_xpath`.
 504 | 
 505 |     :param css:
 506 |         A *group of selectors* as a string.
 507 |     :raises:
 508 |         :class:`SelectorSyntaxError` on invalid selectors.
 509 |     :returns:
 510 |         A list of parsed :class:`Selector` objects, one for each
 511 |         selector in the comma-separated group.
 512 | 
 513 |     """
 514 |     # Fast path for simple cases
 515 |     match = _el_re.match(css)
 516 |     if match:
 517 |         return [Selector(Element(element=match.group(1)))]
 518 |     match = _id_re.match(css)
 519 |     if match is not None:
 520 |         return [Selector(Hash(Element(element=match.group(1) or None), match.group(2)))]
 521 |     match = _class_re.match(css)
 522 |     if match is not None:
 523 |         return [
 524 |             Selector(Class(Element(element=match.group(1) or None), match.group(2)))
 525 |         ]
 526 | 
 527 |     stream = TokenStream(tokenize(css))
 528 |     stream.source = css
 529 |     return list(parse_selector_group(stream))
 530 | 
 531 | 
 532 | #    except SelectorSyntaxError:
 533 | #        e = sys.exc_info()[1]
 534 | #        message = "%s at %s -> %r" % (
 535 | #            e, stream.used, stream.peek())
 536 | #        e.msg = message
 537 | #        e.args = tuple([message])
 538 | #        raise
 539 | 
 540 | 
 541 | def parse_selector_group(stream: TokenStream) -> Iterator[Selector]:
 542 |     stream.skip_whitespace()
 543 |     while 1:
 544 |         yield Selector(*parse_selector(stream))
 545 |         if stream.peek() == ("DELIM", ","):
 546 |             stream.next()
 547 |             stream.skip_whitespace()
 548 |         else:
 549 |             break
 550 | 
 551 | 
 552 | def parse_selector(stream: TokenStream) -> tuple[Tree, PseudoElement | None]:
 553 |     result, pseudo_element = parse_simple_selector(stream)
 554 |     while 1:
 555 |         stream.skip_whitespace()
 556 |         peek = stream.peek()
 557 |         if peek in (("EOF", None), ("DELIM", ",")):
 558 |             break
 559 |         if pseudo_element:
 560 |             raise SelectorSyntaxError(
 561 |                 f"Got pseudo-element ::{pseudo_element} not at the end of a selector"
 562 |             )
 563 |         if peek.is_delim("+", ">", "~"):
 564 |             # A combinator
 565 |             combinator = cast("str", stream.next().value)
 566 |             stream.skip_whitespace()
 567 |         else:
 568 |             # By exclusion, the last parse_simple_selector() ended
 569 |             # at peek == ' '
 570 |             combinator = " "
 571 |         next_selector, pseudo_element = parse_simple_selector(stream)
 572 |         result = CombinedSelector(result, combinator, next_selector)
 573 |     return result, pseudo_element
 574 | 
 575 | 
def parse_simple_selector(
    stream: TokenStream, inside_negation: bool = False
) -> tuple[Tree, PseudoElement | None]:
    """
    Parse one sequence of simple selectors from *stream*.

    An optional ``namespace|element`` (or ``*``) comes first, followed by any
    number of ``#id``, ``.class``, ``[attrib]``, ``:pseudo`` /
    ``:function(...)`` parts and, at the end, an optional pseudo-element.

    :param stream: The token stream to consume from.
    :param inside_negation:
        When true, a closing ``)`` also terminates the selector. It is set
        for the argument of ``:not()``, where a nested ``:not()`` is rejected.
    :raises:
        :class:`SelectorSyntaxError` on an unexpected token, when no token at
        all was consumed, or when a pseudo-element is not the last part.
    :returns:
        ``(tree, pseudo_element)`` where ``pseudo_element`` is ``None``,
        a string, or a :class:`FunctionalPseudoElement`.
    """
    stream.skip_whitespace()
    # Remember how many tokens were consumed so far, to detect at the end
    # whether this call consumed anything.
    selector_start = len(stream.used)
    peek = stream.peek()
    if peek.type == "IDENT" or peek == ("DELIM", "*"):
        # Tentatively read the first name as a namespace ...
        if peek.type == "IDENT":
            namespace = stream.next().value
        else:
            stream.next()
            namespace = None
        if stream.peek() == ("DELIM", "|"):
            stream.next()
            element = stream.next_ident_or_star()
        else:
            # ... but without a following '|' it was the element name.
            element = namespace
            namespace = None
    else:
        element = namespace = None
    result: Tree = Element(namespace, element)
    pseudo_element: PseudoElement | None = None
    while 1:
        peek = stream.peek()
        # Whitespace, EOF, ',' or a combinator ends this simple selector;
        # so does ')' when parsing a function argument.
        if (
            peek.type in ("S", "EOF")
            or peek.is_delim(",", "+", ">", "~")
            or (inside_negation and peek == ("DELIM", ")"))
        ):
            break
        # Anything after a pseudo-element is an error.
        if pseudo_element:
            raise SelectorSyntaxError(
                f"Got pseudo-element ::{pseudo_element} not at the end of a selector"
            )
        if peek.type == "HASH":
            result = Hash(result, cast("str", stream.next().value))
        elif peek == ("DELIM", "."):
            stream.next()
            result = Class(result, stream.next_ident())
        elif peek == ("DELIM", "|"):
            # "|name": note that this replaces whatever was built so far.
            stream.next()
            result = Element(None, stream.next_ident())
        elif peek == ("DELIM", "["):
            stream.next()
            result = parse_attrib(result, stream)
        elif peek == ("DELIM", ":"):
            stream.next()
            if stream.peek() == ("DELIM", ":"):
                # "::name" or "::name(args)": a pseudo-element.
                stream.next()
                pseudo_element = stream.next_ident()
                if stream.peek() == ("DELIM", "("):
                    stream.next()
                    pseudo_element = FunctionalPseudoElement(
                        pseudo_element, parse_arguments(stream)
                    )
                continue
            ident = stream.next_ident()
            if ident.lower() in ("first-line", "first-letter", "before", "after"):
                # Special case: CSS 2.1 pseudo-elements can have a single ':'
                # Any new pseudo-element must have two.
                pseudo_element = str(ident)
                continue
            if stream.peek() != ("DELIM", "("):
                # A plain pseudo-class without arguments.
                result = Pseudo(result, ident)
                # ":scope" on an implicit universal element is only accepted
                # when ':' and 'scope' are the first tokens of the input
                # (optionally after one whitespace token) or directly follow
                # a ',' (optionally with one whitespace token in between).
                if repr(result) == "Pseudo[Element[*]:scope]" and not (
                    len(stream.used) == 2
                    or (len(stream.used) == 3 and stream.used[0].type == "S")
                    or (len(stream.used) >= 3 and stream.used[-3].is_delim(","))
                    or (
                        len(stream.used) >= 4
                        and stream.used[-3].type == "S"
                        and stream.used[-4].is_delim(",")
                    )
                ):
                    raise SelectorSyntaxError(
                        'Got immediate child pseudo-element ":scope" '
                        "not at the start of a selector"
                    )
                continue
            # A functional pseudo-class: consume '(' and dispatch on its name.
            stream.next()
            stream.skip_whitespace()
            if ident.lower() == "not":
                if inside_negation:
                    raise SelectorSyntaxError("Got nested :not()")
                argument, argument_pseudo_element = parse_simple_selector(
                    stream, inside_negation=True
                )
                next = stream.next()
                if argument_pseudo_element:
                    raise SelectorSyntaxError(
                        f"Got pseudo-element ::{argument_pseudo_element} inside :not() at {next.pos}"
                    )
                if next != ("DELIM", ")"):
                    raise SelectorSyntaxError(f"Expected ')', got {next}")
                result = Negation(result, argument)
            elif ident.lower() == "has":
                combinator, arguments = parse_relative_selector(stream)
                result = Relation(result, combinator, arguments)

            elif ident.lower() in ("matches", "is"):
                selectors = parse_simple_selector_arguments(stream)
                result = Matching(result, selectors)
            elif ident.lower() == "where":
                selectors = parse_simple_selector_arguments(stream)
                result = SpecificityAdjustment(result, selectors)
            else:
                # Any other function keeps its raw argument tokens.
                result = Function(result, ident, parse_arguments(stream))
        else:
            raise SelectorSyntaxError(f"Expected selector, got {peek}")
    # Nothing was consumed at all: there was no selector here.
    if len(stream.used) == selector_start:
        raise SelectorSyntaxError(f"Expected selector, got {stream.peek()}")
    return result, pseudo_element
 688 | 
 689 | 
 690 | def parse_arguments(stream: TokenStream) -> list[Token]:
 691 |     arguments: list[Token] = []
 692 |     while 1:  # noqa: RET503
 693 |         stream.skip_whitespace()
 694 |         next = stream.next()
 695 |         if next.type in ("IDENT", "STRING", "NUMBER") or next in [
 696 |             ("DELIM", "+"),
 697 |             ("DELIM", "-"),
 698 |         ]:
 699 |             arguments.append(next)
 700 |         elif next == ("DELIM", ")"):
 701 |             return arguments
 702 |         else:
 703 |             raise SelectorSyntaxError(f"Expected an argument, got {next}")
 704 | 
 705 | 
 706 | def parse_relative_selector(stream: TokenStream) -> tuple[Token, Selector]:
 707 |     stream.skip_whitespace()
 708 |     subselector = ""
 709 |     next = stream.next()
 710 | 
 711 |     if next in [("DELIM", "+"), ("DELIM", "-"), ("DELIM", ">"), ("DELIM", "~")]:
 712 |         combinator = next
 713 |         stream.skip_whitespace()
 714 |         next = stream.next()
 715 |     else:
 716 |         combinator = Token("DELIM", " ", pos=0)
 717 | 
 718 |     while 1:  # noqa: RET503
 719 |         if next.type in ("IDENT", "STRING", "NUMBER") or next in [
 720 |             ("DELIM", "."),
 721 |             ("DELIM", "*"),
 722 |         ]:
 723 |             subselector += cast("str", next.value)
 724 |         elif next == ("DELIM", ")"):
 725 |             result = parse(subselector)
 726 |             return combinator, result[0]
 727 |         else:
 728 |             raise SelectorSyntaxError(f"Expected an argument, got {next}")
 729 |         next = stream.next()
 730 | 
 731 | 
 732 | def parse_simple_selector_arguments(stream: TokenStream) -> list[Tree]:
 733 |     arguments = []
 734 |     while 1:
 735 |         result, pseudo_element = parse_simple_selector(stream, True)
 736 |         if pseudo_element:
 737 |             raise SelectorSyntaxError(
 738 |                 f"Got pseudo-element ::{pseudo_element} inside function"
 739 |             )
 740 |         stream.skip_whitespace()
 741 |         next = stream.next()
 742 |         if next in (("EOF", None), ("DELIM", ",")):
 743 |             stream.next()
 744 |             stream.skip_whitespace()
 745 |             arguments.append(result)
 746 |         elif next == ("DELIM", ")"):
 747 |             arguments.append(result)
 748 |             break
 749 |         else:
 750 |             raise SelectorSyntaxError(f"Expected an argument, got {next}")
 751 |     return arguments
 752 | 
 753 | 
def parse_attrib(selector: Tree, stream: TokenStream) -> Attrib:
    """
    Parse an attribute selector; the opening ``[`` is already consumed.

    Accepted forms are ``[attrib]``, ``[ns|attrib]``, ``[*|attrib]`` and any
    of them followed by an operator (``=``, ``^=``, ``$=``, ``*=``, ``~=``,
    ``|=``, ``!=``) and an IDENT or STRING value.

    :param selector: The tree the attribute test is attached to.
    :raises: :class:`SelectorSyntaxError` on any unexpected token.
    :returns: The resulting :class:`Attrib` node; the ``]`` is consumed.
    """
    stream.skip_whitespace()
    attrib = stream.next_ident_or_star()
    # A '*' is only meaningful as a namespace, i.e. when followed by '|'.
    if attrib is None and stream.peek() != ("DELIM", "|"):
        raise SelectorSyntaxError(f"Expected '|', got {stream.peek()}")
    namespace: str | None
    op: str | None
    if stream.peek() == ("DELIM", "|"):
        stream.next()
        if stream.peek() == ("DELIM", "="):
            # "attrib|=": the '|' was the start of the "|=" operator.
            namespace = None
            stream.next()
            op = "|="
        else:
            # "ns|attrib": what was read first was the namespace.
            namespace = attrib
            attrib = stream.next_ident()
            op = None
    else:
        namespace = op = None
    if op is None:
        stream.skip_whitespace()
        next = stream.next()
        if next == ("DELIM", "]"):
            # No operator at all: a bare existence test.
            return Attrib(selector, namespace, cast("str", attrib), "exists", None)
        if next == ("DELIM", "="):
            op = "="
        elif next.is_delim("^", "$", "*", "~", "|", "!") and (
            stream.peek() == ("DELIM", "=")
        ):
            # Two-character operator: this delimiter followed by '='.
            op = cast("str", next.value) + "="
            stream.next()
        else:
            raise SelectorSyntaxError(f"Operator expected, got {next}")
    stream.skip_whitespace()
    value = stream.next()
    if value.type not in ("IDENT", "STRING"):
        raise SelectorSyntaxError(f"Expected string or ident, got {value}")
    stream.skip_whitespace()
    next = stream.next()
    if next != ("DELIM", "]"):
        raise SelectorSyntaxError(f"Expected ']', got {next}")
    return Attrib(selector, namespace, cast("str", attrib), op, value)
 796 | 
 797 | 
 798 | def parse_series(tokens: Iterable[Token]) -> tuple[int, int]:
 799 |     """
 800 |     Parses the arguments for :nth-child() and friends.
 801 | 
 802 |     :raises: A list of tokens
 803 |     :returns: :``(a, b)``
 804 | 
 805 |     """
 806 |     for token in tokens:
 807 |         if token.type == "STRING":
 808 |             raise ValueError("String tokens not allowed in series.")
 809 |     s = "".join(cast("str", token.value) for token in tokens).strip()
 810 |     if s == "odd":
 811 |         return 2, 1
 812 |     if s == "even":
 813 |         return 2, 0
 814 |     if s == "n":
 815 |         return 1, 0
 816 |     if "n" not in s:
 817 |         # Just b
 818 |         return 0, int(s)
 819 |     a, b = s.split("n", 1)
 820 |     a_as_int: int
 821 |     if not a:
 822 |         a_as_int = 1
 823 |     elif a in {"-", "+"}:
 824 |         a_as_int = int(a + "1")
 825 |     else:
 826 |         a_as_int = int(a)
 827 |     b_as_int = int(b) if b else 0
 828 |     return a_as_int, b_as_int
 829 | 
 830 | 
 831 | #### Token objects
 832 | 
 833 | 
 834 | class Token(tuple[str, Optional[str]]):  # noqa: SLOT001
 835 |     @overload
 836 |     def __new__(
 837 |         cls,
 838 |         type_: Literal["IDENT", "HASH", "STRING", "S", "DELIM", "NUMBER"],
 839 |         value: str,
 840 |         pos: int,
 841 |     ) -> Self: ...
 842 | 
 843 |     @overload
 844 |     def __new__(cls, type_: Literal["EOF"], value: None, pos: int) -> Self: ...
 845 | 
 846 |     def __new__(cls, type_: str, value: str | None, pos: int) -> Self:
 847 |         obj = tuple.__new__(cls, (type_, value))
 848 |         obj.pos = pos
 849 |         return obj
 850 | 
 851 |     def __repr__(self) -> str:
 852 |         return f"<{self.type} '{self.value}' at {self.pos}>"
 853 | 
 854 |     def is_delim(self, *values: str) -> bool:
 855 |         return self.type == "DELIM" and self.value in values
 856 | 
 857 |     pos: int
 858 | 
 859 |     @property
 860 |     def type(self) -> str:
 861 |         return self[0]
 862 | 
 863 |     @property
 864 |     def value(self) -> str | None:
 865 |         return self[1]
 866 | 
 867 |     def css(self) -> str:
 868 |         if self.type == "STRING":
 869 |             return repr(self.value)
 870 |         return cast("str", self.value)
 871 | 
 872 | 
 873 | class EOFToken(Token):
 874 |     def __new__(cls, pos: int) -> Self:
 875 |         return Token.__new__(cls, "EOF", None, pos)
 876 | 
 877 |     def __repr__(self) -> str:
 878 |         return f"<{self.type} at {self.pos}>"
 879 | 
 880 | 
 881 | #### Tokenizer
 882 | 
 883 | 
 884 | class TokenMacros:
 885 |     unicode_escape = r"\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?"
 886 |     escape = unicode_escape + r"|\\[^\n\r\f0-9a-f]"
 887 |     string_escape = r"\\(?:\n|\r\n|\r|\f)|" + escape
 888 |     nonascii = r"[^\0-\177]"
 889 |     nmchar = f"[_a-z0-9-]|{escape}|{nonascii}"
 890 |     nmstart = f"[_a-z]|{escape}|{nonascii}"
 891 | 
 892 | 
class MatchFunc(Protocol):
    # Call signature of the bound ``match`` method of a compiled pattern,
    # which is what _compile() returns.
    def __call__(
        self, string: str, pos: int = ..., endpos: int = ...
    ) -> re.Match[str] | None: ...
 897 | 
 898 | 
 899 | def _compile(pattern: str) -> MatchFunc:
 900 |     return re.compile(pattern % vars(TokenMacros), re.IGNORECASE).match
 901 | 
 902 | 
# Matchers used by tokenize(); each is the bound ``match`` of a pattern
# compiled case-insensitively by _compile().
_match_whitespace = _compile(r"[ \t\r\n\f]+")
_match_number = _compile(r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)")
_match_hash = _compile("#(?:%(nmchar)s)+")
_match_ident = _compile("-?(?:%(nmstart)s)(?:%(nmchar)s)*")
# Body of a quoted string (without the quotes), keyed by the quote character.
# Both patterns can match the empty string.
_match_string_by_quote = {
    "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
    '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
}

# Substitution helpers used to resolve escapes in matched text.
_sub_simple_escape = re.compile(r"\\(.)").sub
_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.IGNORECASE).sub
_sub_newline_escape = re.compile(r"\\(?:\n|\r\n|\r|\f)").sub

# Same as r'\1', but faster on CPython
_replace_simple = operator.methodcaller("group", 1)
 918 | 
 919 | 
 920 | def _replace_unicode(match: re.Match[str]) -> str:
 921 |     codepoint = int(match.group(1), 16)
 922 |     if codepoint > sys.maxunicode:
 923 |         codepoint = 0xFFFD
 924 |     return chr(codepoint)
 925 | 
 926 | 
 927 | def unescape_ident(value: str) -> str:
 928 |     value = _sub_unicode_escape(_replace_unicode, value)
 929 |     return _sub_simple_escape(_replace_simple, value)
 930 | 
 931 | 
def tokenize(s: str) -> Iterator[Token]:
    """
    Split *s* into tokens, ending with an :class:`EOFToken`.

    At each position the following are tried in order: whitespace, ident,
    hash, quoted string, number and ``/* ... */`` comment; any other single
    character becomes a DELIM token. Comments produce no token.

    :raises: :class:`SelectorSyntaxError` on an unclosed or invalid string.
    """
    pos = 0
    len_s = len(s)
    while pos < len_s:
        match = _match_whitespace(s, pos=pos)
        if match:
            # A whole run of whitespace collapses into one "S" token.
            yield Token("S", " ", pos)
            pos = match.end()
            continue

        match = _match_ident(s, pos=pos)
        if match:
            # Resolve hexadecimal escapes first, then simple ones.
            value = _sub_simple_escape(
                _replace_simple, _sub_unicode_escape(_replace_unicode, match.group())
            )
            yield Token("IDENT", value, pos)
            pos = match.end()
            continue

        match = _match_hash(s, pos=pos)
        if match:
            # The leading '#' is not part of the token value.
            value = _sub_simple_escape(
                _replace_simple,
                _sub_unicode_escape(_replace_unicode, match.group()[1:]),
            )
            yield Token("HASH", value, pos)
            pos = match.end()
            continue

        quote = s[pos]
        if quote in _match_string_by_quote:
            # Match the string body, starting right after the opening quote.
            match = _match_string_by_quote[quote](s, pos=pos + 1)
            assert match, "Should have found at least an empty match"
            end_pos = match.end()
            if end_pos == len_s:
                raise SelectorSyntaxError(f"Unclosed string at {pos}")
            if s[end_pos] != quote:
                raise SelectorSyntaxError(f"Invalid string at {pos}")
            # Escaped newlines are dropped before the other escapes are
            # resolved.
            value = _sub_simple_escape(
                _replace_simple,
                _sub_unicode_escape(
                    _replace_unicode, _sub_newline_escape("", match.group())
                ),
            )
            yield Token("STRING", value, pos)
            # Skip the closing quote as well.
            pos = end_pos + 1
            continue

        match = _match_number(s, pos=pos)
        if match:
            value = match.group()
            yield Token("NUMBER", value, pos)
            pos = match.end()
            continue

        pos2 = pos + 2
        if s[pos:pos2] == "/*":
            # A comment: skip past the closing "*/"; an unclosed comment
            # swallows the rest of the input.
            pos = s.find("*/", pos2)
            if pos == -1:
                pos = len_s
            else:
                pos += 2
            continue

        # Anything else is a one-character delimiter.
        yield Token("DELIM", s[pos], pos)
        pos += 1

    assert pos == len_s
    yield EOFToken(pos)
1001 | 
1002 | 
class TokenStream:
    """
    A token iterator with one token of look-ahead.

    ``used`` lists every token handed out by :meth:`next`, in order.
    """

    def __init__(self, tokens: Iterable[Token], source: str | None = None) -> None:
        self.used: list[Token] = []
        self.tokens = iter(tokens)
        self.source = source
        self.peeked: Token | None = None
        self._peeking = False
        self.next_token = self.tokens.__next__

    def next(self) -> Token:
        """Consume and return the next token, recording it in ``used``."""
        if not self._peeking:
            token = self.next_token()
        else:
            # Hand out the token that peek() already fetched.
            self._peeking = False
            assert self.peeked is not None
            token = self.peeked
        self.used.append(token)
        return token

    def peek(self) -> Token:
        """Return the next token without consuming it."""
        if self._peeking:
            assert self.peeked is not None
            return self.peeked
        self.peeked = self.next_token()
        self._peeking = True
        assert self.peeked is not None
        return self.peeked

    def next_ident(self) -> str:
        """Consume the next token, which must be an IDENT; return its value."""
        token = self.next()
        if token.type == "IDENT":
            return cast("str", token.value)
        raise SelectorSyntaxError(f"Expected ident, got {token}")

    def next_ident_or_star(self) -> str | None:
        """Consume an IDENT (returning its value) or a ``*`` (returning None)."""
        token = self.next()
        if token.type == "IDENT":
            return token.value
        if token != ("DELIM", "*"):
            raise SelectorSyntaxError(f"Expected ident or '*', got {token}")
        return None

    def skip_whitespace(self) -> None:
        """Consume the next token if it is whitespace."""
        if self.peek().type == "S":
            self.next()
1047 | 


--------------------------------------------------------------------------------
/cssselect/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scrapy/cssselect/b478ce96deddd07bd7bd5311d49fd0b5bbf3f54f/cssselect/py.typed


--------------------------------------------------------------------------------
/cssselect/xpath.py:
--------------------------------------------------------------------------------
  1 | """
  2 | cssselect.xpath
  3 | ===============
  4 | 
  5 | Translation of parsed CSS selectors to XPath expressions.
  6 | 
  7 | 
  8 | :copyright: (c) 2007-2012 Ian Bicking and contributors.
  9 | See AUTHORS for more details.
 10 | :license: BSD, see LICENSE for more details.
 11 | 
 12 | """
 13 | 
 14 | from __future__ import annotations
 15 | 
 16 | import re
 17 | from typing import TYPE_CHECKING, cast
 18 | 
 19 | from cssselect.parser import (
 20 |     Attrib,
 21 |     Class,
 22 |     CombinedSelector,
 23 |     Element,
 24 |     Function,
 25 |     Hash,
 26 |     Matching,
 27 |     Negation,
 28 |     Pseudo,
 29 |     PseudoElement,
 30 |     Relation,
 31 |     Selector,
 32 |     SelectorError,
 33 |     SpecificityAdjustment,
 34 |     Tree,
 35 |     parse,
 36 |     parse_series,
 37 | )
 38 | 
 39 | if TYPE_CHECKING:
 40 |     from collections.abc import Callable
 41 | 
 42 |     # typing.Self requires Python 3.11
 43 |     from typing_extensions import Self
 44 | 
 45 | 
class ExpressionError(SelectorError, RuntimeError):
    """Unknown or unsupported selector (e.g. pseudo-class).

    Raised by the translation code in this module when a parsed selector
    cannot be turned into an XPath expression.
    """
 48 | 
 49 | 
 50 | #### XPath Helpers
 51 | 
 52 | 
 53 | class XPathExpr:
 54 |     def __init__(
 55 |         self,
 56 |         path: str = "",
 57 |         element: str = "*",
 58 |         condition: str = "",
 59 |         star_prefix: bool = False,
 60 |     ) -> None:
 61 |         self.path = path
 62 |         self.element = element
 63 |         self.condition = condition
 64 | 
 65 |     def __str__(self) -> str:
 66 |         path = str(self.path) + str(self.element)
 67 |         if self.condition:
 68 |             path += f"[{self.condition}]"
 69 |         return path
 70 | 
 71 |     def __repr__(self) -> str:
 72 |         return f"{self.__class__.__name__}[{self}]"
 73 | 
 74 |     def add_condition(self, condition: str, conjuction: str = "and") -> Self:
 75 |         if self.condition:
 76 |             self.condition = f"({self.condition}) {conjuction} ({condition})"
 77 |         else:
 78 |             self.condition = condition
 79 |         return self
 80 | 
 81 |     def add_name_test(self) -> None:
 82 |         if self.element == "*":
 83 |             # We weren't doing a test anyway
 84 |             return
 85 |         self.add_condition(f"name() = {GenericTranslator.xpath_literal(self.element)}")
 86 |         self.element = "*"
 87 | 
 88 |     def add_star_prefix(self) -> None:
 89 |         """
 90 |         Append '*/' to the path to keep the context constrained
 91 |         to a single parent.
 92 |         """
 93 |         self.path += "*/"
 94 | 
 95 |     def join(
 96 |         self,
 97 |         combiner: str,
 98 |         other: XPathExpr,
 99 |         closing_combiner: str | None = None,
100 |         has_inner_condition: bool = False,
101 |     ) -> Self:
102 |         path = str(self) + combiner
103 |         # Any "star prefix" is redundant when joining.
104 |         if other.path != "*/":
105 |             path += other.path
106 |         self.path = path
107 |         if not has_inner_condition:
108 |             self.element = (
109 |                 other.element + closing_combiner if closing_combiner else other.element
110 |             )
111 |             self.condition = other.condition
112 |         else:
113 |             self.element = other.element
114 |             if other.condition:
115 |                 self.element += "[" + other.condition + "]"
116 |             if closing_combiner:
117 |                 self.element += closing_combiner
118 |         return self
119 | 
120 | 
# Splits a string around runs of single quotes.  The pattern has a capturing
# group, so the quote runs themselves are kept in the resulting list.
split_at_single_quotes = re.compile("('+)").split

# The spec is actually more permissive than that, but don’t bother.
# This is just for the fast path.
# http://www.w3.org/TR/REC-xml/#NT-NameStartChar
# Anchored with ``\Z`` rather than ``$``: ``$`` also matches just before a
# trailing newline, which would let a name such as "div\n" count as safe.
is_safe_name = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.-]*\Z").match

# Test that the string is not empty and does not contain whitespace.
# ``\Z`` (not ``$``) so that a trailing newline is rejected as well.
is_non_whitespace = re.compile(r"^[^ \t\r\n\f]+\Z").match
130 | 
131 | 
132 | #### Translation
133 | 
134 | 
class GenericTranslator:
    """
    Translator for "generic" XML documents.

    Everything is case-sensitive, no assumption is made on the meaning
    of element names and attribute names.

    """

    ####
    ####  HERE BE DRAGONS
    ####
    ####  You are welcome to hook into this to change some behavior,
    ####  but do so at your own risk.
    ####  Until it has received a lot more work and review,
    ####  I reserve the right to change this API in backward-incompatible ways
    ####  with any minor version of cssselect.
    ####  See https://github.com/scrapy/cssselect/pull/22
    ####  -- Simon Sapin.
    ####

    # Maps a combinator to the name fragment used to look up the
    # ``xpath_<name>_combinator`` / ``xpath_relation_<name>_combinator``
    # method that translates it.
    combinator_mapping = {
        " ": "descendant",
        ">": "child",
        "+": "direct_adjacent",
        "~": "indirect_adjacent",
    }

    # Maps an attribute-selector operator to the suffix of the
    # ``xpath_attrib_<name>`` method that translates it.
    attribute_operator_mapping = {
        "exists": "exists",
        "=": "equals",
        "~=": "includes",
        "|=": "dashmatch",
        "^=": "prefixmatch",
        "$=": "suffixmatch",
        "*=": "substringmatch",
        "!=": "different",  # XXX Not in Level 3 but meh
    }

    #: The attribute used for ID selectors depends on the document language:
    #: http://www.w3.org/TR/selectors/#id-selectors
    id_attribute = "id"

    #: The attribute used for ``:lang()`` depends on the document language:
    #: http://www.w3.org/TR/selectors/#lang-pseudo
    lang_attribute = "xml:lang"

    #: The case sensitivity of document language element names,
    #: attribute names, and attribute values in selectors depends
    #: on the document language.
    #: http://www.w3.org/TR/selectors/#casesens
    #:
    #: When a document language defines one of these as case-insensitive,
    #: cssselect assumes that the document parser makes the parsed values
    #: lower-case. Making the selector lower-case too makes the comparison
    #: case-insensitive.
    #:
    #: In HTML, element names and attributes names (but not attribute values)
    #: are case-insensitive. All of lxml.html, html5lib, BeautifulSoup4
    #: and HTMLParser make them lower-case in their parse result, so
    #: the assumption holds.
    lower_case_element_names = False
    lower_case_attribute_names = False
    lower_case_attribute_values = False

    # class used to represent an XPath expression
    xpathexpr_cls = XPathExpr
202 | 
203 |     def css_to_xpath(self, css: str, prefix: str = "descendant-or-self::") -> str:
204 |         """Translate a *group of selectors* to XPath.
205 | 
206 |         Pseudo-elements are not supported here since XPath only knows
207 |         about "real" elements.
208 | 
209 |         :param css:
210 |             A *group of selectors* as a string.
211 |         :param prefix:
212 |             This string is prepended to the XPath expression for each selector.
213 |             The default makes selectors scoped to the context node’s subtree.
214 |         :raises:
215 |             :class:`~cssselect.SelectorSyntaxError` on invalid selectors,
216 |             :class:`ExpressionError` on unknown/unsupported selectors,
217 |             including pseudo-elements.
218 |         :returns:
219 |             The equivalent XPath 1.0 expression as a string.
220 | 
221 |         """
222 |         return " | ".join(
223 |             self.selector_to_xpath(selector, prefix, translate_pseudo_elements=True)
224 |             for selector in parse(css)
225 |         )
226 | 
227 |     def selector_to_xpath(
228 |         self,
229 |         selector: Selector,
230 |         prefix: str = "descendant-or-self::",
231 |         translate_pseudo_elements: bool = False,
232 |     ) -> str:
233 |         """Translate a parsed selector to XPath.
234 | 
235 | 
236 |         :param selector:
237 |             A parsed :class:`Selector` object.
238 |         :param prefix:
239 |             This string is prepended to the resulting XPath expression.
240 |             The default makes selectors scoped to the context node’s subtree.
241 |         :param translate_pseudo_elements:
242 |             Unless this is set to ``True`` (as :meth:`css_to_xpath` does),
243 |             the :attr:`~Selector.pseudo_element` attribute of the selector
244 |             is ignored.
245 |             It is the caller's responsibility to reject selectors
246 |             with pseudo-elements, or to account for them somehow.
247 |         :raises:
248 |             :class:`ExpressionError` on unknown/unsupported selectors.
249 |         :returns:
250 |             The equivalent XPath 1.0 expression as a string.
251 | 
252 |         """
253 |         tree = getattr(selector, "parsed_tree", None)
254 |         if not tree:
255 |             raise TypeError(f"Expected a parsed selector, got {selector!r}")
256 |         xpath = self.xpath(tree)
257 |         assert isinstance(xpath, self.xpathexpr_cls)  # help debug a missing 'return'
258 |         if translate_pseudo_elements and selector.pseudo_element:
259 |             xpath = self.xpath_pseudo_element(xpath, selector.pseudo_element)
260 |         return (prefix or "") + str(xpath)
261 | 
262 |     def xpath_pseudo_element(
263 |         self, xpath: XPathExpr, pseudo_element: PseudoElement
264 |     ) -> XPathExpr:
265 |         """Translate a pseudo-element.
266 | 
267 |         Defaults to not supporting pseudo-elements at all,
268 |         but can be overridden by sub-classes.
269 | 
270 |         """
271 |         raise ExpressionError("Pseudo-elements are not supported.")
272 | 
273 |     @staticmethod
274 |     def xpath_literal(s: str) -> str:
275 |         s = str(s)
276 |         if "'" not in s:
277 |             s = f"'{s}'"
278 |         elif '"' not in s:
279 |             s = f'"{s}"'
280 |         else:
281 |             parts_quoted = [
282 |                 f'"{part}"' if "'" in part else f"'{part}'"
283 |                 for part in split_at_single_quotes(s)
284 |                 if part
285 |             ]
286 |             s = "concat({})".format(",".join(parts_quoted))
287 |         return s
288 | 
289 |     def xpath(self, parsed_selector: Tree) -> XPathExpr:
290 |         """Translate any parsed selector object."""
291 |         type_name = type(parsed_selector).__name__
292 |         method = cast(
293 |             "Callable[[Tree], XPathExpr] | None",
294 |             getattr(self, f"xpath_{type_name.lower()}", None),
295 |         )
296 |         if method is None:
297 |             raise ExpressionError(f"{type_name} is not supported.")
298 |         return method(parsed_selector)
299 | 
300 |     # Dispatched by parsed object type
301 | 
302 |     def xpath_combinedselector(self, combined: CombinedSelector) -> XPathExpr:
303 |         """Translate a combined selector."""
304 |         combinator = self.combinator_mapping[combined.combinator]
305 |         method = cast(
306 |             "Callable[[XPathExpr, XPathExpr], XPathExpr]",
307 |             getattr(self, f"xpath_{combinator}_combinator"),
308 |         )
309 |         return method(self.xpath(combined.selector), self.xpath(combined.subselector))
310 | 
311 |     def xpath_negation(self, negation: Negation) -> XPathExpr:
312 |         xpath = self.xpath(negation.selector)
313 |         sub_xpath = self.xpath(negation.subselector)
314 |         sub_xpath.add_name_test()
315 |         if sub_xpath.condition:
316 |             return xpath.add_condition(f"not({sub_xpath.condition})")
317 |         return xpath.add_condition("0")
318 | 
319 |     def xpath_relation(self, relation: Relation) -> XPathExpr:
320 |         xpath = self.xpath(relation.selector)
321 |         combinator = relation.combinator
322 |         subselector = relation.subselector
323 |         right = self.xpath(subselector.parsed_tree)
324 |         method = cast(
325 |             "Callable[[XPathExpr, XPathExpr], XPathExpr]",
326 |             getattr(
327 |                 self,
328 |                 f"xpath_relation_{self.combinator_mapping[cast('str', combinator.value)]}_combinator",
329 |             ),
330 |         )
331 |         return method(xpath, right)
332 | 
333 |     def xpath_matching(self, matching: Matching) -> XPathExpr:
334 |         xpath = self.xpath(matching.selector)
335 |         exprs = [self.xpath(selector) for selector in matching.selector_list]
336 |         for e in exprs:
337 |             e.add_name_test()
338 |             if e.condition:
339 |                 xpath.add_condition(e.condition, "or")
340 |         return xpath
341 | 
342 |     def xpath_specificityadjustment(self, matching: SpecificityAdjustment) -> XPathExpr:
343 |         xpath = self.xpath(matching.selector)
344 |         exprs = [self.xpath(selector) for selector in matching.selector_list]
345 |         for e in exprs:
346 |             e.add_name_test()
347 |             if e.condition:
348 |                 xpath.add_condition(e.condition, "or")
349 |         return xpath
350 | 
351 |     def xpath_function(self, function: Function) -> XPathExpr:
352 |         """Translate a functional pseudo-class."""
353 |         method_name = "xpath_{}_function".format(function.name.replace("-", "_"))
354 |         method = cast(
355 |             "Callable[[XPathExpr, Function], XPathExpr] | None",
356 |             getattr(self, method_name, None),
357 |         )
358 |         if not method:
359 |             raise ExpressionError(f"The pseudo-class :{function.name}() is unknown")
360 |         return method(self.xpath(function.selector), function)
361 | 
362 |     def xpath_pseudo(self, pseudo: Pseudo) -> XPathExpr:
363 |         """Translate a pseudo-class."""
364 |         method_name = "xpath_{}_pseudo".format(pseudo.ident.replace("-", "_"))
365 |         method = cast(
366 |             "Callable[[XPathExpr], XPathExpr] | None",
367 |             getattr(self, method_name, None),
368 |         )
369 |         if not method:
370 |             # TODO: better error message for pseudo-elements?
371 |             raise ExpressionError(f"The pseudo-class :{pseudo.ident} is unknown")
372 |         return method(self.xpath(pseudo.selector))
373 | 
374 |     def xpath_attrib(self, selector: Attrib) -> XPathExpr:
375 |         """Translate an attribute selector."""
376 |         operator = self.attribute_operator_mapping[selector.operator]
377 |         method = cast(
378 |             "Callable[[XPathExpr, str, str | None], XPathExpr]",
379 |             getattr(self, f"xpath_attrib_{operator}"),
380 |         )
381 |         if self.lower_case_attribute_names:
382 |             name = selector.attrib.lower()
383 |         else:
384 |             name = selector.attrib
385 |         safe = is_safe_name(name)
386 |         if selector.namespace:
387 |             name = f"{selector.namespace}:{name}"
388 |             safe = safe and is_safe_name(selector.namespace)
389 |         if safe:
390 |             attrib = "@" + name
391 |         else:
392 |             attrib = f"attribute::*[name() = {self.xpath_literal(name)}]"
393 |         if selector.value is None:
394 |             value = None
395 |         elif self.lower_case_attribute_values:
396 |             value = cast("str", selector.value.value).lower()
397 |         else:
398 |             value = selector.value.value
399 |         return method(self.xpath(selector.selector), attrib, value)
400 | 
401 |     def xpath_class(self, class_selector: Class) -> XPathExpr:
402 |         """Translate a class selector."""
403 |         # .foo is defined as [class~=foo] in the spec.
404 |         xpath = self.xpath(class_selector.selector)
405 |         return self.xpath_attrib_includes(xpath, "@class", class_selector.class_name)
406 | 
407 |     def xpath_hash(self, id_selector: Hash) -> XPathExpr:
408 |         """Translate an ID selector."""
409 |         xpath = self.xpath(id_selector.selector)
410 |         return self.xpath_attrib_equals(xpath, "@id", id_selector.id)
411 | 
412 |     def xpath_element(self, selector: Element) -> XPathExpr:
413 |         """Translate a type or universal selector."""
414 |         element = selector.element
415 |         if not element:
416 |             element = "*"
417 |             safe = True
418 |         else:
419 |             safe = bool(is_safe_name(element))
420 |             if self.lower_case_element_names:
421 |                 element = element.lower()
422 |         if selector.namespace:
423 |             # Namespace prefixes are case-sensitive.
424 |             # http://www.w3.org/TR/css3-namespace/#prefixes
425 |             element = f"{selector.namespace}:{element}"
426 |             safe = safe and bool(is_safe_name(selector.namespace))
427 |         xpath = self.xpathexpr_cls(element=element)
428 |         if not safe:
429 |             xpath.add_name_test()
430 |         return xpath
431 | 
432 |     # CombinedSelector: dispatch by combinator
433 | 
434 |     def xpath_descendant_combinator(
435 |         self, left: XPathExpr, right: XPathExpr
436 |     ) -> XPathExpr:
437 |         """right is a child, grand-child or further descendant of left"""
438 |         return left.join("/descendant-or-self::*/", right)
439 | 
440 |     def xpath_child_combinator(self, left: XPathExpr, right: XPathExpr) -> XPathExpr:
441 |         """right is an immediate child of left"""
442 |         return left.join("/", right)
443 | 
444 |     def xpath_direct_adjacent_combinator(
445 |         self, left: XPathExpr, right: XPathExpr
446 |     ) -> XPathExpr:
447 |         """right is a sibling immediately after left"""
448 |         xpath = left.join("/following-sibling::", right)
449 |         xpath.add_name_test()
450 |         return xpath.add_condition("position() = 1")
451 | 
452 |     def xpath_indirect_adjacent_combinator(
453 |         self, left: XPathExpr, right: XPathExpr
454 |     ) -> XPathExpr:
455 |         """right is a sibling after left, immediately or not"""
456 |         return left.join("/following-sibling::", right)
457 | 
458 |     def xpath_relation_descendant_combinator(
459 |         self, left: XPathExpr, right: XPathExpr
460 |     ) -> XPathExpr:
461 |         """right is a child, grand-child or further descendant of left; select left"""
462 |         return left.join(
463 |             "[descendant::", right, closing_combiner="]", has_inner_condition=True
464 |         )
465 | 
466 |     def xpath_relation_child_combinator(
467 |         self, left: XPathExpr, right: XPathExpr
468 |     ) -> XPathExpr:
469 |         """right is an immediate child of left; select left"""
470 |         return left.join("[./", right, closing_combiner="]")
471 | 
472 |     def xpath_relation_direct_adjacent_combinator(
473 |         self, left: XPathExpr, right: XPathExpr
474 |     ) -> XPathExpr:
475 |         """right is a sibling immediately after left; select left"""
476 |         return left.add_condition(
477 |             f"following-sibling::*[(name() = '{right.element}') and (position() = 1)]"
478 |         )
479 | 
480 |     def xpath_relation_indirect_adjacent_combinator(
481 |         self, left: XPathExpr, right: XPathExpr
482 |     ) -> XPathExpr:
483 |         """right is a sibling after left, immediately or not; select left"""
484 |         return left.join("[following-sibling::", right, closing_combiner="]")
485 | 
486 |     # Function: dispatch by function/pseudo-class name
487 | 
    def xpath_nth_child_function(
        self,
        xpath: XPathExpr,
        function: Function,
        last: bool = False,
        add_name_test: bool = True,
    ) -> XPathExpr:
        """Translate an ``an+b`` series argument into a sibling-count condition.

        :param xpath:
            Expression for the element the pseudo-class applies to; it is
            modified in place and returned.
        :param function:
            Parsed function whose ``arguments`` are handed to
            ``parse_series`` to obtain ``a`` and ``b``.
        :param last:
            Count following siblings instead of preceding ones.
        :param add_name_test:
            When true, siblings are counted with the ``*`` node test; when
            false, only siblings matching ``xpath.element`` are counted.
        :raises:
            :class:`ExpressionError` if ``parse_series`` raises ``ValueError``.
        :returns:
            ``xpath``, possibly with an additional condition.
        """
        try:
            a, b = parse_series(function.arguments)
        except ValueError as ex:
            raise ExpressionError(f"Invalid series: '{function.arguments!r}'") from ex

        # From https://www.w3.org/TR/css3-selectors/#structural-pseudos:
        #
        # :nth-child(an+b)
        #       an+b-1 siblings before
        #
        # :nth-last-child(an+b)
        #       an+b-1 siblings after
        #
        # :nth-of-type(an+b)
        #       an+b-1 siblings with the same expanded element name before
        #
        # :nth-last-of-type(an+b)
        #       an+b-1 siblings with the same expanded element name after
        #
        # So,
        # for :nth-child and :nth-of-type
        #
        #    count(preceding-sibling::<nodetest>) = an+b-1
        #
        # for :nth-last-child and :nth-last-of-type
        #
        #    count(following-sibling::<nodetest>) = an+b-1
        #
        # therefore,
        #    count(...) - (b-1) ≡ 0 (mod a)
        #
        # if a == 0:
        # ~~~~~~~~~~
        #    count(...) = b-1
        #
        # if a < 0:
        # ~~~~~~~~~
        #    count(...) - b +1 <= 0
        # -> count(...) <= b-1
        #
        # if a > 0:
        # ~~~~~~~~~
        #    count(...) - b +1 >= 0
        # -> count(...) >= b-1

        # work with b-1 instead
        b_min_1 = b - 1

        # early-exit condition 1:
        # ~~~~~~~~~~~~~~~~~~~~~~~
        # for a == 1, nth-*(an+b) means n+b-1 siblings before/after,
        # and since n ∈ {0, 1, 2, ...}, if b-1<=0,
        # there is always an "n" matching any number of siblings (maybe none)
        if a == 1 and b_min_1 <= 0:
            return xpath

        # early-exit condition 2:
        # ~~~~~~~~~~~~~~~~~~~~~~~
        # an+b-1 siblings with a<0 and (b-1)<0 is not possible
        if a < 0 and b_min_1 < 0:
            return xpath.add_condition("0")

        # `add_name_test` boolean is inverted and somewhat counter-intuitive:
        #
        # nth_of_type() calls nth_child(add_name_test=False)
        nodetest = "*" if add_name_test else f"{xpath.element}"

        # count siblings before or after the element
        if not last:
            siblings_count = f"count(preceding-sibling::{nodetest})"
        else:
            siblings_count = f"count(following-sibling::{nodetest})"

        # special case of fixed position: nth-*(0n+b)
        # if a == 0:
        # ~~~~~~~~~~
        #    count(***-sibling::***) = b-1
        if a == 0:
            return xpath.add_condition(f"{siblings_count} = {b_min_1}")

        expressions = []

        if a > 0:
            # siblings count, an+b-1, is always >= 0,
            # so if a>0, and (b-1)<=0, an "n" exists to satisfy this,
            # therefore, the predicate is only interesting if (b-1)>0
            if b_min_1 > 0:
                expressions.append(f"{siblings_count} >= {b_min_1}")
        else:
            # if a<0, and (b-1)<0, no "n" satisfies this,
            # this is tested above as an early exit condition
            # otherwise,
            expressions.append(f"{siblings_count} <= {b_min_1}")

        # operations modulo 1 or -1 are simpler, one only needs to verify:
        #
        # - either:
        # count(***-sibling::***) - (b-1) = n = 0, 1, 2, 3, etc.,
        #   i.e. count(***-sibling::***) >= (b-1)
        #
        # - or:
        # count(***-sibling::***) - (b-1) = -n = 0, -1, -2, -3, etc.,
        #   i.e. count(***-sibling::***) <= (b-1)
        # which is what we just did above.
        #
        if abs(a) != 1:
            # count(***-sibling::***) - (b-1) ≡ 0 (mod a)
            left = siblings_count

            # apply "modulo a" on 2nd term, -(b-1),
            # to simplify things like "(... +6) % -3",
            # and also make it positive with |a|
            b_neg = (-b_min_1) % abs(a)

            if b_neg != 0:
                left = f"({left} +{b_neg})"

            expressions.append(f"{left} mod {a} = 0")

        # Parenthesise each expression only when several are AND-ed together.
        template = "(%s)" if len(expressions) > 1 else "%s"
        xpath.add_condition(
            " and ".join(template % expression for expression in expressions)
        )
        return xpath
619 | 
620 |     def xpath_nth_last_child_function(
621 |         self, xpath: XPathExpr, function: Function
622 |     ) -> XPathExpr:
623 |         return self.xpath_nth_child_function(xpath, function, last=True)
624 | 
625 |     def xpath_nth_of_type_function(
626 |         self, xpath: XPathExpr, function: Function
627 |     ) -> XPathExpr:
628 |         if xpath.element == "*":
629 |             raise ExpressionError("*:nth-of-type() is not implemented")
630 |         return self.xpath_nth_child_function(xpath, function, add_name_test=False)
631 | 
632 |     def xpath_nth_last_of_type_function(
633 |         self, xpath: XPathExpr, function: Function
634 |     ) -> XPathExpr:
635 |         if xpath.element == "*":
636 |             raise ExpressionError("*:nth-of-type() is not implemented")
637 |         return self.xpath_nth_child_function(
638 |             xpath, function, last=True, add_name_test=False
639 |         )
640 | 
641 |     def xpath_contains_function(
642 |         self, xpath: XPathExpr, function: Function
643 |     ) -> XPathExpr:
644 |         # Defined there, removed in later drafts:
645 |         # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
646 |         if function.argument_types() not in (["STRING"], ["IDENT"]):
647 |             raise ExpressionError(
648 |                 f"Expected a single string or ident for :contains(), got {function.arguments!r}"
649 |             )
650 |         value = cast("str", function.arguments[0].value)
651 |         return xpath.add_condition(f"contains(., {self.xpath_literal(value)})")
652 | 
653 |     def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
654 |         if function.argument_types() not in (["STRING"], ["IDENT"]):
655 |             raise ExpressionError(
656 |                 f"Expected a single string or ident for :lang(), got {function.arguments!r}"
657 |             )
658 |         value = cast("str", function.arguments[0].value)
659 |         return xpath.add_condition(f"lang({self.xpath_literal(value)})")
660 | 
661 |     # Pseudo: dispatch by pseudo-class name
662 | 
663 |     def xpath_root_pseudo(self, xpath: XPathExpr) -> XPathExpr:
664 |         return xpath.add_condition("not(parent::*)")
665 | 
666 |     # CSS immediate children (CSS ":scope > div" to XPath "child::div" or "./div")
667 |     # Works only at the start of a selector
668 |     # Needed to get immediate children of a processed selector in Scrapy
669 |     # for product in response.css('.product'):
670 |     #     description = product.css(':scope > div::text').get()
671 |     def xpath_scope_pseudo(self, xpath: XPathExpr) -> XPathExpr:
672 |         return xpath.add_condition("1")
673 | 
674 |     def xpath_first_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
675 |         return xpath.add_condition("count(preceding-sibling::*) = 0")
676 | 
677 |     def xpath_last_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
678 |         return xpath.add_condition("count(following-sibling::*) = 0")
679 | 
680 |     def xpath_first_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
681 |         if xpath.element == "*":
682 |             raise ExpressionError("*:first-of-type is not implemented")
683 |         return xpath.add_condition(f"count(preceding-sibling::{xpath.element}) = 0")
684 | 
685 |     def xpath_last_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
686 |         if xpath.element == "*":
687 |             raise ExpressionError("*:last-of-type is not implemented")
688 |         return xpath.add_condition(f"count(following-sibling::{xpath.element}) = 0")
689 | 
690 |     def xpath_only_child_pseudo(self, xpath: XPathExpr) -> XPathExpr:
691 |         return xpath.add_condition("count(parent::*/child::*) = 1")
692 | 
693 |     def xpath_only_of_type_pseudo(self, xpath: XPathExpr) -> XPathExpr:
694 |         if xpath.element == "*":
695 |             raise ExpressionError("*:only-of-type is not implemented")
696 |         return xpath.add_condition(f"count(parent::*/child::{xpath.element}) = 1")
697 | 
698 |     def xpath_empty_pseudo(self, xpath: XPathExpr) -> XPathExpr:
699 |         return xpath.add_condition("not(*) and not(string-length())")
700 | 
    def pseudo_never_matches(self, xpath: XPathExpr) -> XPathExpr:
        """Common implementation for pseudo-classes that never match.

        Adds the constant condition ``0`` to ``xpath`` and returns the result
        of ``add_condition``.
        """
        return xpath.add_condition("0")

    # Each of the pseudo-classes below is translated with the shared
    # never-matching implementation above.
    xpath_link_pseudo = pseudo_never_matches
    xpath_visited_pseudo = pseudo_never_matches
    xpath_hover_pseudo = pseudo_never_matches
    xpath_active_pseudo = pseudo_never_matches
    xpath_focus_pseudo = pseudo_never_matches
    xpath_target_pseudo = pseudo_never_matches
    xpath_enabled_pseudo = pseudo_never_matches
    xpath_disabled_pseudo = pseudo_never_matches
    xpath_checked_pseudo = pseudo_never_matches
714 | 
715 |     # Attrib: dispatch by attribute operator
716 | 
717 |     def xpath_attrib_exists(
718 |         self, xpath: XPathExpr, name: str, value: str | None
719 |     ) -> XPathExpr:
720 |         assert not value
721 |         xpath.add_condition(name)
722 |         return xpath
723 | 
724 |     def xpath_attrib_equals(
725 |         self, xpath: XPathExpr, name: str, value: str | None
726 |     ) -> XPathExpr:
727 |         assert value is not None
728 |         xpath.add_condition(f"{name} = {self.xpath_literal(value)}")
729 |         return xpath
730 | 
731 |     def xpath_attrib_different(
732 |         self, xpath: XPathExpr, name: str, value: str | None
733 |     ) -> XPathExpr:
734 |         assert value is not None
735 |         # FIXME: this seems like a weird hack...
736 |         if value:
737 |             xpath.add_condition(f"not({name}) or {name} != {self.xpath_literal(value)}")
738 |         else:
739 |             xpath.add_condition(f"{name} != {self.xpath_literal(value)}")
740 |         return xpath
741 | 
742 |     def xpath_attrib_includes(
743 |         self, xpath: XPathExpr, name: str, value: str | None
744 |     ) -> XPathExpr:
745 |         if value and is_non_whitespace(value):
746 |             arg = self.xpath_literal(" " + value + " ")
747 |             xpath.add_condition(
748 |                 f"{name} and contains(concat(' ', normalize-space({name}), ' '), {arg})"
749 |             )
750 |         else:
751 |             xpath.add_condition("0")
752 |         return xpath
753 | 
754 |     def xpath_attrib_dashmatch(
755 |         self, xpath: XPathExpr, name: str, value: str | None
756 |     ) -> XPathExpr:
757 |         assert value is not None
758 |         arg = self.xpath_literal(value)
759 |         arg_dash = self.xpath_literal(value + "-")
760 |         # Matches the exact value, or the value immediately followed by "-" (weird, but true).
761 |         xpath.add_condition(
762 |             f"{name} and ({name} = {arg} or starts-with({name}, {arg_dash}))"
763 |         )
764 |         return xpath
765 | 
766 |     def xpath_attrib_prefixmatch(
767 |         self, xpath: XPathExpr, name: str, value: str | None
768 |     ) -> XPathExpr:
769 |         if value:
770 |             xpath.add_condition(
771 |                 f"{name} and starts-with({name}, {self.xpath_literal(value)})"
772 |             )
773 |         else:
774 |             xpath.add_condition("0")
775 |         return xpath
776 | 
777 |     def xpath_attrib_suffixmatch(
778 |         self, xpath: XPathExpr, name: str, value: str | None
779 |     ) -> XPathExpr:
780 |         if value:
781 |             # XPath 1.0 has starts-with() but no ends-with(): compare the trailing substring instead.
782 |             xpath.add_condition(
783 |                 f"{name} and substring({name}, string-length({name})-{len(value) - 1}) = {self.xpath_literal(value)}"
784 |             )
785 |         else:
786 |             xpath.add_condition("0")
787 |         return xpath
788 | 
789 |     def xpath_attrib_substringmatch(
790 |         self, xpath: XPathExpr, name: str, value: str | None
791 |     ) -> XPathExpr:
792 |         if value:
793 |             # Attribute selectors are case sensitive
794 |             xpath.add_condition(
795 |                 f"{name} and contains({name}, {self.xpath_literal(value)})"
796 |             )
797 |         else:
798 |             xpath.add_condition("0")
799 |         return xpath
800 | 
801 | 
802 | class HTMLTranslator(GenericTranslator):
803 |     """
804 |     Translator for (X)HTML documents.
805 | 
806 |     Has a more useful implementation of some pseudo-classes based on
807 |     HTML-specific element names and attribute names, as described in
808 |     the `HTML5 specification`_. It assumes no-quirks mode.
809 |     The API is the same as :class:`GenericTranslator`.
810 | 
811 |     .. _HTML5 specification: http://www.w3.org/TR/html5/links.html#selectors
812 | 
813 |     :param xhtml:
814 |         If false (the default), element names and attribute names
815 |         are case-insensitive.
816 | 
817 |     """
818 | 
819 |     lang_attribute = "lang"
820 | 
821 |     def __init__(self, xhtml: bool = False) -> None:
822 |         self.xhtml = xhtml  # Might be useful for sub-classes?
823 |         if not xhtml:
824 |             # Plain HTML: enable both lower-casing flags (see their definition in GenericTranslator).
825 |             self.lower_case_element_names = True
826 |             self.lower_case_attribute_names = True
827 | 
828 |     def xpath_checked_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore[override]
829 |         # FIXME: is this really all the elements? NOTE(review): the literals join as "')and (" (no space).
830 |         return xpath.add_condition(
831 |             "(@selected and name(.) = 'option') or "
832 |             "(@checked "
833 |             "and (name(.) = 'input' or name(.) = 'command')"
834 |             "and (@type = 'checkbox' or @type = 'radio'))"
835 |         )
836 | 
837 |     def xpath_lang_function(self, xpath: XPathExpr, function: Function) -> XPathExpr:
838 |         if function.argument_types() not in (["STRING"], ["IDENT"]):
839 |             raise ExpressionError(
840 |                 f"Expected a single string or ident for :lang(), got {function.arguments!r}"
841 |             )
842 |         value = function.arguments[0].value
843 |         assert value
844 |         arg = self.xpath_literal(value.lower() + "-")
845 |         return xpath.add_condition(
846 |             "ancestor-or-self::*[@lang][1][starts-with(concat("
847 |             # XPath 1.0 has no lower-case function...
848 |             f"translate(@{self.lang_attribute}, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', "
849 |             "'abcdefghijklmnopqrstuvwxyz'), "
850 |             f"'-'), {arg})]"
851 |         )
852 | 
853 |     def xpath_link_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore[override]
854 |         return xpath.add_condition(
855 |             "@href and (name(.) = 'a' or name(.) = 'link' or name(.) = 'area')"
856 |         )
857 | 
858 |     # Links are never visited, the implementation for :visited is the same
859 |     # as in GenericTranslator
860 | 
861 |     def xpath_disabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore[override]
862 |         # http://www.w3.org/TR/html5/section-index.html#attributes-1
863 |         return xpath.add_condition(
864 |             """
865 |         (
866 |             @disabled and
867 |             (
868 |                 (name(.) = 'input' and @type != 'hidden') or
869 |                 name(.) = 'button' or
870 |                 name(.) = 'select' or
871 |                 name(.) = 'textarea' or
872 |                 name(.) = 'command' or
873 |                 name(.) = 'fieldset' or
874 |                 name(.) = 'optgroup' or
875 |                 name(.) = 'option'
876 |             )
877 |         ) or (
878 |             (
879 |                 (name(.) = 'input' and @type != 'hidden') or
880 |                 name(.) = 'button' or
881 |                 name(.) = 'select' or
882 |                 name(.) = 'textarea'
883 |             )
884 |             and ancestor::fieldset[@disabled]
885 |         )
886 |         """
887 |         )
888 |         # FIXME: in the second half, add "and is not a descendant of that
889 |         # fieldset element's first legend element child, if any."
890 | 
891 |     def xpath_enabled_pseudo(self, xpath: XPathExpr) -> XPathExpr:  # type: ignore[override]
892 |         # http://www.w3.org/TR/html5/section-index.html#attributes-1
893 |         return xpath.add_condition(
894 |             """
895 |         (
896 |             @href and (
897 |                 name(.) = 'a' or
898 |                 name(.) = 'link' or
899 |                 name(.) = 'area'
900 |             )
901 |         ) or (
902 |             (
903 |                 name(.) = 'command' or
904 |                 name(.) = 'fieldset' or
905 |                 name(.) = 'optgroup'
906 |             )
907 |             and not(@disabled)
908 |         ) or (
909 |             (
910 |                 (name(.) = 'input' and @type != 'hidden') or
911 |                 name(.) = 'button' or
912 |                 name(.) = 'select' or
913 |                 name(.) = 'textarea' or
914 |                 name(.) = 'keygen'
915 |             )
916 |             and not (@disabled or ancestor::fieldset[@disabled])
917 |         ) or (
918 |             name(.) = 'option' and not(
919 |                 @disabled or ancestor::optgroup[@disabled]
920 |             )
921 |         )
922 |         """
923 |         )
924 |         # FIXME: ... or "li elements that are children of menu elements,
925 |         # and that have a child element that defines a command, if the first
926 |         # such element's Disabled State facet is false (not disabled)".
927 |         # FIXME: after ancestor::fieldset[@disabled], add "and is not a
928 |         # descendant of that fieldset element's first legend element child,
929 |         # if any."
930 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | #
  3 | # cssselect documentation build configuration file, created by
  4 | # sphinx-quickstart on Tue Mar 27 14:20:34 2012.
  5 | #
  6 | # This file is execfile()d with the current directory set to its containing dir.
  7 | #
  8 | # Note that not all possible configuration values are present in this
  9 | # autogenerated file.
 10 | #
 11 | # All configuration values have a default; values that are commented out
 12 | # serve to show the default.
 13 | 
 14 | import re
 15 | from pathlib import Path
 16 | 
 17 | # If extensions (or modules to document with autodoc) are in another directory,
 18 | # add these directories to sys.path here. If the directory is relative to the
 19 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 20 | # sys.path.insert(0, os.path.abspath('.'))
 21 | 
 22 | # -- General configuration -----------------------------------------------------
 23 | 
 24 | # If your documentation needs a minimal Sphinx version, state it here.
 25 | # needs_sphinx = '1.0'
 26 | 
 27 | # Add any Sphinx extension module names here, as strings. They can be extensions
 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 29 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.intersphinx", "sphinx.ext.doctest"]
 30 | 
 31 | # Add any paths that contain templates here, relative to this directory.
 32 | templates_path = ["_templates"]
 33 | 
 34 | # The suffix of source filenames.
 35 | source_suffix = ".rst"
 36 | 
 37 | # The encoding of source files.
 38 | # source_encoding = 'utf-8-sig'
 39 | 
 40 | # The master toctree document.
 41 | master_doc = "index"
 42 | 
 43 | # General information about the project.
 44 | project = "cssselect"
 45 | copyright = "2012-2017, Simon Sapin, Scrapy developers"
 46 | 
 47 | # The version info for the project you're documenting, acts as replacement for
 48 | # |version| and |release|, also used in various other places throughout the
 49 | # built documents.
 50 | #
 51 | # The full version, including alpha/beta/rc tags.
 52 | init_py = (Path(__file__).parent.parent / "cssselect" / "__init__.py").read_text()
 53 | release = re.search('VERSION = "([^"]+)"', init_py).group(1)
 54 | # The short X.Y version.
 55 | version = release.rstrip("dev")
 56 | 
 57 | # The language for content autogenerated by Sphinx. Refer to documentation
 58 | # for a list of supported languages.
 59 | # language = None
 60 | 
 61 | # There are two options for replacing |today|: either, you set today to some
 62 | # non-false value, then it is used:
 63 | # today = ''
 64 | # Else, today_fmt is used as the format for a strftime call.
 65 | # today_fmt = '%B %d, %Y'
 66 | 
 67 | # List of patterns, relative to source directory, that match files and
 68 | # directories to ignore when looking for source files.
 69 | exclude_patterns = ["_build"]
 70 | 
 71 | # The reST default role (used for this markup: `text`) to use for all documents.
 72 | # default_role = None
 73 | 
 74 | # If true, '()' will be appended to :func: etc. cross-reference text.
 75 | # add_function_parentheses = True
 76 | 
 77 | # If true, the current module name will be prepended to all description
 78 | # unit titles (such as .. function::).
 79 | # add_module_names = True
 80 | 
 81 | # If true, sectionauthor and moduleauthor directives will be shown in the
 82 | # output. They are ignored by default.
 83 | # show_authors = False
 84 | 
 85 | # The name of the Pygments (syntax highlighting) style to use.
 86 | pygments_style = "sphinx"
 87 | 
 88 | # A list of ignored prefixes for module index sorting.
 89 | # modindex_common_prefix = []
 90 | 
 91 | 
 92 | # -- Options for HTML output ---------------------------------------------------
 93 | 
 94 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 95 | # a list of builtin themes.
 96 | html_theme = "sphinx_rtd_theme"
 97 | 
 98 | # Theme options are theme-specific and customize the look and feel of a theme
 99 | # further.  For a list of options available for each theme, see the
100 | # documentation.
101 | # html_theme_options = {}
102 | 
103 | # Add any paths that contain custom themes here, relative to this directory.
104 | # html_theme_path = []
105 | 
106 | # The name for this set of Sphinx documents.  If None, it defaults to
107 | # "<project> v<release> documentation".
108 | # html_title = None
109 | 
110 | # A shorter title for the navigation bar.  Default is the same as html_title.
111 | # html_short_title = None
112 | 
113 | # The name of an image file (relative to this directory) to place at the top
114 | # of the sidebar.
115 | # html_logo = None
116 | 
117 | # The name of an image file (within the static path) to use as favicon of the
118 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
119 | # pixels large.
120 | # html_favicon = None
121 | 
122 | # Add any paths that contain custom static files (such as style sheets) here,
123 | # relative to this directory. They are copied after the builtin static files,
124 | # so a file named "default.css" will overwrite the builtin "default.css".
125 | # html_static_path = ['_static']
126 | 
127 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
128 | # using the given strftime format.
129 | # html_last_updated_fmt = '%b %d, %Y'
130 | 
131 | # If true, SmartyPants will be used to convert quotes and dashes to
132 | # typographically correct entities.
133 | # html_use_smartypants = True
134 | 
135 | # Custom sidebar templates, maps document names to template names.
136 | # html_sidebars = {}
137 | 
138 | # Additional templates that should be rendered to pages, maps page names to
139 | # template names.
140 | # html_additional_pages = {}
141 | 
142 | # If false, no module index is generated.
143 | # html_domain_indices = True
144 | 
145 | # If false, no index is generated.
146 | # html_use_index = True
147 | 
148 | # If true, the index is split into individual pages for each letter.
149 | # html_split_index = False
150 | 
151 | # If true, links to the reST sources are added to the pages.
152 | # html_show_sourcelink = True
153 | 
154 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
155 | # html_show_sphinx = True
156 | 
157 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
158 | # html_show_copyright = True
159 | 
160 | # If true, an OpenSearch description file will be output, and all pages will
161 | # contain a <link> tag referring to it.  The value of this option must be the
162 | # base URL from which the finished HTML is served.
163 | # html_use_opensearch = ''
164 | 
165 | # This is the file name suffix for HTML files (e.g. ".xhtml").
166 | # html_file_suffix = None
167 | 
168 | # Output file base name for HTML help builder.
169 | htmlhelp_basename = "cssselectdoc"
170 | 
171 | 
172 | # -- Options for LaTeX output --------------------------------------------------
173 | 
174 | latex_elements = {
175 |     # The paper size ('letterpaper' or 'a4paper').
176 |     #'papersize': 'letterpaper',
177 |     # The font size ('10pt', '11pt' or '12pt').
178 |     #'pointsize': '10pt',
179 |     # Additional stuff for the LaTeX preamble.
180 |     #'preamble': '',
181 | }
182 | 
183 | # Grouping the document tree into LaTeX files. List of tuples
184 | # (source start file, target name, title, author, documentclass [howto/manual]).
185 | latex_documents = [
186 |     ("index", "cssselect.tex", "cssselect Documentation", "Simon Sapin", "manual"),
187 | ]
188 | 
189 | # The name of an image file (relative to this directory) to place at the top of
190 | # the title page.
191 | # latex_logo = None
192 | 
193 | # For "manual" documents, if this is true, then toplevel headings are parts,
194 | # not chapters.
195 | # latex_use_parts = False
196 | 
197 | # If true, show page references after internal links.
198 | # latex_show_pagerefs = False
199 | 
200 | # If true, show URL addresses after external links.
201 | # latex_show_urls = False
202 | 
203 | # Documents to append as an appendix to all manuals.
204 | # latex_appendices = []
205 | 
206 | # If false, no module index is generated.
207 | # latex_domain_indices = True
208 | 
209 | 
210 | # -- Options for manual page output --------------------------------------------
211 | 
212 | # One entry per manual page. List of tuples
213 | # (source start file, name, description, authors, manual section).
214 | man_pages = [("index", "cssselect", "cssselect Documentation", ["Simon Sapin"], 1)]
215 | 
216 | # If true, show URL addresses after external links.
217 | # man_show_urls = False
218 | 
219 | 
220 | # -- Options for Texinfo output ------------------------------------------------
221 | 
222 | # Grouping the document tree into Texinfo files. List of tuples
223 | # (source start file, target name, title, author,
224 | #  dir menu entry, description, category)
225 | texinfo_documents = [
226 |     (
227 |         "index",
228 |         "cssselect",
229 |         "cssselect Documentation",
230 |         "Simon Sapin",
231 |         "cssselect",
232 |         "One line description of project.",
233 |         "Miscellaneous",
234 |     ),
235 | ]
236 | 
237 | # Documents to append as an appendix to all manuals.
238 | # texinfo_appendices = []
239 | 
240 | # If false, no module index is generated.
241 | # texinfo_domain_indices = True
242 | 
243 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
244 | # texinfo_show_urls = 'footnote'
245 | 
246 | 
247 | # Example configuration for intersphinx: refer to the Python standard library.
248 | intersphinx_mapping = {"python": ("https://docs.python.org/3", None)}
249 | 
250 | 
251 | # --- Nitpicking options ------------------------------------------------------
252 | 
253 | nitpicky = True
254 | nitpick_ignore = [
255 |     # explicitly not a part of the public API
256 |     ("py:class", "Token"),
257 | ]
258 | 


--------------------------------------------------------------------------------
/docs/conftest.py:
--------------------------------------------------------------------------------
 1 | from doctest import ELLIPSIS, NORMALIZE_WHITESPACE
 2 | 
 3 | from sybil import Sybil
 4 | from sybil.parsers.doctest import DocTestParser
 5 | from sybil.parsers.skip import skip
 6 | 
 7 | try:
 8 |     # sybil 3.0.0+
 9 |     from sybil.parsers.codeblock import PythonCodeBlockParser
10 | except ImportError:
11 |     from sybil.parsers.codeblock import CodeBlockParser as PythonCodeBlockParser
12 | 
13 | 
14 | pytest_collect_file = Sybil(
15 |     parsers=[
16 |         DocTestParser(optionflags=ELLIPSIS | NORMALIZE_WHITESPACE),
17 |         PythonCodeBlockParser(future_imports=["print_function"]),
18 |         skip,
19 |     ],
20 |     pattern="*.rst",
21 | ).pytest()
22 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
  1 | .. module:: cssselect
  2 | 
  3 | .. include:: ../README.rst
  4 | 
  5 | 
  6 | .. contents:: Contents
  7 |     :local:
  8 |     :depth: 1
  9 | 
 10 | Quickstart
 11 | ==========
 12 | 
 13 | Use :class:`HTMLTranslator` for HTML documents, :class:`GenericTranslator`
 14 | for "generic" XML documents. (The former has a more useful translation
 15 | for some selectors, based on HTML-specific element types or attributes.)
 16 | 
 17 | 
 18 | .. sourcecode:: pycon
 19 | 
 20 |     >>> from cssselect import GenericTranslator, SelectorError
 21 |     >>> try:
 22 |     ...     expression = GenericTranslator().css_to_xpath('div.content')
 23 |     ... except SelectorError:
 24 |     ...     print('Invalid selector.')
 25 |     ...
 26 |     >>> print(expression)
 27 |     descendant-or-self::div[@class and contains(concat(' ', normalize-space(@class), ' '), ' content ')]
 28 | 
 29 | The resulting expression can be used with lxml's `XPath engine`_:
 30 | 
 31 | .. _XPath engine: http://lxml.de/xpathxslt.html#xpath
 32 | 
 33 | .. sourcecode:: pycon
 34 | 
 35 |     >>> from lxml.etree import fromstring
 36 |     >>> document = fromstring('''
 37 |     ...   <div id="outer">
 38 |     ...     <div id="inner" class="content body">text</div>
 39 |     ...   </div>
 40 |     ... ''')
 41 |     >>> [e.get('id') for e in document.xpath(expression)]
 42 |     ['inner']
 43 | 
 44 | User API
 45 | ========
 46 | 
 47 | In CSS3 Selectors terms, the top-level object is a `group of selectors`_, a
 48 | sequence of comma-separated selectors. For example, ``div, h1.title + p``
 49 | is a group of two selectors.
 50 | 
 51 | .. _group of selectors: http://www.w3.org/TR/selectors/#grouping
 52 | 
 53 | .. autofunction:: parse
 54 | .. autoclass:: Selector()
 55 |     :members:
 56 | 
 57 | .. autoclass:: FunctionalPseudoElement
 58 | 
 59 | .. autoclass:: GenericTranslator
 60 |     :members: css_to_xpath, selector_to_xpath
 61 | 
 62 | .. autoclass:: HTMLTranslator
 63 | 
 64 | Exceptions
 65 | ----------
 66 | 
 67 | .. autoexception:: SelectorError
 68 | .. autoexception:: SelectorSyntaxError
 69 | .. autoexception:: ExpressionError
 70 | 
 71 | 
 72 | Supported selectors
 73 | ===================
 74 | 
 75 | This library implements CSS3 selectors as described in `the W3C specification
 76 | <http://www.w3.org/TR/2011/REC-css3-selectors-20110929/>`_.
 77 | In this context however, there is no interactivity or history of visited links.
 78 | Therefore, these pseudo-classes are accepted but never match anything:
 79 | 
 80 | * ``:hover``
 81 | * ``:active``
 82 | * ``:focus``
 83 | * ``:target``
 84 | * ``:visited``
 85 | 
 86 | Additionally, these depend on document knowledge and only have a useful
 87 | implementation in :class:`HTMLTranslator`. In :class:`GenericTranslator`,
 88 | they never match:
 89 | 
 90 | * ``:link``
 91 | * ``:enabled``
 92 | * ``:disabled``
 93 | * ``:checked``
 94 | 
 95 | These applicable pseudo-classes are not yet implemented:
 96 | 
 97 | * ``*:first-of-type``, ``*:last-of-type``, ``*:nth-of-type``,
 98 |   ``*:nth-last-of-type``, ``*:only-of-type``.  All of these work when
 99 |   you specify an element type, but not with ``*``
100 | 
101 | On the other hand, *cssselect* supports some selectors that are not
102 | in the Level 3 specification.
103 | 
104 | These parts of the Level 4 specification are supported (note that a large part
105 | of the Level 4 additions is either not applicable to cssselect, like ``:hover``,
106 | or not representable in XPath 1.0, so the complete specification is unlikely to
107 | be implemented):
108 | 
109 | * The ``:scope`` pseudo-class. Limitation: it can only be used at the start of a
110 |   selector.
111 | * The ``:is()``, ``:where()`` and ``:has()`` pseudo-classes. Limitation:
112 |   ``:has()`` cannot contain nested ``:has()`` or ``:not()``.
113 | 
114 | These are non-standard extensions:
115 | 
116 | * The ``:contains(text)`` pseudo-class that existed in `an early draft`_
117 |   but was then removed.
118 | * The ``!=`` attribute operator. ``[foo!=bar]`` is the same as
119 |   ``:not([foo=bar])``.
120 | * ``:not()`` accepts a *sequence of simple selectors*, not just a single
121 |   *simple selector*. For example, ``:not(a.important[rel])`` is allowed,
122 |   even though the negation contains 3 *simple selectors*.
123 | 
124 | .. _an early draft: http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
125 | 
126 | ..
127 |     The following claim was copied from lxml:
128 | 
129 |     """
130 |     XPath has underspecified string quoting rules (there seems to be no
131 |     string quoting at all), so if you use expressions that contain
132 |     characters that requiring quoting you might have problems with the
133 |     translation from CSS to XPath.
134 |     """
135 | 
136 |     It seems "string quoting" meant "quote escaping". There is indeed
137 |     no quote escaping, but the xpath_literal method handles this.
138 |     It should not be a problem anymore.
139 | 
140 | 
141 | Customizing the translation
142 | ===========================
143 | 
144 | Just like :class:`HTMLTranslator` is a subclass of :class:`GenericTranslator`,
145 | you can make new sub-classes of either of them and override some methods.
146 | This enables you, for example, to customize how some pseudo-class is
147 | implemented without forking or monkey-patching cssselect.
148 | 
149 | The "customization API" is the set of methods in translation classes
150 | and their signature. You can look at the `source code`_ to see how it works.
151 | However, be aware that this API is not very stable yet. It might change
152 | and break your sub-class.
153 | 
154 | .. _source code: https://github.com/scrapy/cssselect/blob/master/cssselect/xpath.py
155 | 
156 | 
157 | Namespaces
158 | ==========
159 | 
160 | In CSS you can use ``namespace-prefix|element``, similar to
161 | ``namespace-prefix:element`` in an XPath expression.  In fact, it maps
162 | one-to-one. How prefixes are mapped to namespace URIs depends on the
163 | XPath implementation.
164 | 
165 | .. include:: ../CHANGES
166 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx==8.2.3
2 | sphinx-rtd-theme==3.0.2
3 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [build-system]
  2 | build-backend = "hatchling.build"
  3 | requires = ["hatchling>=1.27.0"]
  4 | 
  5 | [project]
  6 | name = "cssselect"
  7 | license = "BSD-3-Clause"
  8 | license-files = ["LICENSE", "AUTHORS"]
  9 | description = "cssselect parses CSS3 Selectors and translates them to XPath 1.0"
 10 | readme = "README.rst"
 11 | authors = [{ name = "Ian Bicking", email = "ianb@colorstudy.com" }]
 12 | maintainers = [{ name = "Paul Tremberth", email = "paul.tremberth@gmail.com" }]
 13 | requires-python = ">=3.9"
 14 | classifiers = [
 15 |     "Development Status :: 4 - Beta",
 16 |     "Intended Audience :: Developers",
 17 |     "Programming Language :: Python :: 3",
 18 |     "Programming Language :: Python :: 3.9",
 19 |     "Programming Language :: Python :: 3.10",
 20 |     "Programming Language :: Python :: 3.11",
 21 |     "Programming Language :: Python :: 3.12",
 22 |     "Programming Language :: Python :: 3.13",
 23 |     "Programming Language :: Python :: Implementation :: CPython",
 24 |     "Programming Language :: Python :: Implementation :: PyPy",
 25 | ]
 26 | dynamic = ["version"]
 27 | 
 28 | [project.urls]
 29 | "Homepage" = "https://github.com/scrapy/cssselect"
 30 | 
 31 | [tool.hatch.version]
 32 | path = "cssselect/__init__.py"
 33 | 
 34 | [tool.hatch.build.targets.sdist]
 35 | include = [
 36 |     "/cssselect",
 37 |     "/docs",
 38 |     "/tests",
 39 |     "/CHANGES",
 40 |     "/README.rst",
 41 |     "/tox.ini",
 42 | ]
 43 | exclude = [
 44 |     "/docs/_build",
 45 | ]
 46 | 
 47 | [tool.hatch.build.targets.wheel]
 48 | packages = ["cssselect"]
 49 | 
 50 | [tool.bumpversion]
 51 | current_version = "1.3.0"
 52 | commit = true
 53 | tag = true
 54 | 
 55 | [[tool.bumpversion.files]]
 56 | filename = "cssselect/__init__.py"
 57 | 
 58 | [tool.coverage.run]
 59 | branch = true
 60 | source = ["cssselect"]
 61 | 
 62 | [tool.coverage.report]
 63 | exclude_also = [
 64 |     "def __repr__",
 65 |     "if sys.version_info",
 66 |     "if __name__ == '__main__':",
 67 |     "if TYPE_CHECKING:",
 68 | ]
 69 | 
 70 | [tool.pylint.MASTER]
 71 | persistent = "no"
 72 | extension-pkg-allow-list = ["lxml"]
 73 | 
 74 | [tool.pylint."MESSAGES CONTROL"]
 75 | enable = [
 76 |     "useless-suppression",
 77 | ]
 78 | disable = [
 79 |     "consider-using-f-string",
 80 |     "fixme",
 81 |     "invalid-name",
 82 |     "line-too-long",
 83 |     "missing-class-docstring",
 84 |     "missing-function-docstring",
 85 |     "missing-module-docstring",
 86 |     "no-member",
 87 |     "not-callable",
 88 |     "redefined-builtin",
 89 |     "redefined-outer-name",
 90 |     "too-few-public-methods",
 91 |     "too-many-arguments",
 92 |     "too-many-branches",
 93 |     "too-many-function-args",
 94 |     "too-many-lines",
 95 |     "too-many-locals",
 96 |     "too-many-positional-arguments",
 97 |     "too-many-public-methods",
 98 |     "too-many-statements",
 99 |     "unused-argument",
100 | ]
101 | 
102 | [tool.pytest.ini_options]
103 | testpaths = ["tests"]
104 | 
105 | [tool.ruff.lint]
106 | extend-select = [
107 |     # flake8-bugbear
108 |     "B",
109 |     # flake8-comprehensions
110 |     "C4",
111 |     # pydocstyle
112 |     "D",
113 |     # flake8-future-annotations
114 |     "FA",
115 |     # flynt
116 |     "FLY",
117 |     # refurb
118 |     "FURB",
119 |     # isort
120 |     "I",
121 |     # flake8-implicit-str-concat
122 |     "ISC",
123 |     # flake8-logging
124 |     "LOG",
125 |     # Perflint
126 |     "PERF",
127 |     # pygrep-hooks
128 |     "PGH",
129 |     # flake8-pie
130 |     "PIE",
131 |     # pylint
132 |     "PL",
133 |     # flake8-use-pathlib
134 |     "PTH",
135 |     # flake8-pyi
136 |     "PYI",
137 |     # flake8-quotes
138 |     "Q",
139 |     # flake8-return
140 |     "RET",
141 |     # flake8-raise
142 |     "RSE",
143 |     # Ruff-specific rules
144 |     "RUF",
145 |     # flake8-bandit
146 |     "S",
147 |     # flake8-simplify
148 |     "SIM",
149 |     # flake8-slots
150 |     "SLOT",
151 |     # flake8-debugger
152 |     "T10",
153 |     # flake8-type-checking
154 |     "TC",
155 |     # pyupgrade
156 |     "UP",
157 |     # pycodestyle warnings
158 |     "W",
159 |     # flake8-2020
160 |     "YTT",
161 | ]
162 | ignore = [
163 |     # Missing docstring in public module
164 |     "D100",
165 |     # Missing docstring in public class
166 |     "D101",
167 |     # Missing docstring in public method
168 |     "D102",
169 |     # Missing docstring in public function
170 |     "D103",
171 |     # Missing docstring in public package
172 |     "D104",
173 |     # Missing docstring in magic method
174 |     "D105",
175 |     # Missing docstring in public nested class
176 |     "D106",
177 |     # Missing docstring in __init__
178 |     "D107",
179 |     # One-line docstring should fit on one line with quotes
180 |     "D200",
181 |     # No blank lines allowed after function docstring
182 |     "D202",
183 |     # 1 blank line required between summary line and description
184 |     "D205",
185 |     # Multi-line docstring closing quotes should be on a separate line
186 |     "D209",
187 |     # First line should end with a period
188 |     "D400",
189 |     # First line should be in imperative mood; try rephrasing
190 |     "D401",
191 |     # First line should not be the function's "signature"
192 |     "D402",
193 |     # First word of the first line should be properly capitalized
194 |     "D403",
195 |     # Too many return statements
196 |     "PLR0911",
197 |     # Too many branches
198 |     "PLR0912",
199 |     # Too many arguments in function definition
200 |     "PLR0913",
201 |     # Too many statements
202 |     "PLR0915",
203 |     # Magic value used in comparison
204 |     "PLR2004",
205 |     # String contains ambiguous {}.
206 |     "RUF001",
207 |     # Docstring contains ambiguous {}.
208 |     "RUF002",
209 |     # Comment contains ambiguous {}.
210 |     "RUF003",
211 |     # Mutable class attributes should be annotated with `typing.ClassVar`
212 |     "RUF012",
213 |     # Use of `assert` detected
214 |     "S101",
215 |     # Using lxml to parse untrusted data is known to be vulnerable to XML attacks
216 |     "S320",
217 | ]
218 | 
219 | [tool.ruff.lint.pydocstyle]
220 | convention = "pep257"
221 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scrapy/cssselect/b478ce96deddd07bd7bd5311d49fd0b5bbf3f54f/tests/__init__.py


--------------------------------------------------------------------------------
/tests/test_cssselect.py:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env python
   2 | """
   3 | Tests for cssselect
   4 | ===================
   5 | 
   6 | These tests can be run either by pytest or by the standard library's
   7 | unittest. They use plain ``assert`` statements and do little reporting
   8 | themselves in case of failure.
   9 | 
  10 | Use pytest to get fancy error reporting and assert introspection.
  11 | 
  12 | 
  13 | :copyright: (c) 2007-2012 Ian Bicking and contributors.
  14 | See AUTHORS for more details.
  15 | :license: BSD, see LICENSE for more details.
  16 | 
  17 | """
  18 | 
  19 | from __future__ import annotations
  20 | 
  21 | import sys
  22 | import typing
  23 | import unittest
  24 | from typing import TYPE_CHECKING
  25 | 
  26 | from lxml import etree, html
  27 | 
  28 | from cssselect import (
  29 |     ExpressionError,
  30 |     GenericTranslator,
  31 |     HTMLTranslator,
  32 |     SelectorSyntaxError,
  33 |     parse,
  34 | )
  35 | from cssselect.parser import (
  36 |     Function,
  37 |     FunctionalPseudoElement,
  38 |     PseudoElement,
  39 |     Token,
  40 |     parse_series,
  41 |     tokenize,
  42 | )
  43 | from cssselect.xpath import XPathExpr
  44 | 
  45 | if TYPE_CHECKING:
  46 |     from collections.abc import Sequence
  47 | 
  48 | 
  49 | class TestCssselect(unittest.TestCase):
  50 |     def test_tokenizer(self) -> None:
     |         """Compare the str() of every token produced for one tricky selector."""
     |         # The input mixes an escaped space, a no-break space, a string with an
     |         # escaped quote, and a /* ... */ comment.
  51 |         tokens = [
  52 |             str(item)
  53 |             for item in tokenize(r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)')
  54 |         ]
  55 |         assert tokens == [
  56 |             "<IDENT 'E é' at 0>",
  57 |             "<S ' ' at 4>",
  58 |             "<DELIM '>' at 5>",
  59 |             "<S ' ' at 6>",
  60 |             # the no-break space is not whitespace in CSS
  61 |             "<IDENT 'f ' at 7>",  # f\xa0
  62 |             "<DELIM '[' at 9>",
  63 |             "<IDENT 'a' at 10>",
  64 |             "<DELIM '~' at 11>",
  65 |             "<DELIM '=' at 12>",
  66 |             "<STRING 'y\"x' at 13>",
  67 |             "<DELIM ']' at 19>",
  68 |             "<DELIM ':' at 20>",
  69 |             "<IDENT 'nth' at 21>",
  70 |             "<DELIM '(' at 24>",
     |             # the comment yields no token; the number starts right after it
  71 |             "<NUMBER '-3.7' at 37>",
  72 |             "<DELIM ')' at 41>",
  73 |             "<EOF at 42>",
  74 |         ]
  75 | 
  76 |     def test_parser(self) -> None:
  77 |         def repr_parse(css: str) -> list[str]:
  78 |             selectors = parse(css)
  79 |             for selector in selectors:
  80 |                 assert selector.pseudo_element is None
  81 |             return [repr(selector.parsed_tree) for selector in selectors]
  82 | 
  83 |         def parse_many(first: str, *others: str) -> list[str]:
  84 |             result = repr_parse(first)
  85 |             for other in others:
  86 |                 assert repr_parse(other) == result
  87 |             return result
  88 | 
  89 |         assert parse_many("*") == ["Element[*]"]
  90 |         assert parse_many("*|*") == ["Element[*]"]
  91 |         assert parse_many("*|foo") == ["Element[foo]"]
  92 |         assert parse_many("|foo") == ["Element[foo]"]
  93 |         assert parse_many("foo|*") == ["Element[foo|*]"]
  94 |         assert parse_many("foo|bar") == ["Element[foo|bar]"]
  95 |         # This will never match, but it is valid:
  96 |         assert parse_many("#foo#bar") == ["Hash[Hash[Element[*]#foo]#bar]"]
  97 |         assert parse_many(
  98 |             "div>.foo",
  99 |             "div> .foo",
 100 |             "div >.foo",
 101 |             "div > .foo",
 102 |             "div \n>  \t \t .foo",
 103 |             "div\r>\n\n\n.foo",
 104 |             "div\f>\f.foo",
 105 |         ) == ["CombinedSelector[Element[div] > Class[Element[*].foo]]"]
 106 |         assert parse_many(
 107 |             "td.foo,.bar", "td.foo, .bar", "td.foo\t\r\n\f ,\t\r\n\f .bar"
 108 |         ) == [
 109 |             "Class[Element[td].foo]",
 110 |             "Class[Element[*].bar]",
 111 |         ]
 112 |         assert parse_many("div, td.foo, div.bar span") == [
 113 |             "Element[div]",
 114 |             "Class[Element[td].foo]",
 115 |             "CombinedSelector[Class[Element[div].bar] <followed> Element[span]]",
 116 |         ]
 117 |         assert parse_many("div > p") == ["CombinedSelector[Element[div] > Element[p]]"]
 118 |         assert parse_many("td:first") == ["Pseudo[Element[td]:first]"]
 119 |         assert parse_many("td:first") == ["Pseudo[Element[td]:first]"]
 120 |         assert parse_many("td :first") == [
 121 |             "CombinedSelector[Element[td] <followed> Pseudo[Element[*]:first]]"
 122 |         ]
 123 |         assert parse_many("td :first") == [
 124 |             "CombinedSelector[Element[td] <followed> Pseudo[Element[*]:first]]"
 125 |         ]
 126 |         assert parse_many("a[name]", "a[ name\t]") == ["Attrib[Element[a][name]]"]
 127 |         assert parse_many("a [name]") == [
 128 |             "CombinedSelector[Element[a] <followed> Attrib[Element[*][name]]]"
 129 |         ]
 130 |         assert parse_many('a[rel="include"]', "a[rel = include]") == [
 131 |             "Attrib[Element[a][rel = 'include']]"
 132 |         ]
 133 |         assert parse_many("a[hreflang |= 'en']", "a[hreflang|=en]") == [
 134 |             "Attrib[Element[a][hreflang |= 'en']]"
 135 |         ]
 136 |         assert parse_many("div:nth-child(10)") == [
 137 |             "Function[Element[div]:nth-child(['10'])]"
 138 |         ]
 139 |         assert parse_many(":nth-child(2n+2)") == [
 140 |             "Function[Element[*]:nth-child(['2', 'n', '+2'])]"
 141 |         ]
 142 |         assert parse_many("div:nth-of-type(10)") == [
 143 |             "Function[Element[div]:nth-of-type(['10'])]"
 144 |         ]
 145 |         assert parse_many("div div:nth-of-type(10) .aclass") == [
 146 |             "CombinedSelector[CombinedSelector[Element[div] <followed> "
 147 |             "Function[Element[div]:nth-of-type(['10'])]] "
 148 |             "<followed> Class[Element[*].aclass]]"
 149 |         ]
 150 |         assert parse_many("label:only") == ["Pseudo[Element[label]:only]"]
 151 |         assert parse_many("a:lang(fr)") == ["Function[Element[a]:lang(['fr'])]"]
 152 |         assert parse_many('div:contains("foo")') == [
 153 |             "Function[Element[div]:contains(['foo'])]"
 154 |         ]
 155 |         assert parse_many("div#foobar") == ["Hash[Element[div]#foobar]"]
 156 |         assert parse_many("div:not(div.foo)") == [
 157 |             "Negation[Element[div]:not(Class[Element[div].foo])]"
 158 |         ]
 159 |         assert parse_many("div:has(div.foo)") == [
 160 |             "Relation[Element[div]:has(Selector[Class[Element[div].foo]])]"
 161 |         ]
 162 |         assert parse_many("div:is(.foo, #bar)") == [
 163 |             "Matching[Element[div]:is(Class[Element[*].foo], Hash[Element[*]#bar])]"
 164 |         ]
 165 |         assert parse_many(":is(:hover, :visited)") == [
 166 |             "Matching[Element[*]:is(Pseudo[Element[*]:hover], Pseudo[Element[*]:visited])]"
 167 |         ]
 168 |         assert parse_many(":where(:hover, :visited)") == [
 169 |             "SpecificityAdjustment[Element[*]:where(Pseudo[Element[*]:hover],"
 170 |             " Pseudo[Element[*]:visited])]"
 171 |         ]
 172 |         assert parse_many("td ~ th") == ["CombinedSelector[Element[td] ~ Element[th]]"]
 173 |         assert parse_many(":scope > foo") == [
 174 |             "CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]"
 175 |         ]
 176 |         assert parse_many(" :scope > foo") == [
 177 |             "CombinedSelector[Pseudo[Element[*]:scope] > Element[foo]]"
 178 |         ]
 179 |         assert parse_many(":scope > foo bar > div") == [
 180 |             "CombinedSelector[CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > "
 181 |             "Element[foo]] <followed> Element[bar]] > Element[div]]"
 182 |         ]
 183 |         assert parse_many(":scope > #foo #bar") == [
 184 |             "CombinedSelector[CombinedSelector[Pseudo[Element[*]:scope] > "
 185 |             "Hash[Element[*]#foo]] <followed> Hash[Element[*]#bar]]"
 186 |         ]
 187 | 
 188 |     def test_pseudo_elements(self) -> None:
     |         """Check the pseudo-element reported for parsed selectors and its translation."""
     | 
 189 |         def parse_pseudo(css: str) -> list[tuple[str, str | None]]:
     |             # One (repr of parsed tree, pseudo-element as str or None) pair
     |             # per selector in `css`.
 190 |             result: list[tuple[str, str | None]] = []
 191 |             for selector in parse(css):
 192 |                 pseudo = selector.pseudo_element
     |                 # Normalise to a plain str so the pairs compare against literals.
 193 |                 pseudo = str(pseudo) if pseudo else pseudo
 194 |                 # No Symbol here
 195 |                 assert pseudo is None or isinstance(pseudo, str)
 196 |                 selector_as_str = repr(selector.parsed_tree)
 197 |                 result.append((selector_as_str, pseudo))
 198 |             return result
 199 | 
 200 |         def parse_one(css: str) -> tuple[str, str | None]:
     |             # Same as parse_pseudo(), for input that must hold exactly one selector.
 201 |             result = parse_pseudo(css)
 202 |             assert len(result) == 1
 203 |             return result[0]
 204 | 
 205 |         def test_pseudo_repr(css: str) -> str:
     |             # repr() of the parsed tree of the single selector in `css`.
 206 |             result = parse(css)
 207 |             assert len(result) == 1
 208 |             selector = result[0]
 209 |             return repr(selector.parsed_tree)
 210 | 
     |         # No pseudo-element: pseudo-classes stay inside the parsed tree.
 211 |         assert parse_one("foo") == ("Element[foo]", None)
 212 |         assert parse_one("*") == ("Element[*]", None)
 213 |         assert parse_one(":empty") == ("Pseudo[Element[*]:empty]", None)
 214 |         assert parse_one(":scope") == ("Pseudo[Element[*]:scope]", None)
 215 | 
 216 |         # Special cases for CSS 2.1 pseudo-elements
     |         # (single-colon spelling accepted; the name is reported in lower case)
 217 |         assert parse_one(":BEfore") == ("Element[*]", "before")
 218 |         assert parse_one(":aftER") == ("Element[*]", "after")
 219 |         assert parse_one(":First-Line") == ("Element[*]", "first-line")
 220 |         assert parse_one(":First-Letter") == ("Element[*]", "first-letter")
 221 | 
 222 |         assert parse_one("::befoRE") == ("Element[*]", "before")
 223 |         assert parse_one("::AFter") == ("Element[*]", "after")
 224 |         assert parse_one("::firsT-linE") == ("Element[*]", "first-line")
 225 |         assert parse_one("::firsT-letteR") == ("Element[*]", "first-letter")
 226 | 
 227 |         assert parse_one("::text-content") == ("Element[*]", "text-content")
 228 |         assert parse_one("::attr(name)") == (
 229 |             "Element[*]",
 230 |             "FunctionalPseudoElement[::attr(['name'])]",
 231 |         )
 232 | 
 233 |         assert parse_one("::Selection") == ("Element[*]", "selection")
 234 |         assert parse_one("foo:after") == ("Element[foo]", "after")
 235 |         assert parse_one("foo::selection") == ("Element[foo]", "selection")
 236 |         assert parse_one("lorem#ipsum ~ a#b.c[href]:empty::selection") == (
 237 |             "CombinedSelector[Hash[Element[lorem]#ipsum] ~ "
 238 |             "Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]",
 239 |             "selection",
 240 |         )
     |         # Selector groups: each member reports its own pseudo-element (or None).
 241 |         assert parse_pseudo(":scope > div, foo bar") == [
 242 |             ("CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]", None),
 243 |             ("CombinedSelector[Element[foo] <followed> Element[bar]]", None),
 244 |         ]
 245 |         assert parse_pseudo("foo bar, :scope > div") == [
 246 |             ("CombinedSelector[Element[foo] <followed> Element[bar]]", None),
 247 |             ("CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]", None),
 248 |         ]
 249 |         assert parse_pseudo("foo bar,:scope > div") == [
 250 |             ("CombinedSelector[Element[foo] <followed> Element[bar]]", None),
 251 |             ("CombinedSelector[Pseudo[Element[*]:scope] > Element[div]]", None),
 252 |         ]
 253 |         assert parse_pseudo("foo:before, bar, baz:after") == [
 254 |             ("Element[foo]", "before"),
 255 |             ("Element[bar]", None),
 256 |             ("Element[baz]", "after"),
 257 |         ]
 258 | 
 259 |         # Special cases for CSS 2.1 pseudo-elements are ignored by default
     |         # when translating: the XPath is the same as for plain "e".
 260 |         for pseudo in ("after", "before", "first-line", "first-letter"):
 261 |             (selector,) = parse(f"e:{pseudo}")
 262 |             assert selector.pseudo_element == pseudo
 263 |             assert GenericTranslator().selector_to_xpath(selector, prefix="") == "e"
 264 | 
 265 |         # Pseudo Elements are ignored by default, but if allowed they are not
 266 |         # supported by GenericTranslator
 267 |         tr = GenericTranslator()
 268 |         (selector,) = parse("e::foo")
 269 |         assert selector.pseudo_element == "foo"
 270 |         assert tr.selector_to_xpath(selector, prefix="") == "e"
 271 |         self.assertRaises(
 272 |             ExpressionError,
 273 |             tr.selector_to_xpath,
 274 |             selector,
 275 |             translate_pseudo_elements=True,
 276 |         )
 277 | 
 278 |         # Special test for the unicode symbols and ':scope' element if check
 279 |         # Errors if use repr() instead of __repr__()
     |         # NOTE(review): test_pseudo_repr() above calls repr(), so the remark
     |         # about __repr__() looks stale - confirm and reword or drop it.
 280 |         assert test_pseudo_repr(":fİrst-child") == "Pseudo[Element[*]:fİrst-child]"
 281 |         assert test_pseudo_repr(":scope") == "Pseudo[Element[*]:scope]"
 282 | 
 283 |     def test_specificity(self) -> None:
 284 |         def specificity(css: str) -> tuple[int, int, int]:
 285 |             selectors = parse(css)
 286 |             assert len(selectors) == 1
 287 |             return selectors[0].specificity()
 288 | 
 289 |         assert specificity("*") == (0, 0, 0)
 290 |         assert specificity(" foo") == (0, 0, 1)
 291 |         assert specificity(":empty ") == (0, 1, 0)
 292 |         assert specificity(":before") == (0, 0, 1)
 293 |         assert specificity("*:before") == (0, 0, 1)
 294 |         assert specificity(":nth-child(2)") == (0, 1, 0)
 295 |         assert specificity(".bar") == (0, 1, 0)
 296 |         assert specificity("[baz]") == (0, 1, 0)
 297 |         assert specificity('[baz="4"]') == (0, 1, 0)
 298 |         assert specificity('[baz^="4"]') == (0, 1, 0)
 299 |         assert specificity("#lipsum") == (1, 0, 0)
 300 |         assert specificity("::attr(name)") == (0, 0, 1)
 301 | 
 302 |         assert specificity(":not(*)") == (0, 0, 0)
 303 |         assert specificity(":not(foo)") == (0, 0, 1)
 304 |         assert specificity(":not(.foo)") == (0, 1, 0)
 305 |         assert specificity(":not([foo])") == (0, 1, 0)
 306 |         assert specificity(":not(:empty)") == (0, 1, 0)
 307 |         assert specificity(":not(#foo)") == (1, 0, 0)
 308 | 
 309 |         assert specificity(":has(*)") == (0, 0, 0)
 310 |         assert specificity(":has(foo)") == (0, 0, 1)
 311 |         assert specificity(":has(.foo)") == (0, 1, 0)
 312 |         assert specificity(":has(> foo)") == (0, 0, 1)
 313 | 
 314 |         assert specificity(":is(.foo, #bar)") == (1, 0, 0)
 315 |         assert specificity(":is(:hover, :visited)") == (0, 1, 0)
 316 |         assert specificity(":where(:hover, :visited)") == (0, 0, 0)
 317 | 
 318 |         assert specificity("foo:empty") == (0, 1, 1)
 319 |         assert specificity("foo:before") == (0, 0, 2)
 320 |         assert specificity("foo::before") == (0, 0, 2)
 321 |         assert specificity("foo:empty::before") == (0, 1, 2)
 322 | 
 323 |         assert specificity("#lorem + foo#ipsum:first-child > bar:first-line") == (
 324 |             2,
 325 |             1,
 326 |             3,
 327 |         )
 328 | 
 329 |     def test_css_export(self) -> None:
 330 |         def css2css(css: str, res: str | None = None) -> None:
 331 |             selectors = parse(css)
 332 |             assert len(selectors) == 1
 333 |             assert selectors[0].canonical() == (res or css)
 334 | 
 335 |         css2css("*")
 336 |         css2css(" foo", "foo")
 337 |         css2css("Foo", "Foo")
 338 |         css2css(":empty ", ":empty")
 339 |         css2css(":before", "::before")
 340 |         css2css(":beFOre", "::before")
 341 |         css2css("*:before", "::before")
 342 |         css2css(":nth-child(2)")
 343 |         css2css(".bar")
 344 |         css2css("[baz]")
 345 |         css2css('[baz="4"]', "[baz='4']")
 346 |         css2css('[baz^="4"]', "[baz^='4']")
 347 |         css2css("[ns|attr='4']")
 348 |         css2css("#lipsum")
 349 |         css2css(":not(*)")
 350 |         css2css(":not(foo)")
 351 |         css2css(":not(*.foo)", ":not(.foo)")
 352 |         css2css(":not(*[foo])", ":not([foo])")
 353 |         css2css(":not(:empty)")
 354 |         css2css(":not(#foo)")
 355 |         css2css(":has(*)")
 356 |         css2css(":has(foo)")
 357 |         css2css(":has(*.foo)", ":has(.foo)")
 358 |         css2css(":is(#bar, .foo)")
 359 |         css2css(":is(:focused, :visited)")
 360 |         css2css(":where(:focused, :visited)")
 361 |         css2css("foo:empty")
 362 |         css2css("foo::before")
 363 |         css2css("foo:empty::before")
 364 |         css2css('::name(arg + "val" - 3)', "::name(arg+'val'-3)")
 365 |         css2css("#lorem + foo#ipsum:first-child > bar::first-line")
 366 |         css2css("foo > *")
 367 | 
 368 |     def test_parse_errors(self) -> None:
 369 |         def get_error(css: str) -> str | None:
 370 |             try:
 371 |                 parse(css)
 372 |             except SelectorSyntaxError:
 373 |                 return str(sys.exc_info()[1])
 374 |             return None
 375 | 
 376 |         assert get_error("attributes(href)/html/body/a") == (
 377 |             "Expected selector, got <DELIM '(' at 10>"
 378 |         )
 379 |         assert get_error("attributes(href)") == (
 380 |             "Expected selector, got <DELIM '(' at 10>"
 381 |         )
 382 |         assert get_error("html/body/a") == ("Expected selector, got <DELIM '/' at 4>")
 383 |         assert get_error(" ") == ("Expected selector, got <EOF at 1>")
 384 |         assert get_error("div, ") == ("Expected selector, got <EOF at 5>")
 385 |         assert get_error(" , div") == ("Expected selector, got <DELIM ',' at 1>")
 386 |         assert get_error("p, , div") == ("Expected selector, got <DELIM ',' at 3>")
 387 |         assert get_error("div > ") == ("Expected selector, got <EOF at 6>")
 388 |         assert get_error("  > div") == ("Expected selector, got <DELIM '>' at 2>")
 389 |         assert get_error("foo|#bar") == ("Expected ident or '*', got <HASH 'bar' at 4>")
 390 |         assert get_error("#.foo") == ("Expected selector, got <DELIM '#' at 0>")
 391 |         assert get_error(".#foo") == ("Expected ident, got <HASH 'foo' at 1>")
 392 |         assert get_error(":#foo") == ("Expected ident, got <HASH 'foo' at 1>")
 393 |         assert get_error("[*]") == ("Expected '|', got <DELIM ']' at 2>")
 394 |         assert get_error("[foo|]") == ("Expected ident, got <DELIM ']' at 5>")
 395 |         assert get_error("[#]") == ("Expected ident or '*', got <DELIM '#' at 1>")
 396 |         assert get_error("[foo=#]") == (
 397 |             "Expected string or ident, got <DELIM '#' at 5>"
 398 |         )
 399 |         assert get_error("[href]a") == ("Expected selector, got <IDENT 'a' at 6>")
 400 |         assert get_error("[rel=stylesheet]") is None
 401 |         assert get_error("[rel:stylesheet]") == (
 402 |             "Operator expected, got <DELIM ':' at 4>"
 403 |         )
 404 |         assert get_error("[rel=stylesheet") == ("Expected ']', got <EOF at 15>")
 405 |         assert get_error(":lang(fr)") is None
 406 |         assert get_error(":lang(fr") == ("Expected an argument, got <EOF at 8>")
 407 |         assert get_error(':contains("foo') == ("Unclosed string at 10")
 408 |         assert get_error("foo!") == ("Expected selector, got <DELIM '!' at 3>")
 409 | 
 410 |         # Mis-placed pseudo-elements
 411 |         assert get_error("a:before:empty") == (
 412 |             "Got pseudo-element ::before not at the end of a selector"
 413 |         )
 414 |         assert get_error("li:before a") == (
 415 |             "Got pseudo-element ::before not at the end of a selector"
 416 |         )
 417 |         assert get_error(":not(:before)") == (
 418 |             "Got pseudo-element ::before inside :not() at 12"
 419 |         )
 420 |         assert get_error(":not(:not(a))") == ("Got nested :not()")
 421 |         assert get_error(":is(:before)") == (
 422 |             "Got pseudo-element ::before inside function"
 423 |         )
 424 |         assert get_error(":is(a b)") == ("Expected an argument, got <IDENT 'b' at 6>")
 425 |         assert get_error(":where(:before)") == (
 426 |             "Got pseudo-element ::before inside function"
 427 |         )
 428 |         assert get_error(":where(a b)") == (
 429 |             "Expected an argument, got <IDENT 'b' at 9>"
 430 |         )
 431 |         assert get_error(":scope > div :scope header") == (
 432 |             'Got immediate child pseudo-element ":scope" not at the start of a selector'
 433 |         )
 434 |         assert get_error("div :scope header") == (
 435 |             'Got immediate child pseudo-element ":scope" not at the start of a selector'
 436 |         )
 437 |         assert get_error("> div p") == ("Expected selector, got <DELIM '>' at 0>")
 438 | 
 439 |         # Unsupported :has() with several arguments
 440 |         assert get_error(":has(a, b)") == ("Expected an argument, got <DELIM ',' at 6>")
 441 |         assert get_error(":has()") == ("Expected selector, got <EOF at 0>")
 442 | 
 443 |     def test_translation(self) -> None:
     |         """Check the XPath text GenericTranslator produces for many selectors."""
     | 
 444 |         def xpath(css: str) -> str:
     |             # prefix="" is passed so the expected strings below start directly
     |             # at the element test.
 445 |             return str(GenericTranslator().css_to_xpath(css, prefix=""))
 446 | 
     |         # --- type, namespace and attribute selectors ------------------
 447 |         assert xpath("*") == "*"
 448 |         assert xpath("e") == "e"
 449 |         assert xpath("*|e") == "e"
 450 |         assert xpath("e|f") == "e:f"
 451 |         assert xpath("e[foo]") == "e[@foo]"
 452 |         assert xpath("e[foo|bar]") == "e[@foo:bar]"
 453 |         assert xpath('e[foo="bar"]') == "e[@foo = 'bar']"
 454 |         assert xpath('e[foo~="bar"]') == (
 455 |             "e[@foo and contains(concat(' ', normalize-space(@foo), ' '), ' bar ')]"
 456 |         )
 457 |         assert xpath('e[foo^="bar"]') == ("e[@foo and starts-with(@foo, 'bar')]")
 458 |         assert xpath('e[foo$="bar"]') == (
 459 |             "e[@foo and substring(@foo, string-length(@foo)-2) = 'bar']"
 460 |         )
 461 |         assert xpath('e[foo*="bar"]') == ("e[@foo and contains(@foo, 'bar')]")
 462 |         assert xpath('e[hreflang|="en"]') == (
 463 |             "e[@hreflang and (@hreflang = 'en' or starts-with(@hreflang, 'en-'))]"
 464 |         )
 465 | 
 466 |         # --- nth-* and nth-last-* -------------------------------------
 467 |         assert xpath("e:nth-child(1)") == ("e[count(preceding-sibling::*) = 0]")
 468 | 
 469 |         # always true
 470 |         assert xpath("e:nth-child(n)") == ("e")
 471 |         assert xpath("e:nth-child(n+1)") == ("e")
 472 |         # always true too
 473 |         assert xpath("e:nth-child(n-10)") == ("e")
 474 |         # b=2 is the limit...
 475 |         assert xpath("e:nth-child(n+2)") == ("e[count(preceding-sibling::*) >= 1]")
 476 |         # always false
 477 |         assert xpath("e:nth-child(-n)") == ("e[0]")
 478 |         # equivalent to first child
 479 |         assert xpath("e:nth-child(-n+1)") == ("e[count(preceding-sibling::*) <= 0]")
 480 | 
 481 |         assert xpath("e:nth-child(3n+2)") == (
 482 |             "e[(count(preceding-sibling::*) >= 1) and "
 483 |             "((count(preceding-sibling::*) +2) mod 3 = 0)]"
 484 |         )
 485 |         assert xpath("e:nth-child(3n-2)") == (
 486 |             "e[count(preceding-sibling::*) mod 3 = 0]"
 487 |         )
 488 |         assert xpath("e:nth-child(-n+6)") == ("e[count(preceding-sibling::*) <= 5]")
 489 | 
     |         # the nth-last-* forms count following siblings instead
 490 |         assert xpath("e:nth-last-child(1)") == ("e[count(following-sibling::*) = 0]")
 491 |         assert xpath("e:nth-last-child(2n)") == (
 492 |             "e[(count(following-sibling::*) +1) mod 2 = 0]"
 493 |         )
 494 |         assert xpath("e:nth-last-child(2n+1)") == (
 495 |             "e[count(following-sibling::*) mod 2 = 0]"
 496 |         )
 497 |         assert xpath("e:nth-last-child(2n+2)") == (
 498 |             "e[(count(following-sibling::*) >= 1) and "
 499 |             "((count(following-sibling::*) +1) mod 2 = 0)]"
 500 |         )
 501 |         assert xpath("e:nth-last-child(3n+1)") == (
 502 |             "e[count(following-sibling::*) mod 3 = 0]"
 503 |         )
 504 |         # represents the two last e elements
 505 |         assert xpath("e:nth-last-child(-n+2)") == (
 506 |             "e[count(following-sibling::*) <= 1]"
 507 |         )
 508 | 
     |         # the *-of-type forms count only siblings named like the element
 509 |         assert xpath("e:nth-of-type(1)") == ("e[count(preceding-sibling::e) = 0]")
 510 |         assert xpath("e:nth-last-of-type(1)") == ("e[count(following-sibling::e) = 0]")
 511 |         assert xpath("div e:nth-last-of-type(1) .aclass") == (
 512 |             "div/descendant-or-self::*/e[count(following-sibling::e) = 0]"
 513 |             "/descendant-or-self::*/*[@class and contains("
 514 |             "concat(' ', normalize-space(@class), ' '), ' aclass ')]"
 515 |         )
 516 | 
     |         # --- other pseudo-classes; names are matched case-insensitively ---
 517 |         assert xpath("e:first-child") == ("e[count(preceding-sibling::*) = 0]")
 518 |         assert xpath("e:last-child") == ("e[count(following-sibling::*) = 0]")
 519 |         assert xpath("e:first-of-type") == ("e[count(preceding-sibling::e) = 0]")
 520 |         assert xpath("e:last-of-type") == ("e[count(following-sibling::e) = 0]")
 521 |         assert xpath("e:only-child") == ("e[count(parent::*/child::*) = 1]")
 522 |         assert xpath("e:only-of-type") == ("e[count(parent::*/child::e) = 1]")
 523 |         assert xpath("e:empty") == ("e[not(*) and not(string-length())]")
 524 |         assert xpath("e:EmPTY") == ("e[not(*) and not(string-length())]")
 525 |         assert xpath("e:root") == ("e[not(parent::*)]")
 526 |         assert xpath("e:hover") == ("e[0]")  # never matches
     |         # :has() with a descendant argument and with each leading combinator
 527 |         assert (
 528 |             xpath("div:has(bar.foo)") == "div[descendant::bar"
 529 |             "[@class and contains(concat(' ', normalize-space(@class), ' '), ' foo ')]]"
 530 |         )
 531 |         assert xpath("e:has(> f)") == "e[./f]"
 532 |         assert xpath("e:has(f)") == "e[descendant::f]"
 533 |         assert xpath("e:has(~ f)") == "e[following-sibling::f]"
 534 |         assert (
 535 |             xpath("e:has(+ f)")
 536 |             == "e[following-sibling::*[(name() = 'f') and (position() = 1)]]"
 537 |         )
 538 |         assert xpath('e:contains("foo")') == ("e[contains(., 'foo')]")
 539 |         assert xpath("e:ConTains(foo)") == ("e[contains(., 'foo')]")
 540 |         assert xpath("e.warning") == (
 541 |             "e[@class and contains("
 542 |             "concat(' ', normalize-space(@class), ' '), ' warning ')]"
 543 |         )
 544 |         assert xpath("e#myid") == ("e[@id = 'myid']")
 545 |         assert xpath("e:not(:nth-child(odd))") == (
 546 |             "e[not(count(preceding-sibling::*) mod 2 = 0)]"
 547 |         )
 548 |         assert xpath("e:nOT(*)") == ("e[0]")  # never matches
     |         # --- combinators ----------------------------------------------
 549 |         assert xpath("e f") == ("e/descendant-or-self::*/f")
 550 |         assert xpath("e > f") == ("e/f")
 551 |         assert xpath("e + f") == (
 552 |             "e/following-sibling::*[(name() = 'f') and (position() = 1)]"
 553 |         )
 554 |         assert xpath("e ~ f") == ("e/following-sibling::f")
 555 |         assert xpath("e ~ f:nth-child(3)") == (
 556 |             "e/following-sibling::f[count(preceding-sibling::*) = 2]"
 557 |         )
 558 |         assert xpath("div#container p") == (
 559 |             "div[@id = 'container']/descendant-or-self::*/p"
 560 |         )
 561 |         assert xpath("e:where(foo)") == "e[name() = 'foo']"
 562 |         assert xpath("e:where(foo, bar)") == "e[(name() = 'foo') or (name() = 'bar')]"
 563 | 
 564 |         # Invalid characters in XPath element names
     |         # (such names are tested through name() instead of a bare name test)
 565 |         assert xpath(r"di\a0 v") == ("*[name() = 'di v']")  # di\xa0v
 566 |         assert xpath(r"di\[v") == ("*[name() = 'di[v']")
 567 |         assert xpath(r"[h\a0 ref]") == ("*[attribute::*[name() = 'h ref']]")  # h\xa0ref
 568 |         assert xpath(r"[h\]ref]") == ("*[attribute::*[name() = 'h]ref']]")
 569 | 
     |         # --- selectors that must be rejected --------------------------
     |         # The first one spells "first" with U+0130, not an ASCII "i";
     |         # presumably it guards against case-folding it into :first-child.
 570 |         self.assertRaises(ExpressionError, xpath, ":fİrst-child")
     |         # the bare *-of-type forms raise, unlike the e:... forms above
 571 |         self.assertRaises(ExpressionError, xpath, ":first-of-type")
 572 |         self.assertRaises(ExpressionError, xpath, ":only-of-type")
 573 |         self.assertRaises(ExpressionError, xpath, ":last-of-type")
 574 |         self.assertRaises(ExpressionError, xpath, ":nth-of-type(1)")
 575 |         self.assertRaises(ExpressionError, xpath, ":nth-last-of-type(1)")
 576 |         self.assertRaises(ExpressionError, xpath, ":nth-child(n-)")
 577 |         self.assertRaises(ExpressionError, xpath, ":after")
 578 |         self.assertRaises(ExpressionError, xpath, ":lorem-ipsum")
 579 |         self.assertRaises(ExpressionError, xpath, ":lorem(ipsum)")
 580 |         self.assertRaises(ExpressionError, xpath, "::lorem-ipsum")
     |         # arguments of the wrong type raise TypeError instead
 581 |         self.assertRaises(TypeError, GenericTranslator().css_to_xpath, 4)
 582 |         self.assertRaises(TypeError, GenericTranslator().selector_to_xpath, "foo")
 583 | 
 584 |     def test_unicode(self) -> None:
 585 |         css = ".a\xc1b"
 586 |         xpath = GenericTranslator().css_to_xpath(css)
 587 |         assert css[1:] in xpath
 588 |         xpath = xpath.encode("ascii", "xmlcharrefreplace").decode("ASCII")
 589 |         assert xpath == (
 590 |             "descendant-or-self::*[@class and contains("
 591 |             "concat(' ', normalize-space(@class), ' '), ' a&#193;b ')]"
 592 |         )
 593 | 
 594 |     def test_quoting(self) -> None:
 595 |         css_to_xpath = GenericTranslator().css_to_xpath
 596 |         assert css_to_xpath('*[aval="\'"]') == (
 597 |             """descendant-or-self::*[@aval = "'"]"""
 598 |         )
 599 |         assert css_to_xpath("*[aval=\"'''\"]") == (
 600 |             """descendant-or-self::*[@aval = "'''"]"""
 601 |         )
 602 |         assert css_to_xpath("*[aval='\"']") == (
 603 |             """descendant-or-self::*[@aval = '"']"""
 604 |         )
 605 |         assert css_to_xpath('*[aval=\'"""\']') == (
 606 |             '''descendant-or-self::*[@aval = '"""']'''
 607 |         )
 608 |         assert css_to_xpath(':scope > div[dataimg="<testmessage>"]') == (
 609 |             "descendant-or-self::*[1]/div[@dataimg = '<testmessage>']"
 610 |         )
 611 | 
 612 |     def test_unicode_escapes(self) -> None:
 613 |         # \22 == '"'  \20 == ' '
 614 |         css_to_xpath = GenericTranslator().css_to_xpath
 615 |         assert css_to_xpath(r'*[aval="\'\22\'"]') == (
 616 |             """descendant-or-self::*[@aval = concat("'",'"',"'")]"""
 617 |         )
 618 |         assert css_to_xpath(r'*[aval="\'\22 2\'"]') == (
 619 |             """descendant-or-self::*[@aval = concat("'",'"2',"'")]"""
 620 |         )
 621 |         assert css_to_xpath(r'*[aval="\'\20  \'"]') == (
 622 |             """descendant-or-self::*[@aval = "'  '"]"""
 623 |         )
 624 |         assert css_to_xpath("*[aval=\"'\\20\r\n '\"]") == (
 625 |             """descendant-or-self::*[@aval = "'  '"]"""
 626 |         )
 627 | 
 628 |     def test_xpath_pseudo_elements(self) -> None:  # custom translator hooks: pseudo-classes, pseudo-elements, precedence
 629 |         class CustomTranslator(GenericTranslator):  # adds the selectors exercised by the assertions below
 630 |             def xpath_pseudo_element(
 631 |                 self, xpath: XPathExpr, pseudo_element: PseudoElement
 632 |             ) -> XPathExpr:
 633 |                 if isinstance(pseudo_element, FunctionalPseudoElement):  # "::name(args)" form
 634 |                     method_name = "xpath_{}_functional_pseudo_element".format(
 635 |                         pseudo_element.name.replace("-", "_")
 636 |                     )
 637 |                     method = getattr(self, method_name, None)  # dispatch by method-naming convention
 638 |                     if not method:
 639 |                         raise ExpressionError(
 640 |                             f"The functional pseudo-element ::{pseudo_element.name}() is unknown"
 641 |                         )
 642 |                     xpath = method(xpath, pseudo_element.arguments)
 643 |                 else:  # plain "::name" form; used directly as a string here
 644 |                     method_name = "xpath_{}_simple_pseudo_element".format(
 645 |                         pseudo_element.replace("-", "_")
 646 |                     )
 647 |                     method = getattr(self, method_name, None)  # same convention, different suffix
 648 |                     if not method:
 649 |                         raise ExpressionError(
 650 |                             f"The pseudo-element ::{pseudo_element} is unknown"
 651 |                         )
 652 |                     xpath = method(xpath)
 653 |                 return xpath
 654 | 
 655 |             # functional pseudo-class:
 656 |             # elements that have a certain number of attributes
 657 |             def xpath_nb_attr_function(
 658 |                 self, xpath: XPathExpr, function: Function
 659 |             ) -> XPathExpr:
 660 |                 assert function.arguments[0].value  # first argument token must carry a non-empty value
 661 |                 nb_attributes = int(function.arguments[0].value)
 662 |                 return xpath.add_condition(f"count(@*)={nb_attributes}")
 663 | 
 664 |             # pseudo-class:
 665 |             # elements that have 5 attributes
 666 |             def xpath_five_attributes_pseudo(self, xpath: XPathExpr) -> XPathExpr:
 667 |                 return xpath.add_condition("count(@*)=5")
 668 | 
 669 |             # functional pseudo-element:
 670 |             # element's attribute by name
 671 |             def xpath_attr_functional_pseudo_element(
 672 |                 self, xpath: XPathExpr, arguments: Sequence[Token]
 673 |             ) -> XPathExpr:
 674 |                 attribute_name = arguments[0].value
 675 |                 other = XPathExpr(  # NOTE(review): second positional argument is left empty; presumably the element name, confirm
 676 |                     f"@{attribute_name}",
 677 |                     "",
 678 |                 )
 679 |                 return xpath.join("/", other)
 680 | 
 681 |             # pseudo-element:
 682 |             # element's text() nodes
 683 |             def xpath_text_node_simple_pseudo_element(
 684 |                 self, xpath: XPathExpr
 685 |             ) -> XPathExpr:
 686 |                 other = XPathExpr(
 687 |                     "text()",
 688 |                     "",
 689 |                 )
 690 |                 return xpath.join("/", other)
 691 | 
 692 |             # pseudo-element:
 693 |             # element's href attribute
 694 |             def xpath_attr_href_simple_pseudo_element(
 695 |                 self, xpath: XPathExpr
 696 |             ) -> XPathExpr:
 697 |                 other = XPathExpr(
 698 |                     "@href",
 699 |                     "",
 700 |                 )
 701 |                 return xpath.join("/", other)
 702 | 
 703 |             # pseudo-class:
 704 |             # adds an "or" condition, used to demonstrate operator precedence
 705 |             def xpath_first_or_second_pseudo(self, xpath: XPathExpr) -> XPathExpr:
 706 |                 return xpath.add_condition("@id = 'first' or @id = 'second'")
 707 | 
 708 |         def xpath(css: str) -> str:  # translate with a fresh CustomTranslator each time
 709 |             return str(CustomTranslator().css_to_xpath(css))
 710 | 
 711 |         assert xpath(":five-attributes") == "descendant-or-self::*[count(@*)=5]"
 712 |         assert xpath(":nb-attr(3)") == "descendant-or-self::*[count(@*)=3]"
 713 |         assert xpath("::attr(href)") == "descendant-or-self::*/@href"
 714 |         assert xpath("::text-node") == "descendant-or-self::*/text()"
 715 |         assert xpath("::attr-href") == "descendant-or-self::*/@href"
 716 |         assert xpath("p img::attr(src)") == (
 717 |             "descendant-or-self::p/descendant-or-self::*/img/@src"
 718 |         )
 719 |         assert xpath(":scope") == "descendant-or-self::*[1]"
 720 |         assert xpath(":first-or-second[href]") == (  # both conditions are parenthesised before being and-ed
 721 |             "descendant-or-self::*[(@id = 'first' or @id = 'second') and (@href)]"
 722 |         )
 723 | 
 724 |         assert str(XPathExpr("", "", condition="@href")) == "[@href]"  # a lone condition renders as a bare predicate
 725 | 
 726 |         document = etree.fromstring(OPERATOR_PRECEDENCE_IDS)
 727 |         sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__  # element -> document-order index
 728 | 
 729 |         def operator_id(selector: str) -> list[str]:  # ids of matching elements, in document order
 730 |             xpath = CustomTranslator().css_to_xpath(selector)
 731 |             items = typing.cast("list[etree._Element]", document.xpath(xpath))
 732 |             items.sort(key=sort_key)
 733 |             return [element.get("id", "nil") for element in items]
 734 | 
 735 |         assert operator_id(":first-or-second") == ["first", "second"]
 736 |         assert operator_id(":first-or-second[href]") == ["second"]  # "first" has no href, so the "or" must not leak
 737 |         assert operator_id("[href]:first-or-second") == ["second"]  # same result with the conditions swapped
 738 | 
 739 |     def test_series(self) -> None:  # parse_series turns an "an+b" argument into the pair (a, b)
 740 |         def series(css: str) -> tuple[int, int] | None:  # (a, b), or None when parse_series raises ValueError
 741 |             (selector,) = parse(f":nth-child({css})")
 742 |             args = typing.cast(  # NOTE(review): ":nth-child(...)" is a functional pseudo-class, so "Function" may be the more accurate cast target; confirm
 743 |                 "FunctionalPseudoElement", selector.parsed_tree
 744 |             ).arguments
 745 |             try:
 746 |                 return parse_series(args)
 747 |             except ValueError:  # invalid series are reported to the assertions below as None
 748 |                 return None
 749 | 
 750 |         assert series("1n+3") == (1, 3)  # whitespace around the sign does not matter
 751 |         assert series("1n +3") == (1, 3)
 752 |         assert series("1n + 3") == (1, 3)
 753 |         assert series("1n+ 3") == (1, 3)
 754 |         assert series("1n-3") == (1, -3)
 755 |         assert series("1n -3") == (1, -3)
 756 |         assert series("1n - 3") == (1, -3)
 757 |         assert series("1n- 3") == (1, -3)
 758 |         assert series("n-5") == (1, -5)  # a missing coefficient means 1
 759 |         assert series("odd") == (2, 1)
 760 |         assert series("even") == (2, 0)
 761 |         assert series("3n") == (3, 0)
 762 |         assert series("n") == (1, 0)
 763 |         assert series("+n") == (1, 0)
 764 |         assert series("-n") == (-1, 0)
 765 |         assert series("5") == (0, 5)  # a bare integer is the offset only
 766 |         assert series("foo") is None  # not a series at all
 767 |         assert series("n+") is None  # sign without an offset
 768 | 
 769 |     def test_lang(self) -> None:  # :lang() evaluated against the xml:lang attributes in XMLLANG_IDS
 770 |         document = etree.fromstring(XMLLANG_IDS)
 771 |         sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__  # element -> document-order index
 772 |         css_to_xpath = GenericTranslator().css_to_xpath
 773 | 
 774 |         def langid(selector: str) -> list[str]:  # ids of matching elements, in document order
 775 |             xpath = css_to_xpath(selector)
 776 |             items = typing.cast("list[etree._Element]", document.xpath(xpath))
 777 |             items.sort(key=sort_key)
 778 |             return [element.get("id", "nil") for element in items]
 779 | 
 780 |         assert langid(':lang("EN")') == ["first", "second", "third", "fourth"]  # matches en, en-US, en-Nz and En-us
 781 |         assert langid(':lang("en-us")') == ["second", "fourth"]  # case differences are ignored
 782 |         assert langid(":lang(en-nz)") == ["third"]
 783 |         assert langid(":lang(fr)") == ["fifth"]
 784 |         assert langid(":lang(ru)") == ["sixth"]
 785 |         assert langid(":lang('ZH')") == ["eighth"]
 786 |         assert langid(":lang(de) :lang(zh)") == ["eighth"]  # "eighth" (zh) is nested inside "seventh" (de)
 787 |         assert langid(":lang(en), :lang(zh)") == [
 788 |             "first",
 789 |             "second",
 790 |             "third",
 791 |             "fourth",
 792 |             "eighth",
 793 |         ]
 794 |         assert langid(":lang(es)") == []  # no element in the fixture declares es
 795 | 
 796 |     def test_argument_types(self) -> None:  # argument_types() of a functional pseudo-element, per token kind
 797 |         class CustomTranslator(GenericTranslator):
 798 |             def __init__(self) -> None:  # note: does not call the base-class __init__
 799 |                 self.argument_types: list[str] = []  # accumulates the types seen during translation
 800 | 
 801 |             def xpath_pseudo_element(
 802 |                 self, xpath: XPathExpr, pseudo_element: PseudoElement
 803 |             ) -> XPathExpr:
 804 |                 self.argument_types += typing.cast(
 805 |                     "FunctionalPseudoElement", pseudo_element
 806 |                 ).argument_types()
 807 |                 return xpath  # the expression itself is passed through untouched
 808 | 
 809 |         def argument_types(css: str) -> list[str]:  # types recorded while translating css with a fresh translator
 810 |             translator = CustomTranslator()
 811 |             translator.css_to_xpath(css)
 812 |             return translator.argument_types
 813 | 
 814 |         mappings: list[tuple[str, list[str]]] = [  # (argument text, expected argument types)
 815 |             ("", []),
 816 |             ("ident", ["IDENT"]),
 817 |             ('"string"', ["STRING"]),
 818 |             ("1", ["NUMBER"]),
 819 |         ]
 820 |         for argument_string, argument_list in mappings:
 821 |             css = f"::pseudo_element({argument_string})"
 822 |             assert argument_types(css) == argument_list
 823 | 
 824 |     def test_select(self) -> None:  # end-to-end: translate selectors and evaluate them against HTML_IDS
 825 |         document = etree.fromstring(HTML_IDS)
 826 |         sort_key = {el: count for count, el in enumerate(document.iter())}.__getitem__  # element -> document-order index
 827 |         css_to_xpath = GenericTranslator().css_to_xpath
 828 |         html_css_to_xpath = HTMLTranslator().css_to_xpath
 829 | 
 830 |         def select_ids(selector: str, html_only: bool) -> list[str]:  # ids of matching elements, in document order
 831 |             xpath = css_to_xpath(selector)
 832 |             items = typing.cast("list[etree._Element]", document.xpath(xpath))
 833 |             if html_only:  # the generic translator must match nothing before the HTML one is tried
 834 |                 assert items == []
 835 |                 xpath = html_css_to_xpath(selector)
 836 |                 items = typing.cast("list[etree._Element]", document.xpath(xpath))
 837 |             items.sort(key=sort_key)
 838 |             return [element.get("id", "nil") for element in items]  # elements without an id are reported as "nil"
 839 | 
 840 |         def pcss(main: str, *selectors: str, **kwargs: bool) -> list[str]:  # every extra selector must give the same ids as main
 841 |             html_only = kwargs.pop("html_only", False)  # any other keyword argument is silently ignored
 842 |             result = select_ids(main, html_only)
 843 |             for selector in selectors:
 844 |                 assert select_ids(selector, html_only) == result
 845 |             return result
 846 | 
 847 |         all_ids = pcss("*")
 848 |         assert all_ids[:6] == [
 849 |             "html",
 850 |             "nil",
 851 |             "link-href",
 852 |             "link-nohref",
 853 |             "nil",
 854 |             "outer-div",
 855 |         ]
 856 |         assert all_ids[-1:] == ["foobar-span"]
 857 |         assert pcss("div") == ["outer-div", "li-div", "foobar-div"]
 858 |         assert pcss("DIV", html_only=True) == [
 859 |             "outer-div",
 860 |             "li-div",
 861 |             "foobar-div",
 862 |         ]  # case-insensitive in HTML
 863 |         assert pcss("div div") == ["li-div"]
 864 |         assert pcss("div, div div") == ["outer-div", "li-div", "foobar-div"]
 865 |         assert pcss("a[name]") == ["name-anchor"]
 866 |         assert pcss("a[NAme]", html_only=True) == [
 867 |             "name-anchor"
 868 |         ]  # case-insensitive in HTML
 869 |         assert pcss("a[rel]") == ["tag-anchor", "nofollow-anchor"]
 870 |         assert pcss('a[rel="tag"]') == ["tag-anchor"]
 871 |         assert pcss('a[href*="localhost"]') == ["tag-anchor"]
 872 |         assert pcss('a[href*=""]') == []
 873 |         assert pcss('a[href^="http"]') == ["tag-anchor", "nofollow-anchor"]
 874 |         assert pcss('a[href^="http:"]') == ["tag-anchor"]
 875 |         assert pcss('a[href^=""]') == []
 876 |         assert pcss('a[href$="org"]') == ["nofollow-anchor"]
 877 |         assert pcss('a[href$=""]') == []
 878 |         assert pcss('div[foobar~="bc"]', 'div[foobar~="cde"]') == ["foobar-div"]
 879 |         assert pcss('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]') == []
 880 |         assert pcss('div[foobar~="cd"]') == []
 881 |         assert pcss('*[lang|="En"]', '[lang|="En-us"]') == ["second-li"]
 882 |         # Attribute values are case sensitive
 883 |         assert pcss('*[lang|="en"]', '[lang|="en-US"]') == []
 884 |         assert pcss('*[lang|="e"]') == []
 885 |         # ... :lang() is not.
 886 |         assert pcss(':lang("EN")', "*:lang(en-US)", html_only=True) == [
 887 |             "second-li",
 888 |             "li-div",
 889 |         ]
 890 |         assert pcss(':lang("e")', html_only=True) == []
 891 |         assert pcss(":scope > div") == []
 892 |         assert pcss(":scope body") == ["nil"]
 893 |         assert pcss(":scope body > div") == ["outer-div", "foobar-div"]
 894 |         assert pcss(":scope head") == ["nil"]
 895 |         assert pcss(":scope html") == []
 896 | 
 897 |         # --- nth-* and nth-last-* -------------------------------------
 898 | 
 899 |         # select nothing
 900 |         assert pcss("li:nth-child(-n)") == []
 901 |         # select all children
 902 |         assert pcss("li:nth-child(n)") == [
 903 |             "first-li",
 904 |             "second-li",
 905 |             "third-li",
 906 |             "fourth-li",
 907 |             "fifth-li",
 908 |             "sixth-li",
 909 |             "seventh-li",
 910 |         ]
 911 | 
 912 |         assert pcss("li:nth-child(3)", "#first-li ~ :nth-child(3)") == ["third-li"]
 913 |         assert pcss("li:nth-child(10)") == []
 914 |         assert pcss("li:nth-child(2n)", "li:nth-child(even)", "li:nth-child(2n+0)") == [
 915 |             "second-li",
 916 |             "fourth-li",
 917 |             "sixth-li",
 918 |         ]
 919 |         assert pcss("li:nth-child(+2n+1)", "li:nth-child(odd)") == [
 920 |             "first-li",
 921 |             "third-li",
 922 |             "fifth-li",
 923 |             "seventh-li",
 924 |         ]
 925 |         assert pcss("li:nth-child(2n+4)") == ["fourth-li", "sixth-li"]
 926 |         assert pcss("li:nth-child(3n+1)") == ["first-li", "fourth-li", "seventh-li"]
 927 |         assert pcss("li:nth-child(-n+3)") == ["first-li", "second-li", "third-li"]
 928 |         assert pcss("li:nth-child(-2n+4)") == ["second-li", "fourth-li"]
 929 |         assert pcss("li:nth-last-child(0)") == []
 930 |         assert pcss("li:nth-last-child(1)") == ["seventh-li"]
 931 |         assert pcss("li:nth-last-child(2n)", "li:nth-last-child(even)") == [
 932 |             "second-li",
 933 |             "fourth-li",
 934 |             "sixth-li",
 935 |         ]
 936 |         assert pcss("li:nth-last-child(2n+1)") == [
 937 |             "first-li",
 938 |             "third-li",
 939 |             "fifth-li",
 940 |             "seventh-li",
 941 |         ]
 942 |         assert pcss("li:nth-last-child(2n+2)") == ["second-li", "fourth-li", "sixth-li"]
 943 |         assert pcss("li:nth-last-child(3n+1)") == [
 944 |             "first-li",
 945 |             "fourth-li",
 946 |             "seventh-li",
 947 |         ]
 948 |         assert pcss("ol:first-of-type") == ["first-ol"]
 949 |         assert pcss("ol:nth-child(1)") == []
 950 |         assert pcss("ol:nth-of-type(2)") == ["second-ol"]
 951 |         assert pcss("ol:nth-last-of-type(1)") == ["second-ol"]
 952 | 
 953 |         # "+" and "~" tests
 954 |         assert pcss("ol#first-ol li + li:nth-child(4)") == ["fourth-li"]
 955 |         assert pcss("li + li:nth-child(1)") == []
 956 |         assert pcss("li ~ li:nth-child(2n+1)") == [
 957 |             "third-li",
 958 |             "fifth-li",
 959 |             "seventh-li",
 960 |         ]  # all but the first
 961 |         assert pcss("li ~ li:nth-last-child(2n+1)") == [
 962 |             "third-li",
 963 |             "fifth-li",
 964 |             "seventh-li",
 965 |         ]  # all but the first
 966 | 
 967 |         assert pcss("span:only-child") == ["foobar-span"]
 968 |         assert pcss("li div:only-child") == ["li-div"]
 969 |         assert pcss("div *:only-child") == ["li-div", "foobar-span"]
 970 |         self.assertRaises(ExpressionError, pcss, "p *:only-of-type")  # "*:only-of-type" is expected to be rejected
 971 |         assert pcss("p:only-of-type") == ["paragraph"]
 972 |         assert pcss("a:empty", "a:EMpty") == ["name-anchor"]
 973 |         assert pcss("li:empty") == ["third-li", "fourth-li", "fifth-li", "sixth-li"]
 974 |         assert pcss(":root", "html:root") == ["html"]
 975 |         assert pcss("li:root", "* :root") == []
 976 |         assert pcss('*:contains("link")', ':CONtains("link")') == [
 977 |             "html",
 978 |             "nil",
 979 |             "outer-div",
 980 |             "tag-anchor",
 981 |             "nofollow-anchor",
 982 |         ]
 983 |         assert pcss('*:contains("LInk")') == []  # case sensitive
 984 |         assert pcss('*:contains("e")') == [
 985 |             "html",
 986 |             "nil",
 987 |             "outer-div",
 988 |             "first-ol",
 989 |             "first-li",
 990 |             "paragraph",
 991 |             "p-em",
 992 |         ]
 993 |         assert pcss('*:contains("E")') == []  # case-sensitive
 994 |         assert pcss(".a", ".b", "*.a", "ol.a") == ["first-ol"]
 995 |         assert pcss(".c", "*.c") == ["first-ol", "third-li", "fourth-li"]
 996 |         assert pcss("ol *.c", "ol li.c", "li ~ li.c", "ol > li.c") == [
 997 |             "third-li",
 998 |             "fourth-li",
 999 |         ]
1000 |         assert pcss("#first-li", "li#first-li", "*#first-li") == ["first-li"]
1001 |         assert pcss("li div", "li > div", "div div") == ["li-div"]
1002 |         assert pcss("div > div") == []
1003 |         assert pcss("div>.c", "div > .c") == ["first-ol"]
1004 |         assert pcss("div + div") == ["foobar-div"]
1005 |         assert pcss("a ~ a") == ["tag-anchor", "nofollow-anchor"]
1006 |         assert pcss('a[rel="tag"] ~ a') == ["nofollow-anchor"]
1007 |         assert pcss("ol#first-ol li:last-child") == ["seventh-li"]
1008 |         assert pcss("ol#first-ol *:last-child") == ["li-div", "seventh-li"]
1009 |         assert pcss("#outer-div:first-child") == ["outer-div"]
1010 |         assert pcss("#outer-div :first-child") == [
1011 |             "name-anchor",
1012 |             "first-li",
1013 |             "li-div",
1014 |             "p-b",
1015 |             "checkbox-fieldset-disabled",
1016 |             "area-href",
1017 |         ]
1018 |         assert pcss("a[href]") == ["tag-anchor", "nofollow-anchor"]
1019 |         assert pcss(":not(*)") == []
1020 |         assert pcss("a:not([href])") == ["name-anchor"]
1021 |         assert pcss("ol :Not(li[class])") == [
1022 |             "first-li",
1023 |             "second-li",
1024 |             "li-div",
1025 |             "fifth-li",
1026 |             "sixth-li",
1027 |             "seventh-li",
1028 |         ]
1029 |         assert pcss("link:has(*)") == []
1030 |         assert pcss("ol:has(div)") == ["first-ol"]
1031 |         assert pcss(":is(#first-li, #second-li)") == ["first-li", "second-li"]
1032 |         assert pcss("a:is(#name-anchor, #tag-anchor)") == ["name-anchor", "tag-anchor"]
1033 |         assert pcss(":is(.c)") == ["first-ol", "third-li", "fourth-li"]
1034 |         assert pcss("ol.a.b.c > li.c:nth-child(3)") == ["third-li"]
1035 | 
1036 |         # Invalid characters in XPath element names, should not crash
1037 |         assert pcss(r"di\a0 v", r"div\[") == []
1038 |         assert pcss(r"[h\a0 ref]", r"[h\]ref]") == []
1039 | 
1040 |         # HTML-specific
1041 |         assert pcss(":link", html_only=True) == [
1042 |             "link-href",
1043 |             "tag-anchor",
1044 |             "nofollow-anchor",
1045 |             "area-href",
1046 |         ]
1047 |         assert pcss(":visited", html_only=True) == []
1048 |         assert pcss(":enabled", html_only=True) == [
1049 |             "link-href",
1050 |             "tag-anchor",
1051 |             "nofollow-anchor",
1052 |             "checkbox-unchecked",
1053 |             "text-checked",
1054 |             "checkbox-checked",
1055 |             "area-href",
1056 |         ]
1057 |         assert pcss(":disabled", html_only=True) == [
1058 |             "checkbox-disabled",
1059 |             "checkbox-disabled-checked",
1060 |             "fieldset",
1061 |             "checkbox-fieldset-disabled",
1062 |         ]
1063 |         assert pcss(":checked", html_only=True) == [
1064 |             "checkbox-checked",
1065 |             "checkbox-disabled-checked",
1066 |         ]
1067 | 
1068 |     def test_select_shakespeare(self) -> None:  # match counts over HTML_SHAKESPEARE, evaluated from <body>
1069 |         document = html.document_fromstring(HTML_SHAKESPEARE)
1070 |         body = typing.cast("list[etree._Element]", document.xpath("//body"))[0]
1071 |         css_to_xpath = GenericTranslator().css_to_xpath
1072 | 
1073 |         basestring_ = (str, bytes)  # string-like types that must never show up in the results
1074 | 
1075 |         def count(selector: str) -> int:  # number of matches, after checking they are unique non-string items
1076 |             xpath = css_to_xpath(selector)
1077 |             results = typing.cast("list[etree._Element]", body.xpath(xpath))
1078 |             assert not isinstance(results, basestring_)  # the query must not evaluate to a string
1079 |             found = set()
1080 |             for item in results:
1081 |                 assert item not in found  # no item may be returned twice
1082 |                 found.add(item)
1083 |                 assert not isinstance(item, basestring_)
1084 |             return len(results)
1085 | 
1086 |         # Data borrowed from http://mootools.net/slickspeed/
1087 | 
1088 |         # Changed from the original data, probably because only
1089 |         # the body is searched here.
1090 |         # assert count('*') == 252
1091 |         assert count("*") == 246
1092 |         assert count("div:contains(CELIA)") == 26
1093 |         assert count("div:only-child") == 22  # ?
1094 |         assert count("div:nth-child(even)") == 106
1095 |         assert count("div:nth-child(2n)") == 106
1096 |         assert count("div:nth-child(odd)") == 137
1097 |         assert count("div:nth-child(2n+1)") == 137
1098 |         assert count("div:nth-child(n)") == 243
1099 |         assert count("div:last-child") == 53
1100 |         assert count("div:first-child") == 51
1101 |         assert count("div > div") == 242
1102 |         assert count("div + div") == 190
1103 |         assert count("div ~ div") == 190
1104 |         assert count("body") == 1
1105 |         assert count("body div") == 243
1106 |         assert count("div") == 243
1107 |         assert count("div div") == 242
1108 |         assert count("div div div") == 241
1109 |         assert count("div, div, div") == 243
1110 |         assert count("div, a, span") == 243
1111 |         assert count(".dialog") == 51
1112 |         assert count("div.dialog") == 51
1113 |         assert count("div .dialog") == 51
1114 |         assert count("div.character, div.dialog") == 99
1115 |         assert count("div.direction.dialog") == 0
1116 |         assert count("div.dialog.direction") == 0
1117 |         assert count("div.dialog.scene") == 1
1118 |         assert count("div.scene.scene") == 1
1119 |         assert count("div.scene .scene") == 0
1120 |         assert count("div.direction .dialog ") == 0
1121 |         assert count("div .dialog .direction") == 4
1122 |         assert count("div.dialog .dialog .direction") == 4
1123 |         assert count("#speech5") == 1
1124 |         assert count("div#speech5") == 1
1125 |         assert count("div #speech5") == 1
1126 |         assert count("div.scene div.dialog") == 49
1127 |         assert count("div#scene1 div.dialog div") == 142
1128 |         assert count("#scene1 #speech1") == 1
1129 |         assert count("div[class]") == 103
1130 |         assert count("div[class=dialog]") == 50
1131 |         assert count("div[class^=dia]") == 51
1132 |         assert count("div[class$=log]") == 50
1133 |         assert count("div[class*=sce]") == 1
1134 |         assert count("div[class|=dialog]") == 50  # ? Seems right
1135 |         assert count("div[class!=madeup]") == 243  # ? Seems right
1136 |         assert count("div[class~=dialog]") == 51  # ? Seems right
1137 |         assert count(":scope > div") == 1
1138 |         assert count(":scope > div > div[class=dialog]") == 1
1139 |         assert count(":scope > div div") == 242
1140 | 
1141 | 
1142 | OPERATOR_PRECEDENCE_IDS = """
1143 | <html>
1144 |   <a id="first"></a>
1145 |   <a id="second" href="#"></a>
1146 |   <a id="third" href="#"></a>
1147 | </html>
1148 | """  # fixture for the operator-precedence checks in test_xpath_pseudo_elements; only "first" lacks href
1149 | 
1150 | XMLLANG_IDS = """
1151 | <test>
1152 |   <a id="first" xml:lang="en">a</a>
1153 |   <b id="second" xml:lang="en-US">b</b>
1154 |   <c id="third" xml:lang="en-Nz">c</c>
1155 |   <d id="fourth" xml:lang="En-us">d</d>
1156 |   <e id="fifth" xml:lang="fr">e</e>
1157 |   <f id="sixth" xml:lang="ru">f</f>
1158 |   <g id="seventh" xml:lang="de">
1159 |     <h id="eighth" xml:lang="zh"/>
1160 |   </g>
1161 | </test>
1162 | """  # fixture parsed by test_lang; xml:lang values deliberately vary in letter case
1163 | 
1164 | HTML_IDS = """
1165 | <html id="html"><head>
1166 |   <link id="link-href" href="foo" />
1167 |   <link id="link-nohref" />
1168 | </head><body>
1169 | <div id="outer-div">
1170 |  <a id="name-anchor" name="foo"></a>
1171 |  <a id="tag-anchor" rel="tag" href="http://localhost/foo">link</a>
1172 |  <a id="nofollow-anchor" rel="nofollow" href="https://example.org">
1173 |     link</a>
1174 |  <ol id="first-ol" class="a b c">
1175 |    <li id="first-li">content</li>
1176 |    <li id="second-li" lang="En-us">
1177 |      <div id="li-div">
1178 |      </div>
1179 |    </li>
1180 |    <li id="third-li" class="ab c"></li>
1181 |    <li id="fourth-li" class="ab
1182 | c"></li>
1183 |    <li id="fifth-li"></li>
1184 |    <li id="sixth-li"></li>
1185 |    <li id="seventh-li">  </li>
1186 |  </ol>
1187 |  <p id="paragraph">
1188 |    <b id="p-b">hi</b> <em id="p-em">there</em>
1189 |    <b id="p-b2">guy</b>
1190 |    <input type="checkbox" id="checkbox-unchecked" />
1191 |    <input type="checkbox" id="checkbox-disabled" disabled="" />
1192 |    <input type="text" id="text-checked" checked="checked" />
1193 |    <input type="hidden" />
1194 |    <input type="hidden" disabled="disabled" />
1195 |    <input type="checkbox" id="checkbox-checked" checked="checked" />
1196 |    <input type="checkbox" id="checkbox-disabled-checked"
1197 |           disabled="disabled" checked="checked" />
1198 |    <fieldset id="fieldset" disabled="disabled">
1199 |      <input type="checkbox" id="checkbox-fieldset-disabled" />
1200 |      <input type="hidden" />
1201 |    </fieldset>
1202 |  </p>
1203 |  <ol id="second-ol">
1204 |  </ol>
1205 |  <map name="dummymap">
1206 |    <area shape="circle" coords="200,250,25" href="foo.html" id="area-href" />
1207 |    <area shape="default" id="area-nohref" />
1208 |  </map>
1209 | </div>
1210 | <div id="foobar-div" foobar="ab bc
1211 | cde"><span id="foobar-span"></span></div>
1212 | </body></html>
1213 | """  # fixture parsed by test_select; elements without an id are reported there as "nil"
1214 | 
1215 | 
1216 | HTML_SHAKESPEARE = """
1217 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
1218 | 	"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
1219 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" debug="true">
1220 | <head>
1221 | 	<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
1222 | </head>
1223 | <body>
1224 | 	<div id="test">
1225 | 	<div class="dialog">
1226 | 	<h2>As You Like It</h2>
1227 | 	<div id="playwright">
1228 | 	  by William Shakespeare
1229 | 	</div>
1230 | 	<div class="dialog scene thirdClass" id="scene1">
1231 | 	  <h3>ACT I, SCENE III. A room in the palace.</h3>
1232 | 	  <div class="dialog">
1233 | 	  <div class="direction">Enter CELIA and ROSALIND</div>
1234 | 	  </div>
1235 | 	  <div id="speech1" class="character">CELIA</div>
1236 | 	  <div class="dialog">
1237 | 	  <div id="scene1.3.1">Why, cousin! why, Rosalind! Cupid have mercy! not a word?</div>
1238 | 	  </div>
1239 | 	  <div id="speech2" class="character">ROSALIND</div>
1240 | 	  <div class="dialog">
1241 | 	  <div id="scene1.3.2">Not one to throw at a dog.</div>
1242 | 	  </div>
1243 | 	  <div id="speech3" class="character">CELIA</div>
1244 | 	  <div class="dialog">
1245 | 	  <div id="scene1.3.3">No, thy words are too precious to be cast away upon</div>
1246 | 	  <div id="scene1.3.4">curs; throw some of them at me; come, lame me with reasons.</div>
1247 | 	  </div>
1248 | 	  <div id="speech4" class="character">ROSALIND</div>
1249 | 	  <div id="speech5" class="character">CELIA</div>
1250 | 	  <div class="dialog">
1251 | 	  <div id="scene1.3.8">But is all this for your father?</div>
1252 | 	  </div>
1253 | 	  <div class="dialog">
1254 | 	  <div id="scene1.3.5">Then there were two cousins laid up; when the one</div>
1255 | 	  <div id="scene1.3.6">should be lamed with reasons and the other mad</div>
1256 | 	  <div id="scene1.3.7">without any.</div>
1257 | 	  </div>
1258 | 	  <div id="speech6" class="character">ROSALIND</div>
1259 | 	  <div class="dialog">
1260 | 	  <div id="scene1.3.9">No, some of it is for my child's father. O, how</div>
1261 | 	  <div id="scene1.3.10">full of briers is this working-day world!</div>
1262 | 	  </div>
1263 | 	  <div id="speech7" class="character">CELIA</div>
1264 | 	  <div class="dialog">
1265 | 	  <div id="scene1.3.11">They are but burs, cousin, thrown upon thee in</div>
1266 | 	  <div id="scene1.3.12">holiday foolery: if we walk not in the trodden</div>
1267 | 	  <div id="scene1.3.13">paths our very petticoats will catch them.</div>
1268 | 	  </div>
1269 | 	  <div id="speech8" class="character">ROSALIND</div>
1270 | 	  <div class="dialog">
1271 | 	  <div id="scene1.3.14">I could shake them off my coat: these burs are in my heart.</div>
1272 | 	  </div>
1273 | 	  <div id="speech9" class="character">CELIA</div>
1274 | 	  <div class="dialog">
1275 | 	  <div id="scene1.3.15">Hem them away.</div>
1276 | 	  </div>
1277 | 	  <div id="speech10" class="character">ROSALIND</div>
1278 | 	  <div class="dialog">
1279 | 	  <div id="scene1.3.16">I would try, if I could cry 'hem' and have him.</div>
1280 | 	  </div>
1281 | 	  <div id="speech11" class="character">CELIA</div>
1282 | 	  <div class="dialog">
1283 | 	  <div id="scene1.3.17">Come, come, wrestle with thy affections.</div>
1284 | 	  </div>
1285 | 	  <div id="speech12" class="character">ROSALIND</div>
1286 | 	  <div class="dialog">
1287 | 	  <div id="scene1.3.18">O, they take the part of a better wrestler than myself!</div>
1288 | 	  </div>
1289 | 	  <div id="speech13" class="character">CELIA</div>
1290 | 	  <div class="dialog">
1291 | 	  <div id="scene1.3.19">O, a good wish upon you! you will try in time, in</div>
1292 | 	  <div id="scene1.3.20">despite of a fall. But, turning these jests out of</div>
1293 | 	  <div id="scene1.3.21">service, let us talk in good earnest: is it</div>
1294 | 	  <div id="scene1.3.22">possible, on such a sudden, you should fall into so</div>
1295 | 	  <div id="scene1.3.23">strong a liking with old Sir Rowland's youngest son?</div>
1296 | 	  </div>
1297 | 	  <div id="speech14" class="character">ROSALIND</div>
1298 | 	  <div class="dialog">
1299 | 	  <div id="scene1.3.24">The duke my father loved his father dearly.</div>
1300 | 	  </div>
1301 | 	  <div id="speech15" class="character">CELIA</div>
1302 | 	  <div class="dialog">
1303 | 	  <div id="scene1.3.25">Doth it therefore ensue that you should love his son</div>
1304 | 	  <div id="scene1.3.26">dearly? By this kind of chase, I should hate him,</div>
1305 | 	  <div id="scene1.3.27">for my father hated his father dearly; yet I hate</div>
1306 | 	  <div id="scene1.3.28">not Orlando.</div>
1307 | 	  </div>
1308 | 	  <div id="speech16" class="character">ROSALIND</div>
1309 | 	  <div title="wtf" class="dialog">
1310 | 	  <div id="scene1.3.29">No, faith, hate him not, for my sake.</div>
1311 | 	  </div>
1312 | 	  <div id="speech17" class="character">CELIA</div>
1313 | 	  <div class="dialog">
1314 | 	  <div id="scene1.3.30">Why should I not? doth he not deserve well?</div>
1315 | 	  </div>
1316 | 	  <div id="speech18" class="character">ROSALIND</div>
1317 | 	  <div class="dialog">
1318 | 	  <div id="scene1.3.31">Let me love him for that, and do you love him</div>
1319 | 	  <div id="scene1.3.32">because I do. Look, here comes the duke.</div>
1320 | 	  </div>
1321 | 	  <div id="speech19" class="character">CELIA</div>
1322 | 	  <div class="dialog">
1323 | 	  <div id="scene1.3.33">With his eyes full of anger.</div>
1324 | 	  <div class="direction">Enter DUKE FREDERICK, with Lords</div>
1325 | 	  </div>
1326 | 	  <div id="speech20" class="character">DUKE FREDERICK</div>
1327 | 	  <div class="dialog">
1328 | 	  <div id="scene1.3.34">Mistress, dispatch you with your safest haste</div>
1329 | 	  <div id="scene1.3.35">And get you from our court.</div>
1330 | 	  </div>
1331 | 	  <div id="speech21" class="character">ROSALIND</div>
1332 | 	  <div class="dialog">
1333 | 	  <div id="scene1.3.36">Me, uncle?</div>
1334 | 	  </div>
1335 | 	  <div id="speech22" class="character">DUKE FREDERICK</div>
1336 | 	  <div class="dialog">
1337 | 	  <div id="scene1.3.37">You, cousin</div>
1338 | 	  <div id="scene1.3.38">Within these ten days if that thou be'st found</div>
1339 | 	  <div id="scene1.3.39">So near our public court as twenty miles,</div>
1340 | 	  <div id="scene1.3.40">Thou diest for it.</div>
1341 | 	  </div>
1342 | 	  <div id="speech23" class="character">ROSALIND</div>
1343 | 	  <div class="dialog">
1344 | 	  <div id="scene1.3.41">                  I do beseech your grace,</div>
1345 | 	  <div id="scene1.3.42">Let me the knowledge of my fault bear with me:</div>
1346 | 	  <div id="scene1.3.43">If with myself I hold intelligence</div>
1347 | 	  <div id="scene1.3.44">Or have acquaintance with mine own desires,</div>
1348 | 	  <div id="scene1.3.45">If that I do not dream or be not frantic,--</div>
1349 | 	  <div id="scene1.3.46">As I do trust I am not--then, dear uncle,</div>
1350 | 	  <div id="scene1.3.47">Never so much as in a thought unborn</div>
1351 | 	  <div id="scene1.3.48">Did I offend your highness.</div>
1352 | 	  </div>
1353 | 	  <div id="speech24" class="character">DUKE FREDERICK</div>
1354 | 	  <div class="dialog">
1355 | 	  <div id="scene1.3.49">Thus do all traitors:</div>
1356 | 	  <div id="scene1.3.50">If their purgation did consist in words,</div>
1357 | 	  <div id="scene1.3.51">They are as innocent as grace itself:</div>
1358 | 	  <div id="scene1.3.52">Let it suffice thee that I trust thee not.</div>
1359 | 	  </div>
1360 | 	  <div id="speech25" class="character">ROSALIND</div>
1361 | 	  <div class="dialog">
1362 | 	  <div id="scene1.3.53">Yet your mistrust cannot make me a traitor:</div>
1363 | 	  <div id="scene1.3.54">Tell me whereon the likelihood depends.</div>
1364 | 	  </div>
1365 | 	  <div id="speech26" class="character">DUKE FREDERICK</div>
1366 | 	  <div class="dialog">
1367 | 	  <div id="scene1.3.55">Thou art thy father's daughter; there's enough.</div>
1368 | 	  </div>
1369 | 	  <div id="speech27" class="character">ROSALIND</div>
1370 | 	  <div class="dialog">
1371 | 	  <div id="scene1.3.56">So was I when your highness took his dukedom;</div>
1372 | 	  <div id="scene1.3.57">So was I when your highness banish'd him:</div>
1373 | 	  <div id="scene1.3.58">Treason is not inherited, my lord;</div>
1374 | 	  <div id="scene1.3.59">Or, if we did derive it from our friends,</div>
1375 | 	  <div id="scene1.3.60">What's that to me? my father was no traitor:</div>
1376 | 	  <div id="scene1.3.61">Then, good my liege, mistake me not so much</div>
1377 | 	  <div id="scene1.3.62">To think my poverty is treacherous.</div>
1378 | 	  </div>
1379 | 	  <div id="speech28" class="character">CELIA</div>
1380 | 	  <div class="dialog">
1381 | 	  <div id="scene1.3.63">Dear sovereign, hear me speak.</div>
1382 | 	  </div>
1383 | 	  <div id="speech29" class="character">DUKE FREDERICK</div>
1384 | 	  <div class="dialog">
1385 | 	  <div id="scene1.3.64">Ay, Celia; we stay'd her for your sake,</div>
1386 | 	  <div id="scene1.3.65">Else had she with her father ranged along.</div>
1387 | 	  </div>
1388 | 	  <div id="speech30" class="character">CELIA</div>
1389 | 	  <div class="dialog">
1390 | 	  <div id="scene1.3.66">I did not then entreat to have her stay;</div>
1391 | 	  <div id="scene1.3.67">It was your pleasure and your own remorse:</div>
1392 | 	  <div id="scene1.3.68">I was too young that time to value her;</div>
1393 | 	  <div id="scene1.3.69">But now I know her: if she be a traitor,</div>
1394 | 	  <div id="scene1.3.70">Why so am I; we still have slept together,</div>
1395 | 	  <div id="scene1.3.71">Rose at an instant, learn'd, play'd, eat together,</div>
1396 | 	  <div id="scene1.3.72">And wheresoever we went, like Juno's swans,</div>
1397 | 	  <div id="scene1.3.73">Still we went coupled and inseparable.</div>
1398 | 	  </div>
1399 | 	  <div id="speech31" class="character">DUKE FREDERICK</div>
1400 | 	  <div class="dialog">
1401 | 	  <div id="scene1.3.74">She is too subtle for thee; and her smoothness,</div>
1402 | 	  <div id="scene1.3.75">Her very silence and her patience</div>
1403 | 	  <div id="scene1.3.76">Speak to the people, and they pity her.</div>
1404 | 	  <div id="scene1.3.77">Thou art a fool: she robs thee of thy name;</div>
1405 | 	  <div id="scene1.3.78">And thou wilt show more bright and seem more virtuous</div>
1406 | 	  <div id="scene1.3.79">When she is gone. Then open not thy lips:</div>
1407 | 	  <div id="scene1.3.80">Firm and irrevocable is my doom</div>
1408 | 	  <div id="scene1.3.81">Which I have pass'd upon her; she is banish'd.</div>
1409 | 	  </div>
1410 | 	  <div id="speech32" class="character">CELIA</div>
1411 | 	  <div class="dialog">
1412 | 	  <div id="scene1.3.82">Pronounce that sentence then on me, my liege:</div>
1413 | 	  <div id="scene1.3.83">I cannot live out of her company.</div>
1414 | 	  </div>
1415 | 	  <div id="speech33" class="character">DUKE FREDERICK</div>
1416 | 	  <div class="dialog">
1417 | 	  <div id="scene1.3.84">You are a fool. You, niece, provide yourself:</div>
1418 | 	  <div id="scene1.3.85">If you outstay the time, upon mine honour,</div>
1419 | 	  <div id="scene1.3.86">And in the greatness of my word, you die.</div>
1420 | 	  <div class="direction">Exeunt DUKE FREDERICK and Lords</div>
1421 | 	  </div>
1422 | 	  <div id="speech34" class="character">CELIA</div>
1423 | 	  <div class="dialog">
1424 | 	  <div id="scene1.3.87">O my poor Rosalind, whither wilt thou go?</div>
1425 | 	  <div id="scene1.3.88">Wilt thou change fathers? I will give thee mine.</div>
1426 | 	  <div id="scene1.3.89">I charge thee, be not thou more grieved than I am.</div>
1427 | 	  </div>
1428 | 	  <div id="speech35" class="character">ROSALIND</div>
1429 | 	  <div class="dialog">
1430 | 	  <div id="scene1.3.90">I have more cause.</div>
1431 | 	  </div>
1432 | 	  <div id="speech36" class="character">CELIA</div>
1433 | 	  <div class="dialog">
1434 | 	  <div id="scene1.3.91">                  Thou hast not, cousin;</div>
1435 | 	  <div id="scene1.3.92">Prithee be cheerful: know'st thou not, the duke</div>
1436 | 	  <div id="scene1.3.93">Hath banish'd me, his daughter?</div>
1437 | 	  </div>
1438 | 	  <div id="speech37" class="character">ROSALIND</div>
1439 | 	  <div class="dialog">
1440 | 	  <div id="scene1.3.94">That he hath not.</div>
1441 | 	  </div>
1442 | 	  <div id="speech38" class="character">CELIA</div>
1443 | 	  <div class="dialog">
1444 | 	  <div id="scene1.3.95">No, hath not? Rosalind lacks then the love</div>
1445 | 	  <div id="scene1.3.96">Which teacheth thee that thou and I am one:</div>
1446 | 	  <div id="scene1.3.97">Shall we be sunder'd? shall we part, sweet girl?</div>
1447 | 	  <div id="scene1.3.98">No: let my father seek another heir.</div>
1448 | 	  <div id="scene1.3.99">Therefore devise with me how we may fly,</div>
1449 | 	  <div id="scene1.3.100">Whither to go and what to bear with us;</div>
1450 | 	  <div id="scene1.3.101">And do not seek to take your change upon you,</div>
1451 | 	  <div id="scene1.3.102">To bear your griefs yourself and leave me out;</div>
1452 | 	  <div id="scene1.3.103">For, by this heaven, now at our sorrows pale,</div>
1453 | 	  <div id="scene1.3.104">Say what thou canst, I'll go along with thee.</div>
1454 | 	  </div>
1455 | 	  <div id="speech39" class="character">ROSALIND</div>
1456 | 	  <div class="dialog">
1457 | 	  <div id="scene1.3.105">Why, whither shall we go?</div>
1458 | 	  </div>
1459 | 	  <div id="speech40" class="character">CELIA</div>
1460 | 	  <div class="dialog">
1461 | 	  <div id="scene1.3.106">To seek my uncle in the forest of Arden.</div>
1462 | 	  </div>
1463 | 	  <div id="speech41" class="character">ROSALIND</div>
1464 | 	  <div class="dialog">
1465 | 	  <div id="scene1.3.107">Alas, what danger will it be to us,</div>
1466 | 	  <div id="scene1.3.108">Maids as we are, to travel forth so far!</div>
1467 | 	  <div id="scene1.3.109">Beauty provoketh thieves sooner than gold.</div>
1468 | 	  </div>
1469 | 	  <div id="speech42" class="character">CELIA</div>
1470 | 	  <div class="dialog">
1471 | 	  <div id="scene1.3.110">I'll put myself in poor and mean attire</div>
1472 | 	  <div id="scene1.3.111">And with a kind of umber smirch my face;</div>
1473 | 	  <div id="scene1.3.112">The like do you: so shall we pass along</div>
1474 | 	  <div id="scene1.3.113">And never stir assailants.</div>
1475 | 	  </div>
1476 | 	  <div id="speech43" class="character">ROSALIND</div>
1477 | 	  <div class="dialog">
1478 | 	  <div id="scene1.3.114">Were it not better,</div>
1479 | 	  <div id="scene1.3.115">Because that I am more than common tall,</div>
1480 | 	  <div id="scene1.3.116">That I did suit me all points like a man?</div>
1481 | 	  <div id="scene1.3.117">A gallant curtle-axe upon my thigh,</div>
1482 | 	  <div id="scene1.3.118">A boar-spear in my hand; and--in my heart</div>
1483 | 	  <div id="scene1.3.119">Lie there what hidden woman's fear there will--</div>
1484 | 	  <div id="scene1.3.120">We'll have a swashing and a martial outside,</div>
1485 | 	  <div id="scene1.3.121">As many other mannish cowards have</div>
1486 | 	  <div id="scene1.3.122">That do outface it with their semblances.</div>
1487 | 	  </div>
1488 | 	  <div id="speech44" class="character">CELIA</div>
1489 | 	  <div class="dialog">
1490 | 	  <div id="scene1.3.123">What shall I call thee when thou art a man?</div>
1491 | 	  </div>
1492 | 	  <div id="speech45" class="character">ROSALIND</div>
1493 | 	  <div class="dialog">
1494 | 	  <div id="scene1.3.124">I'll have no worse a name than Jove's own page;</div>
1495 | 	  <div id="scene1.3.125">And therefore look you call me Ganymede.</div>
1496 | 	  <div id="scene1.3.126">But what will you be call'd?</div>
1497 | 	  </div>
1498 | 	  <div id="speech46" class="character">CELIA</div>
1499 | 	  <div class="dialog">
1500 | 	  <div id="scene1.3.127">Something that hath a reference to my state</div>
1501 | 	  <div id="scene1.3.128">No longer Celia, but Aliena.</div>
1502 | 	  </div>
1503 | 	  <div id="speech47" class="character">ROSALIND</div>
1504 | 	  <div class="dialog">
1505 | 	  <div id="scene1.3.129">But, cousin, what if we assay'd to steal</div>
1506 | 	  <div id="scene1.3.130">The clownish fool out of your father's court?</div>
1507 | 	  <div id="scene1.3.131">Would he not be a comfort to our travel?</div>
1508 | 	  </div>
1509 | 	  <div id="speech48" class="character">CELIA</div>
1510 | 	  <div class="dialog">
1511 | 	  <div id="scene1.3.132">He'll go along o'er the wide world with me;</div>
1512 | 	  <div id="scene1.3.133">Leave me alone to woo him. Let's away,</div>
1513 | 	  <div id="scene1.3.134">And get our jewels and our wealth together,</div>
1514 | 	  <div id="scene1.3.135">Devise the fittest time and safest way</div>
1515 | 	  <div id="scene1.3.136">To hide us from pursuit that will be made</div>
1516 | 	  <div id="scene1.3.137">After my flight. Now go we in content</div>
1517 | 	  <div id="scene1.3.138">To liberty and not to banishment.</div>
1518 | 	  <div class="direction">Exeunt</div>
1519 | 	  </div>
1520 | 	</div>
1521 | 	</div>
1522 | </div>
1523 | </body>
1524 | </html>
1525 | """
1526 | 
1527 | 
1528 | if __name__ == "__main__":  # only when this module is executed directly, not when imported
1529 |     unittest.main()
1530 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | # tox configuration for cssselect: tests, linting, docs build, typing and packaging checks.
 2 | 
 3 | # Environments run by a plain `tox` invocation. `twinecheck` (defined below) is not
 4 | # listed here, so it only runs when selected explicitly, e.g. `tox -e twinecheck`.
 5 | [tox]
 6 | envlist = pre-commit,pylint,py,docs,typing
 7 | 
 8 | # Base environment: runs pytest with coverage measured on the cssselect package and
 9 | # emits terminal (with missing lines), HTML and XML coverage reports. Arguments given
10 | # to tox after `--` replace the default targets `cssselect tests docs`.
11 | [testenv]
12 | deps =
13 |     lxml>=4.4
14 |     pytest-cov>=2.8
15 |     pytest>=5.4
16 |     setuptools
17 |     sybil
18 | commands =
19 |     pytest --cov=cssselect \
20 |         --cov-report=term-missing --cov-report=html --cov-report=xml \
21 |         --verbose {posargs: cssselect tests docs}
22 | 
23 | # Lint with a pinned pylint, installed on top of the base test dependencies.
24 | # Arguments after `--` replace the default targets.
25 | [testenv:pylint]
26 | deps =
27 |     {[testenv]deps}
28 |     pylint==3.3.5
29 | commands =
30 |     pylint {posargs: cssselect tests docs}
31 | 
32 | # Build the HTML documentation from inside docs/ into the environment's temporary
33 | # directory; `-W` makes Sphinx treat warnings as errors.
34 | [testenv:docs]
35 | changedir = docs
36 | deps =
37 |     -r docs/requirements.txt
38 | commands =
39 |     sphinx-build -W -b html . {envtmpdir}/html
40 | 
41 | # Strict static type check with pinned mypy and lxml type stubs.
42 | [testenv:typing]
43 | deps =
44 |     {[testenv]deps}
45 |     mypy==1.15.0
46 |     types-lxml==2025.3.4
47 | commands =
48 |     mypy --strict {posargs: cssselect tests}
49 | 
50 | # Run every pre-commit hook against all files, printing the diff when a hook fails.
51 | # `skip_install` means the package itself is not installed into this environment.
52 | [testenv:pre-commit]
53 | deps = pre-commit
54 | commands = pre-commit run --all-files --show-diff-on-failure
55 | skip_install = true
56 | 
57 | # Build a source distribution, then run `twine check` on everything in dist/.
58 | [testenv:twinecheck]
59 | basepython = python3
60 | deps =
61 |     twine==6.1.0
62 |     build==1.2.2.post1
63 | commands =
64 |     python -m build --sdist
65 |     twine check dist/*
66 | 


--------------------------------------------------------------------------------