├── .github ├── FUNDING.yml └── workflows │ ├── doconfly.yml │ ├── release.yml │ └── tests.yml ├── .gitignore ├── LICENSE ├── README.rst ├── cssselect2 ├── __init__.py ├── compiler.py ├── parser.py └── tree.py ├── docs ├── api_reference.rst ├── changelog.rst ├── conf.py ├── contribute.rst ├── example.py ├── first_steps.rst ├── index.rst └── support.rst ├── pyproject.toml └── tests ├── LICENSE ├── __init__.py ├── content.xhtml ├── ids.html ├── make_selectors.py ├── shakespeare.html ├── test_cssselect2.py └── w3_selectors.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | open_collective: courtbouillon 4 | -------------------------------------------------------------------------------- /.github/workflows/doconfly.yml: -------------------------------------------------------------------------------- 1 | name: doconfly 2 | on: 3 | push: 4 | branches: 5 | - main 6 | tags: 7 | - "*" 8 | 9 | jobs: 10 | doconfly: 11 | name: doconfly job 12 | runs-on: ubuntu-latest 13 | env: 14 | PORT: ${{ secrets.PORT }} 15 | SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }} 16 | TAKOYAKI: ${{ secrets.TAKOYAKI }} 17 | USER: ${{ secrets.USER }} 18 | DOCUMENTATION_PATH: ${{ secrets.DOCUMENTATION_PATH }} 19 | DOCUMENTATION_URL: ${{ secrets.DOCUMENTATION_URL }} 20 | steps: 21 | - run: | 22 | which ssh-agent || ( apt-get update -y && apt-get install openssh-client -y ) 23 | eval $(ssh-agent -s) 24 | echo "$SSH_PRIVATE_KEY" | tr -d '\r' | ssh-add - 25 | mkdir -p ~/.ssh 26 | chmod 700 ~/.ssh 27 | ssh-keyscan -p $PORT $TAKOYAKI >> ~/.ssh/known_hosts 28 | chmod 644 ~/.ssh/known_hosts 29 | ssh $USER@$TAKOYAKI -p $PORT "doconfly/doconfly.sh $GITHUB_REPOSITORY $GITHUB_REF $DOCUMENTATION_PATH $DOCUMENTATION_URL" 30 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release new version 2 | on: 3 | push: 4 | tags: 5 | - '*' 6 | 7 | jobs: 8 | pypi-publish: 9 | name: Upload release to PyPI 10 | runs-on: ubuntu-latest 11 | environment: 12 | name: pypi 13 | url: https://pypi.org/p/cssselect2 14 | permissions: 15 | id-token: write 16 | steps: 17 | - uses: actions/checkout@v4 18 | - uses: actions/setup-python@v5 19 | - name: Install requirements 20 | run: python -m pip install flit 21 | - name: Build packages 22 | run: flit build 23 | - name: Publish package distributions to PyPI 24 | uses: pypa/gh-action-pypi-publish@release/v1 25 | add-version: 26 | name: Add version to GitHub 27 | runs-on: ubuntu-latest 28 | permissions: 29 | contents: write 30 | steps: 31 | - name: Checkout code 32 | uses: actions/checkout@v4 33 | - name: Install requirements 34 | run: sudo apt-get install pandoc 35 | - name: Generate content 36 | run: | 37 | pandoc docs/changelog.rst -f rst -t gfm | csplit - /##/ "{1}" -f .part 38 | sed -r "s/^([A-Z].*)\:\$/## \1/" .part01 | sed -r "s/^ *//" | sed -rz "s/([^\n])\n([^\n^-])/\1 \2/g" | tail -n +5 > .body 39 | - name: Create Release 40 | uses: softprops/action-gh-release@v2 41 | with: 42 | body_path: .body 43 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: cssselect2's tests 2 | on: [push, pull_request] 3 | 4 | jobs: 5 | tests: 6 | name: ${{ matrix.os }} - ${{ matrix.python-version }} 7 | runs-on: ${{ matrix.os }} 8 | strategy: 9 | matrix: 10 | os: [ubuntu-latest, macos-latest, windows-latest] 11 | python-version: ['3.13'] 12 | include: 13 | - os: ubuntu-latest 14 | python-version: '3.9' 15 | - os: ubuntu-latest 16 | python-version: 'pypy-3.10' 17 | steps: 18 | - uses: actions/checkout@v4 19 | with: 20 | submodules: true 21 | - uses: actions/setup-python@v5 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | - name: Upgrade pip and setuptools 25 | run: python -m pip install --upgrade pip setuptools 26 | - name: Install tests' requirements 27 | run: python -m pip install .[test] 28 | - name: Launch tests 29 | run: python -m pytest 30 | - name: Check coding style 31 | run: python -m ruff check 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .cache 3 | /.coverage 4 | /build 5 | /dist 6 | /docs/_build 7 | /pytest_cache 8 | /venv 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2012-2018, Simon Sapin and contributors (see AUTHORS). 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | cssselect2 is a straightforward implementation of CSS4 Selectors for markup 2 | documents (HTML, XML, etc.) that can be read by ElementTree-like parsers 3 | (including cElementTree, lxml, html5lib, etc.) 4 | 5 | * Free software: BSD license 6 | * For Python 3.9+, tested on CPython and PyPy 7 | * Documentation: https://doc.courtbouillon.org/cssselect2 8 | * Changelog: https://github.com/Kozea/cssselect2/releases 9 | * Code, issues, tests: https://github.com/Kozea/cssselect2 10 | * Code of conduct: https://www.courtbouillon.org/code-of-conduct.html 11 | * Professional support: https://www.courtbouillon.org 12 | * Donation: https://opencollective.com/courtbouillon 13 | 14 | cssselect2 has been created and developed by Kozea (https://kozea.fr/). 15 | Professional support, maintenance and community management is provided by 16 | CourtBouillon (https://www.courtbouillon.org/). 17 | 18 | Copyrights are retained by their contributors, no copyright assignment is 19 | required to contribute to cssselect2. Unless explicitly stated otherwise, any 20 | contribution intentionally submitted for inclusion is licensed under the BSD 21 | 3-clause license, without any additional terms or conditions. For full 22 | authorship information, see the version control history. 23 | -------------------------------------------------------------------------------- /cssselect2/__init__.py: -------------------------------------------------------------------------------- 1 | """CSS4 selectors for Python. 2 | 3 | cssselect2 is a straightforward implementation of CSS4 Selectors for markup 4 | documents (HTML, XML, etc.) that can be read by ElementTree-like parsers 5 | (including cElementTree, lxml, html5lib, etc.) 6 | 7 | """ 8 | 9 | from webencodings import ascii_lower 10 | 11 | # Classes are imported here to expose them at the top level of the module 12 | from .compiler import compile_selector_list # noqa 13 | from .parser import SelectorError # noqa 14 | from .tree import ElementWrapper # noqa 15 | 16 | VERSION = __version__ = '0.8.0' 17 | 18 | 19 | class Matcher: 20 | """A CSS selectors storage that can match against HTML elements.""" 21 | def __init__(self): 22 | self.id_selectors = {} 23 | self.class_selectors = {} 24 | self.lower_local_name_selectors = {} 25 | self.namespace_selectors = {} 26 | self.lang_attr_selectors = [] 27 | self.other_selectors = [] 28 | self.order = 0 29 | 30 | def add_selector(self, selector, payload): 31 | """Add a selector and its payload to the matcher. 32 | 33 | :param selector: 34 | A :class:`compiler.CompiledSelector` object. 35 | :param payload: 36 | Some data associated to the selector, 37 | such as :class:`declarations ` 38 | parsed from the :attr:`tinycss2.ast.QualifiedRule.content` 39 | of a style rule. 40 | It can be any Python object, 41 | and will be returned as-is by :meth:`match`. 42 | 43 | """ 44 | self.order += 1 45 | 46 | if selector.never_matches: 47 | return 48 | 49 | entry = ( 50 | selector.test, selector.specificity, self.order, selector.pseudo_element, 51 | payload) 52 | if selector.id is not None: 53 | self.id_selectors.setdefault(selector.id, []).append(entry) 54 | elif selector.class_name is not None: 55 | self.class_selectors.setdefault(selector.class_name, []).append(entry) 56 | elif selector.local_name is not None: 57 | self.lower_local_name_selectors.setdefault( 58 | selector.lower_local_name, []).append(entry) 59 | elif selector.namespace is not None: 60 | self.namespace_selectors.setdefault(selector.namespace, []).append(entry) 61 | elif selector.requires_lang_attr: 62 | self.lang_attr_selectors.append(entry) 63 | else: 64 | self.other_selectors.append(entry) 65 | 66 | def match(self, element): 67 | """Match selectors against the given element. 68 | 69 | :param element: 70 | An :class:`ElementWrapper`. 71 | :returns: 72 | A list of the payload objects associated to selectors that match 73 | element, in order of lowest to highest 74 | :attr:`compiler.CompiledSelector` specificity and in order of 75 | addition with :meth:`add_selector` among selectors of equal 76 | specificity. 77 | 78 | """ 79 | relevant_selectors = [] 80 | 81 | if element.id is not None and element.id in self.id_selectors: 82 | self.add_relevant_selectors( 83 | element, self.id_selectors[element.id], relevant_selectors) 84 | 85 | for class_name in element.classes: 86 | if class_name in self.class_selectors: 87 | self.add_relevant_selectors( 88 | element, self.class_selectors[class_name], relevant_selectors) 89 | 90 | lower_name = ascii_lower(element.local_name) 91 | if lower_name in self.lower_local_name_selectors: 92 | self.add_relevant_selectors( 93 | element, self.lower_local_name_selectors[lower_name], 94 | relevant_selectors) 95 | if element.namespace_url in self.namespace_selectors: 96 | self.add_relevant_selectors( 97 | element, self.namespace_selectors[element.namespace_url], 98 | relevant_selectors) 99 | 100 | if 'lang' in element.etree_element.attrib: 101 | self.add_relevant_selectors( 102 | element, self.lang_attr_selectors, relevant_selectors) 103 | 104 | self.add_relevant_selectors(element, self.other_selectors, relevant_selectors) 105 | 106 | relevant_selectors.sort() 107 | return relevant_selectors 108 | 109 | @staticmethod 110 | def add_relevant_selectors(element, selectors, relevant_selectors): 111 | for test, specificity, order, pseudo, payload in selectors: 112 | if test(element): 113 | relevant_selectors.append((specificity, order, pseudo, payload)) 114 | -------------------------------------------------------------------------------- /cssselect2/compiler.py: -------------------------------------------------------------------------------- 1 | import re 2 | from urllib.parse import urlparse 3 | 4 | from tinycss2.nth import parse_nth 5 | from webencodings import ascii_lower 6 | 7 | from . import parser 8 | from .parser import SelectorError 9 | 10 | # http://dev.w3.org/csswg/selectors/#whitespace 11 | split_whitespace = re.compile('[^ \t\r\n\f]+').findall 12 | 13 | 14 | def compile_selector_list(input, namespaces=None): 15 | """Compile a (comma-separated) list of selectors. 16 | 17 | :param input: 18 | A string, or an iterable of tinycss2 component values such as 19 | the :attr:`tinycss2.ast.QualifiedRule.prelude` of a style rule. 20 | :param namespaces: 21 | A optional dictionary of all `namespace prefix declarations 22 | `_ in scope for this selector. 23 | Keys are namespace prefixes as strings, or ``None`` for the default 24 | namespace. 25 | Values are namespace URLs as strings. 26 | If omitted, assume that no prefix is declared. 27 | :returns: 28 | A list of opaque :class:`compiler.CompiledSelector` objects. 29 | 30 | """ 31 | return [CompiledSelector(selector) for selector in parser.parse(input, namespaces)] 32 | 33 | 34 | class CompiledSelector: 35 | """Abstract representation of a selector.""" 36 | def __init__(self, parsed_selector): 37 | source = _compile_node(parsed_selector.parsed_tree) 38 | self.never_matches = source == '0' 39 | eval_globals = { 40 | 'split_whitespace': split_whitespace, 41 | 'ascii_lower': ascii_lower, 42 | 'urlparse': urlparse, 43 | } 44 | self.test = eval('lambda el: ' + source, eval_globals, {}) 45 | self.specificity = parsed_selector.specificity 46 | self.pseudo_element = parsed_selector.pseudo_element 47 | self.id = None 48 | self.class_name = None 49 | self.local_name = None 50 | self.lower_local_name = None 51 | self.namespace = None 52 | self.requires_lang_attr = False 53 | 54 | node = parsed_selector.parsed_tree 55 | if isinstance(node, parser.CombinedSelector): 56 | node = node.right 57 | for simple_selector in node.simple_selectors: 58 | if isinstance(simple_selector, parser.IDSelector): 59 | self.id = simple_selector.ident 60 | elif isinstance(simple_selector, parser.ClassSelector): 61 | self.class_name = simple_selector.class_name 62 | elif isinstance(simple_selector, parser.LocalNameSelector): 63 | self.local_name = simple_selector.local_name 64 | self.lower_local_name = simple_selector.lower_local_name 65 | elif isinstance(simple_selector, parser.NamespaceSelector): 66 | self.namespace = simple_selector.namespace 67 | elif isinstance(simple_selector, parser.AttributeSelector): 68 | if simple_selector.name == 'lang': 69 | self.requires_lang_attr = True 70 | 71 | 72 | def _compile_node(selector): 73 | """Return a boolean expression, as a Python source string. 74 | 75 | When evaluated in a context where the `el` variable is an 76 | :class:`cssselect2.tree.Element` object, tells whether the element is a 77 | subject of `selector`. 78 | 79 | """ 80 | # To avoid precedence-related bugs, any sub-expression that is passed 81 | # around must be "atomic": add parentheses when the top-level would be 82 | # an operator. Bare literals and function calls are fine. 83 | 84 | # 1 and 0 are used for True and False to avoid global lookups. 85 | 86 | if isinstance(selector, parser.CombinedSelector): 87 | left_inside = _compile_node(selector.left) 88 | if left_inside == '0': 89 | return '0' # 0 and x == 0 90 | elif left_inside == '1': 91 | # 1 and x == x, but the element matching 1 still needs to exist. 92 | if selector.combinator in (' ', '>'): 93 | left = 'el.parent is not None' 94 | elif selector.combinator in ('~', '+'): 95 | left = 'el.previous is not None' 96 | else: 97 | raise SelectorError('Unknown combinator', selector.combinator) 98 | # Rebind the `el` name inside a generator-expressions (in a new scope) 99 | # so that 'left_inside' applies to different elements. 100 | elif selector.combinator == ' ': 101 | left = f'any(({left_inside}) for el in el.ancestors)' 102 | elif selector.combinator == '>': 103 | left = ( 104 | f'next(el is not None and ({left_inside}) ' 105 | 'for el in [el.parent])') 106 | elif selector.combinator == '+': 107 | left = ( 108 | f'next(el is not None and ({left_inside}) ' 109 | 'for el in [el.previous])') 110 | elif selector.combinator == '~': 111 | left = f'any(({left_inside}) for el in el.previous_siblings)' 112 | else: 113 | raise SelectorError('Unknown combinator', selector.combinator) 114 | 115 | right = _compile_node(selector.right) 116 | if right == '0': 117 | return '0' # 0 and x == 0 118 | elif right == '1': 119 | return left # 1 and x == x 120 | else: 121 | # Evaluate combinators right to left 122 | return f'({right}) and ({left})' 123 | 124 | elif isinstance(selector, parser.CompoundSelector): 125 | sub_expressions = [ 126 | expr for expr in [ 127 | _compile_node(selector) 128 | for selector in selector.simple_selectors] 129 | if expr != '1'] 130 | if len(sub_expressions) == 1: 131 | return sub_expressions[0] 132 | elif '0' in sub_expressions: 133 | return '0' 134 | elif sub_expressions: 135 | return ' and '.join(f'({el})' for el in sub_expressions) 136 | else: 137 | return '1' # all([]) == True 138 | 139 | elif isinstance(selector, parser.NegationSelector): 140 | sub_expressions = [ 141 | expr for expr in [ 142 | _compile_node(selector.parsed_tree) 143 | for selector in selector.selector_list] 144 | if expr != '1'] 145 | if not sub_expressions: 146 | return '0' 147 | return f'not ({" or ".join(f"({expr})" for expr in sub_expressions)})' 148 | 149 | elif isinstance(selector, parser.RelationalSelector): 150 | sub_expressions = [] 151 | for relative_selector in selector.selector_list: 152 | expression = _compile_node(relative_selector.selector.parsed_tree) 153 | if expression == '0': 154 | continue 155 | if relative_selector.combinator == ' ': 156 | elements = 'list(el.iter_subtree())[1:]' 157 | elif relative_selector.combinator == '>': 158 | elements = 'el.iter_children()' 159 | elif relative_selector.combinator == '+': 160 | elements = 'list(el.iter_next_siblings())[:1]' 161 | elif relative_selector.combinator == '~': 162 | elements = 'el.iter_next_siblings()' 163 | sub_expressions.append(f'(any({expression} for el in {elements}))') 164 | return ' or '.join(sub_expressions) 165 | 166 | elif isinstance(selector, ( 167 | parser.MatchesAnySelector, parser.SpecificityAdjustmentSelector)): 168 | sub_expressions = [ 169 | expr for expr in [ 170 | _compile_node(selector.parsed_tree) 171 | for selector in selector.selector_list] 172 | if expr != '0'] 173 | if not sub_expressions: 174 | return '0' 175 | return ' or '.join(f'({expr})' for expr in sub_expressions) 176 | 177 | elif isinstance(selector, parser.LocalNameSelector): 178 | if selector.lower_local_name == selector.local_name: 179 | return f'el.local_name == {selector.local_name!r}' 180 | else: 181 | return ( 182 | f'el.local_name == ({selector.lower_local_name!r} ' 183 | f'if el.in_html_document else {selector.local_name!r})') 184 | 185 | elif isinstance(selector, parser.NamespaceSelector): 186 | return f'el.namespace_url == {selector.namespace!r}' 187 | 188 | elif isinstance(selector, parser.ClassSelector): 189 | return f'{selector.class_name!r} in el.classes' 190 | 191 | elif isinstance(selector, parser.IDSelector): 192 | return f'el.id == {selector.ident!r}' 193 | 194 | elif isinstance(selector, parser.AttributeSelector): 195 | if selector.namespace is not None: 196 | if selector.namespace: 197 | if selector.name == selector.lower_name: 198 | key = repr(f'{{{selector.namespace}}}{selector.name}') 199 | else: 200 | lower = f'{{{selector.namespace}}}{selector.lower_name}' 201 | name = f'{{{selector.namespace}}}{selector.name}' 202 | key = f'({lower!r} if el.in_html_document else {name!r})' 203 | else: 204 | if selector.name == selector.lower_name: 205 | key = repr(selector.name) 206 | else: 207 | lower, name = selector.lower_name, selector.name 208 | key = f'({lower!r} if el.in_html_document else {name!r})' 209 | value = selector.value 210 | attribute_value = f'el.etree_element.get({key}, "")' 211 | if selector.case_sensitive is False: 212 | value = value.lower() 213 | attribute_value += '.lower()' 214 | if selector.operator is None: 215 | return f'{key} in el.etree_element.attrib' 216 | elif selector.operator == '=': 217 | return ( 218 | f'{key} in el.etree_element.attrib and ' 219 | f'{attribute_value} == {value!r}') 220 | elif selector.operator == '~=': 221 | return ( 222 | '0' if len(value.split()) != 1 or value.strip() != value 223 | else f'{value!r} in split_whitespace({attribute_value})') 224 | elif selector.operator == '|=': 225 | return ( 226 | f'{key} in el.etree_element.attrib and ' 227 | f'{attribute_value} == {value!r} or ' 228 | f'{attribute_value}.startswith({(value + "-")!r})') 229 | elif selector.operator == '^=': 230 | if value: 231 | return f'{attribute_value}.startswith({value!r})' 232 | else: 233 | return '0' 234 | elif selector.operator == '$=': 235 | return ( 236 | f'{attribute_value}.endswith({value!r})' if value else '0') 237 | elif selector.operator == '*=': 238 | return f'{value!r} in {attribute_value}' if value else '0' 239 | else: 240 | raise SelectorError('Unknown attribute operator', selector.operator) 241 | else: # In any namespace 242 | raise NotImplementedError # TODO 243 | 244 | elif isinstance(selector, parser.PseudoClassSelector): 245 | if selector.name in ('link', 'any-link', 'local-link'): 246 | test = html_tag_eq('a', 'area', 'link') 247 | test += ' and el.etree_element.get("href") is not None ' 248 | if selector.name == 'local-link': 249 | test += 'and not urlparse(el.etree_element.get("href")).scheme' 250 | return test 251 | elif selector.name == 'enabled': 252 | input = html_tag_eq( 253 | 'button', 'input', 'select', 'textarea', 'option') 254 | group = html_tag_eq('optgroup', 'menuitem', 'fieldset') 255 | a = html_tag_eq('a', 'area', 'link') 256 | return ( 257 | f'({input} and el.etree_element.get("disabled") is None' 258 | ' and not el.in_disabled_fieldset) or' 259 | f'({group} and el.etree_element.get("disabled") is None) or ' 260 | f'({a} and el.etree_element.get("href") is not None)') 261 | elif selector.name == 'disabled': 262 | input = html_tag_eq( 263 | 'button', 'input', 'select', 'textarea', 'option') 264 | group = html_tag_eq('optgroup', 'menuitem', 'fieldset') 265 | return ( 266 | f'({input} and (el.etree_element.get("disabled") is not None' 267 | ' or el.in_disabled_fieldset)) or' 268 | f'({group} and el.etree_element.get("disabled") is not None)') 269 | elif selector.name == 'checked': 270 | input = html_tag_eq('input', 'menuitem') 271 | option = html_tag_eq('option') 272 | return ( 273 | f'({input} and el.etree_element.get("checked") is not None and' 274 | ' ascii_lower(el.etree_element.get("type", "")) ' 275 | ' in ("checkbox", "radio")) or (' 276 | f'{option} and el.etree_element.get("selected") is not None)') 277 | elif selector.name in ( 278 | 'visited', 'hover', 'active', 'focus', 'focus-within', 279 | 'focus-visible', 'target', 'target-within', 'current', 'past', 280 | 'future', 'playing', 'paused', 'seeking', 'buffering', 281 | 'stalled', 'muted', 'volume-locked', 'user-valid', 282 | 'user-invalid'): 283 | # Not applicable in a static context: never match. 284 | return '0' 285 | elif selector.name in ('root', 'scope'): 286 | return 'el.parent is None' 287 | elif selector.name == 'first-child': 288 | return 'el.index == 0' 289 | elif selector.name == 'last-child': 290 | return 'el.index + 1 == len(el.etree_siblings)' 291 | elif selector.name == 'first-of-type': 292 | return ( 293 | 'all(s.tag != el.etree_element.tag' 294 | ' for s in el.etree_siblings[:el.index])') 295 | elif selector.name == 'last-of-type': 296 | return ( 297 | 'all(s.tag != el.etree_element.tag' 298 | ' for s in el.etree_siblings[el.index + 1:])') 299 | elif selector.name == 'only-child': 300 | return 'len(el.etree_siblings) == 1' 301 | elif selector.name == 'only-of-type': 302 | return ( 303 | 'all(s.tag != el.etree_element.tag or i == el.index' 304 | ' for i, s in enumerate(el.etree_siblings))') 305 | elif selector.name == 'empty': 306 | return 'not (el.etree_children or el.etree_element.text)' 307 | else: 308 | raise SelectorError('Unknown pseudo-class', selector.name) 309 | 310 | elif isinstance(selector, parser.FunctionalPseudoClassSelector): 311 | if selector.name == 'lang': 312 | langs = [] 313 | tokens = [ 314 | token for token in selector.arguments 315 | if token.type not in ('whitespace', 'comment')] 316 | while tokens: 317 | token = tokens.pop(0) 318 | if token.type == 'ident': 319 | langs.append(token.lower_value) 320 | elif token.type == 'string': 321 | langs.append(ascii_lower(token.value)) 322 | else: 323 | raise SelectorError('Invalid arguments for :lang()') 324 | if tokens: 325 | token = tokens.pop(0) 326 | if token.type != 'ident' and token.value != ',': 327 | raise SelectorError('Invalid arguments for :lang()') 328 | return ' or '.join( 329 | f'el.lang == {lang!r} or el.lang.startswith({(lang + "-")!r})' 330 | for lang in langs) 331 | else: 332 | nth = [] 333 | selector_list = [] 334 | current_list = nth 335 | for argument in selector.arguments: 336 | if argument.type == 'ident' and argument.value == 'of': 337 | if current_list is nth: 338 | current_list = selector_list 339 | continue 340 | current_list.append(argument) 341 | 342 | if selector_list: 343 | test = ' and '.join( 344 | _compile_node(selector.parsed_tree) 345 | for selector in parser.parse(selector_list)) 346 | if selector.name == 'nth-child': 347 | count = ( 348 | f'sum(1 for el in el.previous_siblings if ({test}))') 349 | elif selector.name == 'nth-last-child': 350 | count = ( 351 | 'sum(1 for el in' 352 | ' tuple(el.iter_siblings())[el.index + 1:]' 353 | f' if ({test}))') 354 | elif selector.name == 'nth-of-type': 355 | count = ( 356 | 'sum(1 for s in (' 357 | ' el for el in el.previous_siblings' 358 | f' if ({test}))' 359 | ' if s.etree_element.tag == el.etree_element.tag)') 360 | elif selector.name == 'nth-last-of-type': 361 | count = ( 362 | 'sum(1 for s in (' 363 | ' el for el in' 364 | ' tuple(el.iter_siblings())[el.index + 1:]' 365 | f' if ({test}))' 366 | ' if s.etree_element.tag == el.etree_element.tag)') 367 | else: 368 | raise SelectorError('Unknown pseudo-class', selector.name) 369 | count += f'if ({test}) else float("nan")' 370 | else: 371 | if current_list is selector_list: 372 | raise SelectorError( 373 | f'Invalid arguments for :{selector.name}()') 374 | if selector.name == 'nth-child': 375 | count = 'el.index' 376 | elif selector.name == 'nth-last-child': 377 | count = 'len(el.etree_siblings) - el.index - 1' 378 | elif selector.name == 'nth-of-type': 379 | count = ( 380 | 'sum(1 for s in el.etree_siblings[:el.index]' 381 | ' if s.tag == el.etree_element.tag)') 382 | elif selector.name == 'nth-last-of-type': 383 | count = ( 384 | 'sum(1 for s in el.etree_siblings[el.index + 1:]' 385 | ' if s.tag == el.etree_element.tag)') 386 | else: 387 | raise SelectorError('Unknown pseudo-class', selector.name) 388 | 389 | result = parse_nth(nth) 390 | if result is None: 391 | raise SelectorError( 392 | f'Invalid arguments for :{selector.name}()') 393 | a, b = result 394 | # x is the number of siblings before/after the element 395 | # Matches if a positive or zero integer n exists so that: 396 | # x = a*n + b-1 397 | # x = a*n + B 398 | B = b - 1 # noqa: N806 399 | if a == 0: 400 | # x = B 401 | return f'({count}) == {B}' 402 | else: 403 | # n = (x - B) / a 404 | return ( 405 | 'next(r == 0 and n >= 0' 406 | f' for n, r in [divmod(({count}) - {B}, {a})])') 407 | 408 | else: 409 | raise TypeError(type(selector), selector) 410 | 411 | 412 | def html_tag_eq(*local_names): 413 | """Generate expression testing equality with HTML local names.""" 414 | if len(local_names) == 1: 415 | tag = f'{{http://www.w3.org/1999/xhtml}}{local_names[0]}' 416 | return ( 417 | f'((el.local_name == {local_names[0]!r}) if el.in_html_document ' 418 | f'else (el.etree_element.tag == {tag!r}))') 419 | else: 420 | names = ', '.join(repr(n) for n in local_names) 421 | tags = ', '.join( 422 | repr(f'{{http://www.w3.org/1999/xhtml}}{name}') 423 | for name in local_names) 424 | return ( 425 | f'((el.local_name in ({names})) if el.in_html_document ' 426 | f'else (el.etree_element.tag in ({tags})))') 427 | -------------------------------------------------------------------------------- /cssselect2/parser.py: -------------------------------------------------------------------------------- 1 | from tinycss2 import parse_component_value_list 2 | 3 | __all__ = ['parse'] 4 | 5 | SUPPORTED_PSEUDO_ELEMENTS = { 6 | # As per CSS Pseudo-Elements Module Level 4 7 | 'first-line', 'first-letter', 'prefix', 'postfix', 'selection', 8 | 'target-text', 'spelling-error', 'grammar-error', 'before', 'after', 9 | 'marker', 'placeholder', 'file-selector-button', 10 | # As per CSS Generated Content for Paged Media Module 11 | 'footnote-call', 'footnote-marker', 12 | # As per CSS Scoping Module Level 1 13 | 'content', 'shadow', 14 | } 15 | 16 | 17 | def parse(input, namespaces=None, forgiving=False, relative=False): 18 | """Yield tinycss2 selectors found in given ``input``. 19 | 20 | :param input: 21 | A string, or an iterable of tinycss2 component values. 22 | 23 | """ 24 | if isinstance(input, str): 25 | input = parse_component_value_list(input) 26 | tokens = TokenStream(input) 27 | namespaces = namespaces or {} 28 | try: 29 | yield parse_selector(tokens, namespaces, relative) 30 | except SelectorError as exception: 31 | if forgiving: 32 | return 33 | raise exception 34 | while 1: 35 | next = tokens.next() 36 | if next is None: 37 | return 38 | elif next == ',': 39 | try: 40 | yield parse_selector(tokens, namespaces, relative) 41 | except SelectorError as exception: 42 | if not forgiving: 43 | raise exception 44 | else: 45 | if not forgiving: 46 | raise SelectorError(next, f'unexpected {next.type} token.') 47 | 48 | 49 | def parse_selector(tokens, namespaces, relative=False): 50 | tokens.skip_whitespace_and_comment() 51 | if relative: 52 | peek = tokens.peek() 53 | if peek in ('>', '+', '~'): 54 | initial_combinator = peek.value 55 | tokens.next() 56 | else: 57 | initial_combinator = ' ' 58 | tokens.skip_whitespace_and_comment() 59 | result, pseudo_element = parse_compound_selector(tokens, namespaces) 60 | while 1: 61 | has_whitespace = tokens.skip_whitespace() 62 | while tokens.skip_comment(): 63 | has_whitespace = tokens.skip_whitespace() or has_whitespace 64 | selector = Selector(result, pseudo_element) 65 | if relative: 66 | selector = RelativeSelector(initial_combinator, selector) 67 | if pseudo_element is not None: 68 | return selector 69 | peek = tokens.peek() 70 | if peek is None or peek == ',': 71 | return selector 72 | elif peek in ('>', '+', '~'): 73 | combinator = peek.value 74 | tokens.next() 75 | elif has_whitespace: 76 | combinator = ' ' 77 | else: 78 | return selector 79 | compound, pseudo_element = parse_compound_selector(tokens, namespaces) 80 | result = CombinedSelector(result, combinator, compound) 81 | 82 | 83 | def parse_compound_selector(tokens, namespaces): 84 | type_selectors = parse_type_selector(tokens, namespaces) 85 | simple_selectors = type_selectors if type_selectors is not None else [] 86 | while 1: 87 | simple_selector, pseudo_element = parse_simple_selector( 88 | tokens, namespaces) 89 | if pseudo_element is not None or simple_selector is None: 90 | break 91 | simple_selectors.append(simple_selector) 92 | 93 | if simple_selectors or (type_selectors, pseudo_element) != (None, None): 94 | return CompoundSelector(simple_selectors), pseudo_element 95 | 96 | peek = tokens.peek() 97 | peek_type = peek.type if peek else 'EOF' 98 | raise SelectorError(peek, f'expected a compound selector, got {peek_type}') 99 | 100 | 101 | def parse_type_selector(tokens, namespaces): 102 | tokens.skip_whitespace() 103 | qualified_name = parse_qualified_name(tokens, namespaces) 104 | if qualified_name is None: 105 | return None 106 | 107 | simple_selectors = [] 108 | namespace, local_name = qualified_name 109 | if local_name is not None: 110 | simple_selectors.append(LocalNameSelector(local_name)) 111 | if namespace is not None: 112 | simple_selectors.append(NamespaceSelector(namespace)) 113 | return simple_selectors 114 | 115 | 116 | def parse_simple_selector(tokens, namespaces): 117 | peek = tokens.peek() 118 | if peek is None: 119 | return None, None 120 | if peek.type == 'hash' and peek.is_identifier: 121 | tokens.next() 122 | return IDSelector(peek.value), None 123 | elif peek == '.': 124 | tokens.next() 125 | next = tokens.next() 126 | if next is None or next.type != 'ident': 127 | raise SelectorError(next, f'Expected a class name, got {next}') 128 | return ClassSelector(next.value), None 129 | elif peek.type == '[] block': 130 | tokens.next() 131 | attr = parse_attribute_selector(TokenStream(peek.content), namespaces) 132 | return attr, None 133 | elif peek == ':': 134 | tokens.next() 135 | next = tokens.next() 136 | if next == ':': 137 | next = tokens.next() 138 | if next is None or next.type != 'ident': 139 | raise SelectorError(next, f'Expected a pseudo-element name, got {next}') 140 | value = next.lower_value 141 | if value not in SUPPORTED_PSEUDO_ELEMENTS: 142 | raise SelectorError( 143 | next, f'Expected a supported pseudo-element, got {value}') 144 | return None, value 145 | elif next is not None and next.type == 'ident': 146 | name = next.lower_value 147 | if name in ('before', 'after', 'first-line', 'first-letter'): 148 | return None, name 149 | else: 150 | return PseudoClassSelector(name), None 151 | elif next is not None and next.type == 'function': 152 | name = next.lower_name 153 | if name in ('is', 'where', 'not', 'has'): 154 | return parse_logical_combination(next, namespaces, name), None 155 | else: 156 | return (FunctionalPseudoClassSelector(name, next.arguments), None) 157 | else: 158 | raise SelectorError(next, f'unexpected {next} token.') 159 | else: 160 | return None, None 161 | 162 | 163 | def parse_logical_combination(matches_any_token, namespaces, name): 164 | forgiving = True 165 | relative = False 166 | if name == 'is': 167 | selector_class = MatchesAnySelector 168 | elif name == 'where': 169 | selector_class = SpecificityAdjustmentSelector 170 | elif name == 'not': 171 | forgiving = False 172 | selector_class = NegationSelector 173 | elif name == 'has': 174 | relative = True 175 | selector_class = RelationalSelector 176 | 177 | selectors = [ 178 | selector for selector in 179 | parse(matches_any_token.arguments, namespaces, forgiving, relative) 180 | if selector.pseudo_element is None] 181 | return selector_class(selectors) 182 | 183 | 184 | def parse_attribute_selector(tokens, namespaces): 185 | tokens.skip_whitespace() 186 | qualified_name = parse_qualified_name(tokens, namespaces, is_attribute=True) 187 | if qualified_name is None: 188 | next = tokens.next() 189 | raise SelectorError(next, f'expected attribute name, got {next}') 190 | namespace, local_name = qualified_name 191 | 192 | tokens.skip_whitespace() 193 | peek = tokens.peek() 194 | if peek is None: 195 | operator = None 196 | value = None 197 | elif peek in ('=', '~=', '|=', '^=', '$=', '*='): 198 | operator = peek.value 199 | tokens.next() 200 | tokens.skip_whitespace() 201 | next = tokens.next() 202 | if next is None or next.type not in ('ident', 'string'): 203 | next_type = 'None' if next is None else next.type 204 | raise SelectorError(next, f'expected attribute value, got {next_type}') 205 | value = next.value 206 | else: 207 | raise SelectorError(peek, f'expected attribute selector operator, got {peek}') 208 | 209 | tokens.skip_whitespace() 210 | next = tokens.next() 211 | case_sensitive = None 212 | if next is not None: 213 | if next.type == 'ident' and next.value.lower() == 'i': 214 | case_sensitive = False 215 | elif next.type == 'ident' and next.value.lower() == 's': 216 | case_sensitive = True 217 | else: 218 | raise SelectorError(next, f'expected ], got {next.type}') 219 | return AttributeSelector(namespace, local_name, operator, value, case_sensitive) 220 | 221 | 222 | def parse_qualified_name(tokens, namespaces, is_attribute=False): 223 | """Return ``(namespace, local)`` for given tokens. 224 | 225 | Can also return ``None`` for a wildcard. 226 | 227 | The empty string for ``namespace`` means "no namespace". 228 | 229 | """ 230 | peek = tokens.peek() 231 | if peek is None: 232 | return None 233 | if peek.type == 'ident': 234 | first_ident = tokens.next() 235 | peek = tokens.peek() 236 | if peek != '|': 237 | namespace = '' if is_attribute else namespaces.get(None, None) 238 | return namespace, (first_ident.value, first_ident.lower_value) 239 | tokens.next() 240 | namespace = namespaces.get(first_ident.value) 241 | if namespace is None: 242 | raise SelectorError( 243 | first_ident, f'undefined namespace prefix: {first_ident.value}') 244 | elif peek == '*': 245 | next = tokens.next() 246 | peek = tokens.peek() 247 | if peek != '|': 248 | if is_attribute: 249 | raise SelectorError(next, f'expected local name, got {next.type}') 250 | return namespaces.get(None, None), None 251 | tokens.next() 252 | namespace = None 253 | elif peek == '|': 254 | tokens.next() 255 | namespace = '' 256 | else: 257 | return None 258 | 259 | # If we get here, we just consumed '|' and set ``namespace`` 260 | next = tokens.next() 261 | if next.type == 'ident': 262 | return namespace, (next.value, next.lower_value) 263 | elif next == '*' and not is_attribute: 264 | return namespace, None 265 | else: 266 | raise SelectorError(next, f'expected local name, got {next.type}') 267 | 268 | 269 | class SelectorError(ValueError): 270 | """A specialized ``ValueError`` for invalid selectors.""" 271 | 272 | 273 | class TokenStream: 274 | def __init__(self, tokens): 275 | self.tokens = iter(tokens) 276 | self.peeked = [] # In reversed order 277 | 278 | def next(self): 279 | if self.peeked: 280 | return self.peeked.pop() 281 | else: 282 | return next(self.tokens, None) 283 | 284 | def peek(self): 285 | if not self.peeked: 286 | self.peeked.append(next(self.tokens, None)) 287 | return self.peeked[-1] 288 | 289 | def skip(self, skip_types): 290 | found = False 291 | while 1: 292 | peek = self.peek() 293 | if peek is None or peek.type not in skip_types: 294 | break 295 | self.next() 296 | found = True 297 | return found 298 | 299 | def skip_whitespace(self): 300 | return self.skip(['whitespace']) 301 | 302 | def skip_comment(self): 303 | return self.skip(['comment']) 304 | 305 | def skip_whitespace_and_comment(self): 306 | return self.skip(['comment', 'whitespace']) 307 | 308 | 309 | class Selector: 310 | def __init__(self, tree, pseudo_element=None): 311 | self.parsed_tree = tree 312 | self.pseudo_element = pseudo_element 313 | if pseudo_element is None: 314 | #: Tuple of 3 integers: http://www.w3.org/TR/selectors/#specificity 315 | self.specificity = tree.specificity 316 | else: 317 | a, b, c = tree.specificity 318 | self.specificity = a, b, c + 1 319 | 320 | def __repr__(self): 321 | pseudo = f'::{self.pseudo_element}' if self.pseudo_element else '' 322 | return f'{self.parsed_tree!r}{pseudo}' 323 | 324 | 325 | class RelativeSelector: 326 | def __init__(self, combinator, selector): 327 | self.combinator = combinator 328 | self.selector = selector 329 | 330 | @property 331 | def specificity(self): 332 | return self.selector.specificity 333 | 334 | @property 335 | def pseudo_element(self): 336 | return self.selector.pseudo_element 337 | 338 | def __repr__(self): 339 | return ( 340 | f'{self.selector!r}' if self.combinator == ' ' 341 | else f'{self.combinator} {self.selector!r}') 342 | 343 | 344 | class CombinedSelector: 345 | def __init__(self, left, combinator, right): 346 | #: Combined or compound selector 347 | self.left = left 348 | # One of `` `` (a single space), ``>``, ``+`` or ``~``. 349 | self.combinator = combinator 350 | #: compound selector 351 | self.right = right 352 | 353 | @property 354 | def specificity(self): 355 | a1, b1, c1 = self.left.specificity 356 | a2, b2, c2 = self.right.specificity 357 | return a1 + a2, b1 + b2, c1 + c2 358 | 359 | def __repr__(self): 360 | return f'{self.left!r}{self.combinator}{self.right!r}' 361 | 362 | 363 | class CompoundSelector: 364 | def __init__(self, simple_selectors): 365 | self.simple_selectors = simple_selectors 366 | 367 | @property 368 | def specificity(self): 369 | if self.simple_selectors: 370 | # zip(*foo) turns [(a1, b1, c1), (a2, b2, c2), ...] 371 | # into [(a1, a2, ...), (b1, b2, ...), (c1, c2, ...)] 372 | return tuple(map(sum, zip( 373 | *(sel.specificity for sel in self.simple_selectors)))) 374 | else: 375 | return 0, 0, 0 376 | 377 | def __repr__(self): 378 | return ''.join(map(repr, self.simple_selectors)) 379 | 380 | 381 | class LocalNameSelector: 382 | specificity = 0, 0, 1 383 | 384 | def __init__(self, local_name): 385 | self.local_name, self.lower_local_name = local_name 386 | 387 | def __repr__(self): 388 | return self.local_name 389 | 390 | 391 | class NamespaceSelector: 392 | specificity = 0, 0, 0 393 | 394 | def __init__(self, namespace): 395 | #: The namespace URL as a string, 396 | #: or the empty string for elements not in any namespace. 397 | self.namespace = namespace 398 | 399 | def __repr__(self): 400 | return '|' if self.namespace == '' else f'{{{self.namespace}}}|' 401 | 402 | 403 | class IDSelector: 404 | specificity = 1, 0, 0 405 | 406 | def __init__(self, ident): 407 | self.ident = ident 408 | 409 | def __repr__(self): 410 | return f'#{self.ident}' 411 | 412 | 413 | class ClassSelector: 414 | specificity = 0, 1, 0 415 | 416 | def __init__(self, class_name): 417 | self.class_name = class_name 418 | 419 | def __repr__(self): 420 | return f'.{self.class_name}' 421 | 422 | 423 | class AttributeSelector: 424 | specificity = 0, 1, 0 425 | 426 | def __init__(self, namespace, name, operator, value, case_sensitive): 427 | self.namespace = namespace 428 | self.name, self.lower_name = name 429 | #: A string like ``=`` or ``~=``, or None for ``[attr]`` selectors 430 | self.operator = operator 431 | #: A string, or None for ``[attr]`` selectors 432 | self.value = value 433 | #: ``True`` if case-sensitive, ``False`` if case-insensitive, ``None`` 434 | #: if depends on the document language 435 | self.case_sensitive = case_sensitive 436 | 437 | def __repr__(self): 438 | namespace = '*|' if self.namespace is None else f'{{{self.namespace}}}' 439 | case_sensitive = ( 440 | '' if self.case_sensitive is None else 441 | f' {"s" if self.case_sensitive else "i"}') 442 | return ( 443 | f'[{namespace}{self.name}{self.operator}{self.value!r}' 444 | f'{case_sensitive}]') 445 | 446 | 447 | class PseudoClassSelector: 448 | specificity = 0, 1, 0 449 | 450 | def __init__(self, name): 451 | self.name = name 452 | 453 | def __repr__(self): 454 | return ':' + self.name 455 | 456 | 457 | class FunctionalPseudoClassSelector: 458 | specificity = 0, 1, 0 459 | 460 | def __init__(self, name, arguments): 461 | self.name = name 462 | self.arguments = arguments 463 | 464 | def __repr__(self): 465 | return f':{self.name}{tuple(self.arguments)!r}' 466 | 467 | 468 | class NegationSelector: 469 | def __init__(self, selector_list): 470 | self.selector_list = selector_list 471 | 472 | @property 473 | def specificity(self): 474 | if self.selector_list: 475 | return max(selector.specificity for selector in self.selector_list) 476 | else: 477 | return (0, 0, 0) 478 | 479 | def __repr__(self): 480 | return f':not({", ".join(repr(sel) for sel in self.selector_list)})' 481 | 482 | 483 | class RelationalSelector: 484 | def __init__(self, selector_list): 485 | self.selector_list = selector_list 486 | 487 | @property 488 | def specificity(self): 489 | if self.selector_list: 490 | return max(selector.specificity for selector in self.selector_list) 491 | else: 492 | return (0, 0, 0) 493 | 494 | def __repr__(self): 495 | return f':has({", ".join(repr(sel) for sel in self.selector_list)})' 496 | 497 | 498 | class MatchesAnySelector: 499 | def __init__(self, selector_list): 500 | self.selector_list = selector_list 501 | 502 | @property 503 | def specificity(self): 504 | if self.selector_list: 505 | return max(selector.specificity for selector in self.selector_list) 506 | else: 507 | return (0, 0, 0) 508 | 509 | def __repr__(self): 510 | return f':is({", ".join(repr(sel) for sel in self.selector_list)})' 511 | 512 | 513 | class SpecificityAdjustmentSelector: 514 | def __init__(self, selector_list): 515 | self.selector_list = selector_list 516 | 517 | @property 518 | def specificity(self): 519 | return (0, 0, 0) 520 | 521 | def __repr__(self): 522 | return f':where({", ".join(repr(sel) for sel in self.selector_list)})' 523 | -------------------------------------------------------------------------------- /cssselect2/tree.py: -------------------------------------------------------------------------------- 1 | from functools import cached_property 2 | from warnings import warn 3 | 4 | from webencodings import ascii_lower 5 | 6 | from .compiler import compile_selector_list, split_whitespace 7 | 8 | 9 | class ElementWrapper: 10 | """Wrapper of :class:`xml.etree.ElementTree.Element` for Selector matching. 11 | 12 | This class should not be instanciated directly. :meth:`from_xml_root` or 13 | :meth:`from_html_root` should be used for the root element of a document, 14 | and other elements should be accessed (and wrappers generated) using 15 | methods such as :meth:`iter_children` and :meth:`iter_subtree`. 16 | 17 | :class:`ElementWrapper` objects compare equal if their underlying 18 | :class:`xml.etree.ElementTree.Element` do. 19 | 20 | """ 21 | @classmethod 22 | def from_xml_root(cls, root, content_language=None): 23 | """Wrap for selector matching the root of an XML or XHTML document. 24 | 25 | :param root: 26 | An ElementTree :class:`xml.etree.ElementTree.Element` 27 | for the root element of a document. 28 | If the given element is not the root, 29 | selector matching will behave is if it were. 30 | In other words, selectors will be not be `scoped`_ 31 | to the subtree rooted at that element. 32 | :returns: 33 | A new :class:`ElementWrapper` 34 | 35 | .. _scoped: https://drafts.csswg.org/selectors-4/#scoping 36 | 37 | """ 38 | return cls._from_root(root, content_language, in_html_document=False) 39 | 40 | @classmethod 41 | def from_html_root(cls, root, content_language=None): 42 | """Same as :meth:`from_xml_root` with case-insensitive attribute names. 43 | 44 | Useful for documents parsed with an HTML parser like html5lib, which 45 | should be the case of documents with the ``text/html`` MIME type. 46 | 47 | """ 48 | return cls._from_root(root, content_language, in_html_document=True) 49 | 50 | @classmethod 51 | def _from_root(cls, root, content_language, in_html_document=True): 52 | if hasattr(root, 'getroot'): 53 | root = root.getroot() 54 | return cls( 55 | root, parent=None, index=0, previous=None, 56 | in_html_document=in_html_document, content_language=content_language) 57 | 58 | def __init__(self, etree_element, parent, index, previous, 59 | in_html_document, content_language=None): 60 | #: The underlying ElementTree :class:`xml.etree.ElementTree.Element` 61 | self.etree_element = etree_element 62 | #: The parent :class:`ElementWrapper`, 63 | #: or :obj:`None` for the root element. 64 | self.parent = parent 65 | #: The previous sibling :class:`ElementWrapper`, 66 | #: or :obj:`None` for the root element. 67 | self.previous = previous 68 | if parent is not None: 69 | #: The :attr:`parent`’s children 70 | #: as a list of 71 | #: ElementTree :class:`xml.etree.ElementTree.Element`\ s. 72 | #: For the root (which has no parent) 73 | self.etree_siblings = parent.etree_children 74 | else: 75 | self.etree_siblings = [etree_element] 76 | #: The position within the :attr:`parent`’s children, counting from 0. 77 | #: ``e.etree_siblings[e.index]`` is always ``e.etree_element``. 78 | self.index = index 79 | self.in_html_document = in_html_document 80 | self.transport_content_language = content_language 81 | 82 | # Cache 83 | self._ancestors = None 84 | self._previous_siblings = None 85 | 86 | def __eq__(self, other): 87 | return ( 88 | type(self) is type(other) and 89 | self.etree_element == other.etree_element) 90 | 91 | def __ne__(self, other): 92 | return not (self == other) 93 | 94 | def __hash__(self): 95 | return hash((type(self), self.etree_element)) 96 | 97 | def __iter__(self): 98 | yield from self.iter_children() 99 | 100 | @property 101 | def ancestors(self): 102 | """Tuple of existing ancestors. 103 | 104 | Tuple of existing :class:`ElementWrapper` objects for this element’s 105 | ancestors, in reversed tree order, from :attr:`parent` to the root. 106 | 107 | """ 108 | if self._ancestors is None: 109 | self._ancestors = ( 110 | () if self.parent is None else (*self.parent.ancestors, self.parent)) 111 | return self._ancestors 112 | 113 | @property 114 | def previous_siblings(self): 115 | """Tuple of previous siblings. 116 | 117 | Tuple of existing :class:`ElementWrapper` objects for this element’s 118 | previous siblings, in reversed tree order. 119 | 120 | """ 121 | if self._previous_siblings is None: 122 | self._previous_siblings = ( 123 | () if self.previous is None else 124 | (*self.previous.previous_siblings, self.previous)) 125 | return self._previous_siblings 126 | 127 | def iter_ancestors(self): 128 | """Iterate over ancestors. 129 | 130 | Return an iterator of existing :class:`ElementWrapper` objects for this 131 | element’s ancestors, in reversed tree order (from :attr:`parent` to the 132 | root). 133 | 134 | The element itself is not included, this is an empty sequence for the 135 | root element. 136 | 137 | This method is deprecated and will be removed in version 0.7.0. Use 138 | :attr:`ancestors` instead. 139 | 140 | """ 141 | warn( 142 | 'This method is deprecated and will be removed in version 0.7.0. ' 143 | 'Use the "ancestors" attribute instead.', 144 | DeprecationWarning) 145 | yield from self.ancestors 146 | 147 | def iter_previous_siblings(self): 148 | """Iterate over previous siblings. 149 | 150 | Return an iterator of existing :class:`ElementWrapper` objects for this 151 | element’s previous siblings, in reversed tree order. 152 | 153 | The element itself is not included, this is an empty sequence for a 154 | first child or the root element. 155 | 156 | This method is deprecated and will be removed in version 0.7.0. Use 157 | :attr:`previous_siblings` instead. 158 | 159 | """ 160 | warn( 161 | 'This method is deprecated and will be removed in version 0.7.0. ' 162 | 'Use the "previous_siblings" attribute instead.', 163 | DeprecationWarning) 164 | yield from self.previous_siblings 165 | 166 | def iter_siblings(self): 167 | """Iterate over siblings. 168 | 169 | Return an iterator of newly-created :class:`ElementWrapper` objects for 170 | this element’s siblings, in tree order. 171 | 172 | """ 173 | if self.parent is None: 174 | yield self 175 | else: 176 | yield from self.parent.iter_children() 177 | 178 | def iter_next_siblings(self): 179 | """Iterate over next siblings. 180 | 181 | Return an iterator of newly-created :class:`ElementWrapper` objects for 182 | this element’s next siblings, in tree order. 183 | 184 | """ 185 | found = False 186 | for sibling in self.iter_siblings(): 187 | if found: 188 | yield sibling 189 | if sibling == self: 190 | found = True 191 | 192 | def iter_children(self): 193 | """Iterate over children. 194 | 195 | Return an iterator of newly-created :class:`ElementWrapper` objects for 196 | this element’s child elements, in tree order. 197 | 198 | """ 199 | child = None 200 | for i, etree_child in enumerate(self.etree_children): 201 | child = type(self)( 202 | etree_child, parent=self, index=i, previous=child, 203 | in_html_document=self.in_html_document) 204 | yield child 205 | 206 | def iter_subtree(self): 207 | """Iterate over subtree. 208 | 209 | Return an iterator of newly-created :class:`ElementWrapper` objects for 210 | the entire subtree rooted at this element, in tree order. 211 | 212 | Unlike in other methods, the element itself *is* included. 213 | 214 | This loops over an entire document: 215 | 216 | .. code-block:: python 217 | 218 | for element in ElementWrapper.from_root(root_etree).iter_subtree(): 219 | ... 220 | 221 | """ 222 | stack = [iter([self])] 223 | while stack: 224 | element = next(stack[-1], None) 225 | if element is None: 226 | stack.pop() 227 | else: 228 | yield element 229 | stack.append(element.iter_children()) 230 | 231 | @staticmethod 232 | def _compile(selectors): 233 | return [ 234 | compiled_selector.test 235 | for selector in selectors 236 | for compiled_selector in ( 237 | [selector] if hasattr(selector, 'test') 238 | else compile_selector_list(selector)) 239 | if compiled_selector.pseudo_element is None and 240 | not compiled_selector.never_matches] 241 | 242 | def matches(self, *selectors): 243 | """Return wether this elememt matches any of the given selectors. 244 | 245 | :param selectors: 246 | Each given selector is either a :class:`compiler.CompiledSelector`, 247 | or an argument to :func:`compile_selector_list`. 248 | 249 | """ 250 | return any(test(self) for test in self._compile(selectors)) 251 | 252 | def query_all(self, *selectors): 253 | """Return elements, in tree order, that match any of given selectors. 254 | 255 | Selectors are `scoped`_ to the subtree rooted at this element. 256 | 257 | .. _scoped: https://drafts.csswg.org/selectors-4/#scoping 258 | 259 | :param selectors: 260 | Each given selector is either a :class:`compiler.CompiledSelector`, 261 | or an argument to :func:`compile_selector_list`. 262 | :returns: 263 | An iterator of newly-created :class:`ElementWrapper` objects. 264 | 265 | """ 266 | tests = self._compile(selectors) 267 | if len(tests) == 1: 268 | return filter(tests[0], self.iter_subtree()) 269 | elif selectors: 270 | return ( 271 | element for element in self.iter_subtree() 272 | if any(test(element) for test in tests)) 273 | else: 274 | return iter(()) 275 | 276 | def query(self, *selectors): 277 | """Return first element that matches any of given selectors. 278 | 279 | :param selectors: 280 | Each given selector is either a :class:`compiler.CompiledSelector`, 281 | or an argument to :func:`compile_selector_list`. 282 | :returns: 283 | A newly-created :class:`ElementWrapper` object, 284 | or :obj:`None` if there is no match. 285 | 286 | """ 287 | return next(self.query_all(*selectors), None) 288 | 289 | @cached_property 290 | def etree_children(self): 291 | """Children as a list of :class:`xml.etree.ElementTree.Element`. 292 | 293 | Other ElementTree nodes such as 294 | :func:`comments ` and 295 | :func:`processing instructions 296 | ` 297 | are not included. 298 | 299 | """ 300 | return [ 301 | element for element in self.etree_element 302 | if isinstance(element.tag, str)] 303 | 304 | @cached_property 305 | def local_name(self): 306 | """The local name of this element, as a string.""" 307 | namespace_url, local_name = _split_etree_tag(self.etree_element.tag) 308 | self.__dict__[str('namespace_url')] = namespace_url 309 | return local_name 310 | 311 | @cached_property 312 | def namespace_url(self): 313 | """The namespace URL of this element, as a string.""" 314 | namespace_url, local_name = _split_etree_tag(self.etree_element.tag) 315 | self.__dict__[str('local_name')] = local_name 316 | return namespace_url 317 | 318 | @cached_property 319 | def id(self): 320 | """The ID of this element, as a string.""" 321 | return self.etree_element.get('id') 322 | 323 | @cached_property 324 | def classes(self): 325 | """The classes of this element, as a :class:`set` of strings.""" 326 | return set(split_whitespace(self.etree_element.get('class', ''))) 327 | 328 | @cached_property 329 | def lang(self): 330 | """The language of this element, as a string.""" 331 | # http://whatwg.org/C#language 332 | xml_lang = self.etree_element.get('{http://www.w3.org/XML/1998/namespace}lang') 333 | if xml_lang is not None: 334 | return ascii_lower(xml_lang) 335 | is_html = ( 336 | self.in_html_document or 337 | self.namespace_url == 'http://www.w3.org/1999/xhtml') 338 | if is_html: 339 | lang = self.etree_element.get('lang') 340 | if lang is not None: 341 | return ascii_lower(lang) 342 | if self.parent is not None: 343 | return self.parent.lang 344 | # Root elememnt 345 | if is_html: 346 | content_language = None 347 | iterator = self.etree_element.iter('{http://www.w3.org/1999/xhtml}meta') 348 | for meta in iterator: 349 | http_equiv = meta.get('http-equiv', '') 350 | if ascii_lower(http_equiv) == 'content-language': 351 | content_language = _parse_content_language(meta.get('content')) 352 | if content_language is not None: 353 | return ascii_lower(content_language) 354 | # Empty string means unknown 355 | return _parse_content_language(self.transport_content_language) or '' 356 | 357 | @cached_property 358 | def in_disabled_fieldset(self): 359 | if self.parent is None: 360 | return False 361 | fieldset = '{http://www.w3.org/1999/xhtml}fieldset' 362 | legend = '{http://www.w3.org/1999/xhtml}legend' 363 | disabled_fieldset = ( 364 | self.parent.etree_element.tag == fieldset and 365 | self.parent.etree_element.get('disabled') is not None and ( 366 | self.etree_element.tag != legend or any( 367 | sibling.etree_element.tag == legend 368 | for sibling in self.iter_previous_siblings()))) 369 | return disabled_fieldset or self.parent.in_disabled_fieldset 370 | 371 | 372 | def _split_etree_tag(tag): 373 | position = tag.rfind('}') 374 | if position == -1: 375 | return '', tag 376 | else: 377 | assert tag[0] == '{' 378 | return tag[1:position], tag[position+1:] 379 | 380 | 381 | def _parse_content_language(value): 382 | if value is not None and ',' not in value: 383 | parts = split_whitespace(value) 384 | if len(parts) == 1: 385 | return parts[0] 386 | -------------------------------------------------------------------------------- /docs/api_reference.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | 5 | .. module:: cssselect2 6 | .. autoclass:: Matcher 7 | :members: 8 | .. autofunction:: compile_selector_list 9 | .. autoclass:: ElementWrapper 10 | :members: 11 | .. autoclass:: SelectorError 12 | 13 | .. module:: cssselect2.compiler 14 | .. autoclass:: CompiledSelector 15 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | --------- 3 | 4 | 5 | Version 0.8.0 6 | ............. 7 | 8 | Released on 2025-03-05. 9 | 10 | * Drop support of Python 3.8 and 3.9, support 3.12 and 3.13 11 | * Handle case-sensitive and case-insensitive attribute selectors 12 | 13 | 14 | Version 0.7.0 15 | ............. 16 | 17 | Released on 2022-09-19. 18 | 19 | * Support :has selector 20 | 21 | 22 | Version 0.6.0 23 | ............. 24 | 25 | Released on 2022-04-15. 26 | 27 | **This version deprecates the ``iter_ancestors`` and ``iter_previous_siblings`` 28 | methods, that will be removed in 0.7.0. Use the ``ancestors`` and 29 | ``previous_siblings`` properties instead.** 30 | 31 | * Improve speed of ancestors and previous siblings 32 | 33 | 34 | Version 0.5.0 35 | ............. 36 | 37 | Released on 2022-02-27. 38 | 39 | * Support Python 3.10 40 | * Drop support of Python 3.6 41 | * Handle many CSS4 selectors 42 | * Ignore comments at the beginning of selectors 43 | 44 | 45 | Version 0.4.1 46 | ............. 47 | 48 | Released on 2020-10-29. 49 | 50 | * Fix PyPI description and various links. 51 | 52 | 53 | Version 0.4.0 54 | ............. 55 | 56 | Released on 2020-10-29. 57 | 58 | * Drop support of Python 3.5, add support of Python 3.9. 59 | * Don’t crash on empty :not() selectors. 60 | * New code structure, new packaging, new documentation. 61 | 62 | 63 | Version 0.3.0 64 | ............. 65 | 66 | Released on 2020-03-16. 67 | 68 | * Drop Python2 support. 69 | * Improve packaging and testing. 70 | 71 | 72 | Version 0.2.2 73 | ............. 74 | 75 | Released on 2019-09-06. 76 | 77 | * Optimize lang attribute selectors. 78 | 79 | 80 | Version 0.2.1 81 | ............. 82 | 83 | Released on 2017-10-02. 84 | 85 | * Fix documentation. 86 | 87 | 88 | Version 0.2.0 89 | ............. 90 | 91 | Released on 2017-08-16. 92 | 93 | * Fix some selectors for HTML documents with no namespace. 94 | * Don't crash when the attribute comparator is unknown. 95 | * Don't crash when there are empty attribute classes. 96 | * Follow semantic versioning. 97 | 98 | 99 | Version 0.1 100 | ........... 101 | 102 | Released on 2017-07-07. 103 | 104 | * Initial release. 105 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # cssselect2 documentation build configuration file. 2 | 3 | import cssselect2 4 | 5 | # Add any Sphinx extension module names here, as strings. They can be 6 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 7 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx'] 8 | 9 | # Add any paths that contain templates here, relative to this directory. 10 | templates_path = ['_templates'] 11 | 12 | # The suffix of source filenames. 13 | source_suffix = '.rst' 14 | 15 | # The master toctree document. 16 | master_doc = 'index' 17 | 18 | # General information about the project. 19 | project = 'cssselect2' 20 | copyright = 'Simon Sapin and contributors' 21 | 22 | # The version info for the project you're documenting, acts as replacement for 23 | # |version| and |release|, also used in various other places throughout the 24 | # built documents. 25 | # 26 | # The full version, including alpha/beta/rc tags. 27 | release = cssselect2.__version__ 28 | 29 | # The short X.Y version. 30 | version = '.'.join(release.split('.')[:2]) 31 | 32 | # List of patterns, relative to source directory, that match files and 33 | # directories to ignore when looking for source files. 34 | exclude_patterns = ['_build'] 35 | 36 | # The name of the Pygments (syntax highlighting) style to use. 37 | pygments_style = 'monokai' 38 | 39 | # The theme to use for HTML and HTML Help pages. See the documentation for 40 | # a list of builtin themes. 41 | html_theme = 'furo' 42 | 43 | html_theme_options = { 44 | 'top_of_page_buttons': ['edit'], 45 | 'source_edit_link': 'https://github.com/Kozea/cssselect2/edit/main/docs/{filename}', 46 | } 47 | 48 | # Favicon URL 49 | html_favicon = 'https://www.courtbouillon.org/static/images/favicon.png' 50 | 51 | # Add any paths that contain custom static files (such as style sheets) here, 52 | # relative to this directory. They are copied after the builtin static files, 53 | # so a file named "default.css" will overwrite the builtin "default.css". 54 | html_static_path = [] 55 | 56 | # These paths are either relative to html_static_path 57 | # or fully qualified paths (eg. https://...) 58 | html_css_files = [ 59 | 'https://www.courtbouillon.org/static/docs-furo.css', 60 | ] 61 | 62 | # Output file base name for HTML help builder. 63 | htmlhelp_basename = 'cssselect2doc' 64 | 65 | # One entry per manual page. List of tuples 66 | # (source start file, name, description, authors, manual section). 67 | man_pages = [ 68 | ('index', 'cssselect2', 'cssselect2 Documentation', 69 | ['Simon Sapin and contributors'], 1) 70 | ] 71 | 72 | # Grouping the document tree into Texinfo files. List of tuples 73 | # (source start file, target name, title, author, 74 | # dir menu entry, description, category) 75 | texinfo_documents = [ 76 | ('index', 'cssselect2', 'cssselect2 Documentation', 77 | 'Simon Sapin', 'cssselect2', 78 | 'A straightforward implementation of CSS3 Selectors.', 79 | 'Miscellaneous'), 80 | ] 81 | 82 | # Example configuration for intersphinx: refer to the Python standard library. 83 | intersphinx_mapping = { 84 | 'python': ('https://docs.python.org/3/', None), 85 | 'webencodings': ('https://pythonhosted.org/webencodings/', None), 86 | 'tinycss2': ('https://doc.courtbouillon.org/tinycss2/latest/', None), 87 | } 88 | -------------------------------------------------------------------------------- /docs/contribute.rst: -------------------------------------------------------------------------------- 1 | Contribute 2 | ========== 3 | 4 | You want to add some code to cssselect2, launch its tests or improve its 5 | documentation? Thank you very much! Here are some tips to help you play with 6 | cssselect2 in good conditions. 7 | 8 | The first step is to clone the repository, create a virtual environment and 9 | install cssselect2 dependencies. 10 | 11 | .. code-block:: shell 12 | 13 | git clone https://github.com/Kozea/cssselect2.git 14 | cd cssselect2 15 | python -m venv venv 16 | venv/bin/pip install .[doc,test] 17 | 18 | You can then let your terminal in the current directory and launch Python to 19 | test your changes. ``import cssselect2`` will then import the working directory 20 | code, so that you can modify it and test your changes. 21 | 22 | .. code-block:: shell 23 | 24 | venv/bin/python 25 | 26 | 27 | Code & Issues 28 | ------------- 29 | 30 | If you’ve found a bug in cssselect2, it’s time to report it, and to fix it if you 31 | can! 32 | 33 | You can report bugs and feature requests on `GitHub`_. If you want to add or 34 | fix some code, please fork the repository and create a pull request, we’ll be 35 | happy to review your work. 36 | 37 | .. _GitHub: https://github.com/Kozea/cssselect2 38 | 39 | 40 | Tests 41 | ----- 42 | 43 | Tests are stored in the ``tests`` folder at the top of the repository. They use 44 | the `pytest`_ library. 45 | 46 | You can launch tests using the following command:: 47 | 48 | venv/bin/python -m pytest 49 | 50 | cssselect2 also uses ruff_ to check the coding style:: 51 | 52 | venv/bin/python -m ruff check 53 | 54 | .. _pytest: https://docs.pytest.org/ 55 | .. _ruff: https://docs.astral.sh/ruff/ 56 | 57 | 58 | Documentation 59 | ------------- 60 | 61 | Documentation is stored in the ``docs`` folder at the top of the repository. It 62 | relies on the `Sphinx`_ library. 63 | 64 | You can build the documentation using the following command:: 65 | 66 | venv/bin/sphinx-build docs docs/_build 67 | 68 | The documentation home page can now be found in the ``docs/_build/index.html`` 69 | file. You can open this file in a browser to see the final rendering. 70 | 71 | .. _Sphinx: https://www.sphinx-doc.org/ 72 | -------------------------------------------------------------------------------- /docs/example.py: -------------------------------------------------------------------------------- 1 | from xml.etree import ElementTree 2 | 3 | import cssselect2 4 | import tinycss2 5 | 6 | # Parse CSS and add rules to the matcher 7 | 8 | matcher = cssselect2.Matcher() 9 | 10 | rules = tinycss2.parse_stylesheet(''' 11 | body { font-size: 2em } 12 | body p { background: red } 13 | p { color: blue } 14 | ''', skip_whitespace=True) 15 | 16 | for rule in rules: 17 | selectors = cssselect2.compile_selector_list(rule.prelude) 18 | selector_string = tinycss2.serialize(rule.prelude) 19 | content_string = tinycss2.serialize(rule.content) 20 | payload = (selector_string, content_string) 21 | for selector in selectors: 22 | matcher.add_selector(selector, payload) 23 | 24 | 25 | # Parse HTML and find CSS rules applying to each tag 26 | 27 | html_tree = ElementTree.fromstring(''' 28 | 29 | 30 |

Test

31 | 32 | 33 | ''') 34 | wrapper = cssselect2.ElementWrapper.from_html_root(html_tree) 35 | for element in wrapper.iter_subtree(): 36 | tag = element.etree_element.tag.split('}')[-1] 37 | print('Found tag "{}" in HTML'.format(tag)) 38 | 39 | matches = matcher.match(element) 40 | if matches: 41 | for match in matches: 42 | specificity, order, pseudo, payload = match 43 | selector_string, content_string = payload 44 | print('Matching selector "{}" ({})'.format( 45 | selector_string, content_string)) 46 | else: 47 | print('No rule matching this tag') 48 | print() 49 | -------------------------------------------------------------------------------- /docs/first_steps.rst: -------------------------------------------------------------------------------- 1 | First Steps 2 | =========== 3 | 4 | .. currentmodule:: cssselect2 5 | 6 | 7 | Installation 8 | ------------ 9 | 10 | The easiest way to use tinycss2 is to install it in a Python `virtual 11 | environment`_. When your virtual environment is activated, you can then install 12 | cssselect2 with pip_:: 13 | 14 | pip install cssselect2 15 | 16 | This will also automatically install tinycss2’s only dependencies, tinycss2_ 17 | and webencodings_. cssselect2, tinycss2 and webencodings only contain Python 18 | code and should work on any Python implementation. 19 | 20 | cssselect2 also is packaged for many Linux distributions (Debian, Ubuntu, 21 | Fedora, Archlinux, Gentoo…). 22 | 23 | .. _virtual environment: https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/ 24 | .. _pip: https://pip.pypa.io/ 25 | .. _webencodings: https://pythonhosted.org/webencodings/ 26 | .. _tinycss2: https://doc.courtbouillon.org/tinycss2/ 27 | 28 | 29 | Basic Example 30 | ------------- 31 | 32 | Here is a classical cssselect2 workflow: 33 | 34 | - parse a CSS stylesheet using tinycss2_, 35 | - store the CSS rules in a :class:`Matcher` object, 36 | - parse an HTML document using an ElementTree-like parser, 37 | - wrap the HTML tree in a :class:`ElementWrapper` object, 38 | - find the CSS rules matching each HTML tag, using the matcher and the wrapper. 39 | 40 | .. literalinclude:: example.py 41 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | cssselect2 2 | ========== 3 | 4 | .. currentmodule:: cssselect2 5 | 6 | .. include:: ../README.rst 7 | 8 | .. toctree:: 9 | :caption: Documentation 10 | :maxdepth: 2 11 | 12 | first_steps 13 | api_reference 14 | 15 | .. toctree:: 16 | :caption: Extra Information 17 | :maxdepth: 2 18 | 19 | changelog 20 | contribute 21 | support 22 | -------------------------------------------------------------------------------- /docs/support.rst: -------------------------------------------------------------------------------- 1 | Support 2 | ======= 3 | 4 | 5 | Sponsorship 6 | ----------- 7 | 8 | With `donations and sponsorship`_, you help the projects to be 9 | better. Donations allow the CourtBouillon team to have more time dedicated to 10 | add new features, fix bugs, and improve documentation. 11 | 12 | .. _donations and sponsorship: https://opencollective.com/courtbouillon 13 | 14 | 15 | Professionnal Support 16 | --------------------- 17 | 18 | You can improve your experience with CourtBouillon’s tools thanks to our 19 | professional support. You want bugs fixed as soon as possible? You projects 20 | would highly benefit from some new features? You or your team would like to get 21 | new skills with one of the technologies we master? 22 | 23 | Please contact us by mail_, by chat_ or by tweet_ to get in touch and find the 24 | best way we can help you. 25 | 26 | .. _mail: mailto:contact@courtbouillon.org 27 | .. _chat: https://gitter.im/CourtBouillon/cssselect2 28 | .. _tweet: https://twitter.com/BouillonCourt 29 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ['flit_core >=3.2,<4'] 3 | build-backend = 'flit_core.buildapi' 4 | 5 | [project] 6 | name = 'cssselect2' 7 | description = 'CSS selectors for Python ElementTree' 8 | keywords = ['css', 'elementtree'] 9 | authors = [{name = 'Simon Sapin', email = 'simon.sapin@exyr.org'}] 10 | maintainers = [{name = 'CourtBouillon', email = 'contact@courtbouillon.org'}] 11 | requires-python = '>=3.9' 12 | readme = {file = 'README.rst', content-type = 'text/x-rst'} 13 | license = {file = 'LICENSE'} 14 | dependencies = ['tinycss2', 'webencodings'] 15 | classifiers = [ 16 | 'Development Status :: 5 - Production/Stable', 17 | 'Intended Audience :: Developers', 18 | 'License :: OSI Approved :: BSD License', 19 | 'Operating System :: OS Independent', 20 | 'Programming Language :: Python', 21 | 'Programming Language :: Python :: 3', 22 | 'Programming Language :: Python :: 3 :: Only', 23 | 'Programming Language :: Python :: 3.9', 24 | 'Programming Language :: Python :: 3.10', 25 | 'Programming Language :: Python :: 3.11', 26 | 'Programming Language :: Python :: 3.12', 27 | 'Programming Language :: Python :: 3.13', 28 | 'Programming Language :: Python :: Implementation :: CPython', 29 | 'Programming Language :: Python :: Implementation :: PyPy', 30 | 'Topic :: Internet :: WWW/HTTP', 31 | ] 32 | dynamic = ['version'] 33 | 34 | [project.urls] 35 | Homepage = 'https://doc.courtbouillon.org/cssselect2/' 36 | Documentation = 'https://doc.courtbouillon.org/cssselect2/' 37 | Code = 'https://github.com/Kozea/cssselect2/' 38 | Issues = 'https://github.com/Kozea/cssselect2/issues' 39 | Changelog = 'https://github.com/Kozea/cssselect2/releases' 40 | Donation = 'https://opencollective.com/courtbouillon' 41 | 42 | [project.optional-dependencies] 43 | doc = ['sphinx', 'furo'] 44 | test = ['pytest', 'ruff'] 45 | 46 | [tool.flit.sdist] 47 | exclude = ['.*'] 48 | 49 | [tool.coverage.run] 50 | branch = true 51 | include = ['tests/*', 'cssselect2/*'] 52 | 53 | [tool.coverage.report] 54 | exclude_lines = ['pragma: no cover', 'def __repr__', 'raise NotImplementedError'] 55 | omit = ['.*'] 56 | 57 | [tool.ruff.lint] 58 | select = ['E', 'W', 'F', 'I', 'N', 'RUF'] 59 | ignore = ['RUF001', 'RUF002', 'RUF003'] 60 | 61 | [tool.ruff.lint.extend-per-file-ignores] 62 | 'docs/example.py' = ['I001'] 63 | -------------------------------------------------------------------------------- /tests/LICENSE: -------------------------------------------------------------------------------- 1 | These files are taken form the web-platform-test repository 2 | and used under a 3-clause BSD License. 3 | 4 | https://github.com/w3c/web-platform-tests/tree/master/selectors-api 5 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Test suite for cssselect2. 4 | 5 | """ 6 | -------------------------------------------------------------------------------- /tests/content.xhtml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Selectors-API Test Suite: HTML with Selectors Level 2 using TestHarness: Test Document 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 |
14 | 15 |
16 |

Universal selector tests inside element with id="universal".

17 |
18 |
Some preformatted text with some embedded code
19 |

This is a normal link: W3C

20 |
Some more nested elements code hyperlink
21 |
22 | 23 |
24 |
25 |
26 |
27 |
28 |

29 |

 30 | 		
31 |
    32 | 33 | 39 | 45 | 51 |
    52 | 53 |
    54 |
    55 |
    56 |
    57 |
    58 | 59 |
    60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 |
    70 | 71 |
    72 |
    73 | 74 |
    75 |
    76 |
    77 |
    78 | 79 |
    80 |
    81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 |

    91 |
    92 | 93 |
    94 |
    95 |
    96 |
    97 |
    98 |
    99 | 100 |
    101 | 102 | 103 | 104 | 105 |
    106 |
    107 |
    108 |
    109 |
    110 | 111 |

    112 |
    113 | 114 |
    115 | 116 | 117 | 118 | 119 |
    120 |
    121 |
    122 |
    123 | 124 |

    125 |
    126 | 127 |
    128 | 129 | 130 | 131 | 132 |
    133 |
    134 |
    135 |
    136 |
    137 |
    138 | 139 |

    140 |
    141 | 142 |
    143 | 144 | 145 | 146 | 147 |
    148 | 149 |
      150 |
    1. 151 |
    2. 152 |
    3. 153 |
    4. 154 |
    5. 155 |
    6. 156 |
    7. 157 |
    8. 158 |
    9. 159 |
    10. 160 |
    11. 161 |
    12. 162 |
    163 | 164 |

    165 | span1 166 | em1 167 | 168 | em2 169 | span2 170 | strong1 171 | em3 172 | span3 173 | span4 174 | strong2 175 | em4 176 |

    177 |
    178 | 179 |
    180 |
    181 |
    182 |
    183 | 184 |

    185 |

    186 |

    187 |
    188 | 189 |
    190 |

    191 |

    192 |

    193 | 194 |
    195 |
    196 |
    197 |
    198 | 199 |
    200 |

    201 | 202 |

    203 |

    204 | 205 | 206 |

    207 |

    208 | 209 | 210 | 211 |

    212 |
    > 213 | 214 |
    215 |

    216 |

    217 |

    218 |

    Text node

    219 |

    220 |
    221 | 222 | 231 | 232 |
    233 |
    234 |
    235 |
    236 |
    237 |
    238 | 239 |
    240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 |
    264 | 265 |
    266 |
    267 |
    268 |
    269 | 270 |

    271 |

    272 |

    273 |
    274 | 275 |
    All pseudo-element tests
    276 | 277 |
    278 |

    279 |

    280 |

    281 | 282 | 283 |
    284 |
    285 |

    286 |
    287 |

    288 |
    289 |
    290 |
    291 |
    292 | 293 | 294 | 295 | 296 | 297 | 298 |
    299 | 300 |
    301 |
    302 |
    303 | 304 |
      305 |
    • 306 |
    • 307 |
    • 308 |
    • 309 |
    310 | 311 | 312 | 313 | 314 | 315 | 316 |
    317 | 318 |
    319 |
    320 |
    321 |
    322 |
    323 |
    324 |
    325 |
    326 |
    327 | 328 |
    329 |
    330 |
    331 |
    332 |
    333 |
    334 |
    335 |
    336 |
    337 | 338 |
    339 |
    340 |
    341 |
    342 |
    343 |
    344 |

    345 |
    346 |
    347 |
    348 |

    349 |

    350 |
    351 | 352 |
    353 |
    354 |
    355 |
    356 |
    357 |
    358 |

    359 |
    360 |
    361 |
    362 |

    363 |

    364 |
    365 | 366 |
    367 | 368 | 369 |
    370 |
    371 | 372 | 373 | -------------------------------------------------------------------------------- /tests/ids.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 |
    6 | 7 | 8 | 9 | link 10 |
      11 |
    1. content
    2. 12 |
    3. 13 |
      14 |
      15 |
    4. 16 |
    5. 17 |
    6. 19 |
    7. 20 |
    8. 21 |
    9. 22 |
    23 |

    24 | hi there 25 | guy 26 | 27 | 28 | 29 | 30 | 31 | 32 | 34 |

    35 | 36 | 37 |
    38 |

    39 |
      40 |
    41 | 42 | 43 | 44 | 45 |
    46 |
    48 | 49 | -------------------------------------------------------------------------------- /tests/make_selectors.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import json 4 | import re 5 | from pathlib import Path 6 | from pprint import pformat 7 | from urllib.request import urlopen 8 | 9 | with urlopen('http://wpt.live/dom/nodes/selectors.js') as fd: 10 | js = fd.read().decode() 11 | 12 | js = js.replace(r'\_', '_') # Unescape underscores 13 | js = re.sub(r'/\*.*?\*/', '', js, flags=re.S) # Remove /* comments */ 14 | js = re.sub(r'( +|^)//.*$', '', js, flags=re.M) # Remove // comments 15 | js = re.sub(r',\s+testType:[0-fx\s\|]+\}', '}', js) # Remove testType 16 | js = re.sub(r'(\{|,\s+)(\w+):', r'\1"\2":', js) # Use strings for keys 17 | js = re.sub(r',\s+(\]|\})', r'\1', js, flags=re.M) # Remove trailing commas 18 | 19 | invalid_selectors = json.loads(re.search( 20 | r'var invalidSelectors = (\[.*?\]);', js, flags=re.S).group(1)) 21 | valid_selectors = json.loads(re.search( 22 | r'var validSelectors = (\[.*?\]);', js, flags=re.S).group(1)) 23 | 24 | python = f'''# File generated by make_selectors.py, do not edit 25 | 26 | invalid_selectors = {pformat(invalid_selectors, indent=4, width=79)} 27 | 28 | valid_selectors = {pformat(valid_selectors, indent=4, width=79)} 29 | ''' 30 | python = python.replace('= [ ', '= [\n ') 31 | python = python.replace(' { ', ' {\n ') 32 | python = python.replace(': [ ', ': [\n ') 33 | python = python.replace(' ', ' ') 34 | 35 | Path(__file__).parent.joinpath('w3_selectors.py').write_text(python) 36 | -------------------------------------------------------------------------------- /tests/shakespeare.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
    8 |
    9 |

    As You Like It

    10 |
    11 | by William Shakespeare 12 |
    13 |
    14 |

    ACT I, SCENE III. A room in the palace.

    15 |
    16 |
    Enter CELIA and ROSALIND
    17 |
    18 |
    CELIA
    19 |
    20 |
    Why, cousin! why, Rosalind! Cupid have mercy! not a word?
    21 |
    22 |
    ROSALIND
    23 |
    24 |
    Not one to throw at a dog.
    25 |
    26 |
    CELIA
    27 |
    28 |
    No, thy words are too precious to be cast away upon
    29 |
    curs; throw some of them at me; come, lame me with reasons.
    30 |
    31 |
    ROSALIND
    32 |
    CELIA
    33 |
    34 |
    But is all this for your father?
    35 |
    36 |
    37 |
    Then there were two cousins laid up; when the one
    38 |
    should be lamed with reasons and the other mad
    39 |
    without any.
    40 |
    41 |
    ROSALIND
    42 |
    43 |
    No, some of it is for my child's father. O, how
    44 |
    full of briers is this working-day world!
    45 |
    46 |
    CELIA
    47 |
    48 |
    They are but burs, cousin, thrown upon thee in
    49 |
    holiday foolery: if we walk not in the trodden
    50 |
    paths our very petticoats will catch them.
    51 |
    52 |
    ROSALIND
    53 |
    54 |
    I could shake them off my coat: these burs are in my heart.
    55 |
    56 |
    CELIA
    57 |
    58 |
    Hem them away.
    59 |
    60 |
    ROSALIND
    61 |
    62 |
    I would try, if I could cry 'hem' and have him.
    63 |
    64 |
    CELIA
    65 |
    66 |
    Come, come, wrestle with thy affections.
    67 |
    68 |
    ROSALIND
    69 |
    70 |
    O, they take the part of a better wrestler than myself!
    71 |
    72 |
    CELIA
    73 |
    74 |
    O, a good wish upon you! you will try in time, in
    75 |
    despite of a fall. But, turning these jests out of
    76 |
    service, let us talk in good earnest: is it
    77 |
    possible, on such a sudden, you should fall into so
    78 |
    strong a liking with old Sir Rowland's youngest son?
    79 |
    80 |
    ROSALIND
    81 |
    82 |
    The duke my father loved his father dearly.
    83 |
    84 |
    CELIA
    85 |
    86 |
    Doth it therefore ensue that you should love his son
    87 |
    dearly? By this kind of chase, I should hate him,
    88 |
    for my father hated his father dearly; yet I hate
    89 |
    not Orlando.
    90 |
    91 |
    ROSALIND
    92 |
    93 |
    No, faith, hate him not, for my sake.
    94 |
    95 |
    CELIA
    96 |
    97 |
    Why should I not? doth he not deserve well?
    98 |
    99 |
    ROSALIND
    100 |
    101 |
    Let me love him for that, and do you love him
    102 |
    because I do. Look, here comes the duke.
    103 |
    104 |
    CELIA
    105 |
    106 |
    With his eyes full of anger.
    107 |
    Enter DUKE FREDERICK, with Lords
    108 |
    109 |
    DUKE FREDERICK
    110 |
    111 |
    Mistress, dispatch you with your safest haste
    112 |
    And get you from our court.
    113 |
    114 |
    ROSALIND
    115 |
    116 |
    Me, uncle?
    117 |
    118 |
    DUKE FREDERICK
    119 |
    120 |
    You, cousin
    121 |
    Within these ten days if that thou be'st found
    122 |
    So near our public court as twenty miles,
    123 |
    Thou diest for it.
    124 |
    125 |
    ROSALIND
    126 |
    127 |
    I do beseech your grace,
    128 |
    Let me the knowledge of my fault bear with me:
    129 |
    If with myself I hold intelligence
    130 |
    Or have acquaintance with mine own desires,
    131 |
    If that I do not dream or be not frantic,--
    132 |
    As I do trust I am not--then, dear uncle,
    133 |
    Never so much as in a thought unborn
    134 |
    Did I offend your highness.
    135 |
    136 |
    DUKE FREDERICK
    137 |
    138 |
    Thus do all traitors:
    139 |
    If their purgation did consist in words,
    140 |
    They are as innocent as grace itself:
    141 |
    Let it suffice thee that I trust thee not.
    142 |
    143 |
    ROSALIND
    144 |
    145 |
    Yet your mistrust cannot make me a traitor:
    146 |
    Tell me whereon the likelihood depends.
    147 |
    148 |
    DUKE FREDERICK
    149 |
    150 |
    Thou art thy father's daughter; there's enough.
    151 |
    152 |
    ROSALIND
    153 |
    154 |
    So was I when your highness took his dukedom;
    155 |
    So was I when your highness banish'd him:
    156 |
    Treason is not inherited, my lord;
    157 |
    Or, if we did derive it from our friends,
    158 |
    What's that to me? my father was no traitor:
    159 |
    Then, good my liege, mistake me not so much
    160 |
    To think my poverty is treacherous.
    161 |
    162 |
    CELIA
    163 |
    164 |
    Dear sovereign, hear me speak.
    165 |
    166 |
    DUKE FREDERICK
    167 |
    168 |
    Ay, Celia; we stay'd her for your sake,
    169 |
    Else had she with her father ranged along.
    170 |
    171 |
    CELIA
    172 |
    173 |
    I did not then entreat to have her stay;
    174 |
    It was your pleasure and your own remorse:
    175 |
    I was too young that time to value her;
    176 |
    But now I know her: if she be a traitor,
    177 |
    Why so am I; we still have slept together,
    178 |
    Rose at an instant, learn'd, play'd, eat together,
    179 |
    And wheresoever we went, like Juno's swans,
    180 |
    Still we went coupled and inseparable.
    181 |
    182 |
    DUKE FREDERICK
    183 |
    184 |
    She is too subtle for thee; and her smoothness,
    185 |
    Her very silence and her patience
    186 |
    Speak to the people, and they pity her.
    187 |
    Thou art a fool: she robs thee of thy name;
    188 |
    And thou wilt show more bright and seem more virtuous
    189 |
    When she is gone. Then open not thy lips:
    190 |
    Firm and irrevocable is my doom
    191 |
    Which I have pass'd upon her; she is banish'd.
    192 |
    193 |
    CELIA
    194 |
    195 |
    Pronounce that sentence then on me, my liege:
    196 |
    I cannot live out of her company.
    197 |
    198 |
    DUKE FREDERICK
    199 |
    200 |
    You are a fool. You, niece, provide yourself:
    201 |
    If you outstay the time, upon mine honour,
    202 |
    And in the greatness of my word, you die.
    203 |
    Exeunt DUKE FREDERICK and Lords
    204 |
    205 |
    CELIA
    206 |
    207 |
    O my poor Rosalind, whither wilt thou go?
    208 |
    Wilt thou change fathers? I will give thee mine.
    209 |
    I charge thee, be not thou more grieved than I am.
    210 |
    211 |
    ROSALIND
    212 |
    213 |
    I have more cause.
    214 |
    215 |
    CELIA
    216 |
    217 |
    Thou hast not, cousin;
    218 |
    Prithee be cheerful: know'st thou not, the duke
    219 |
    Hath banish'd me, his daughter?
    220 |
    221 |
    ROSALIND
    222 |
    223 |
    That he hath not.
    224 |
    225 |
    CELIA
    226 |
    227 |
    No, hath not? Rosalind lacks then the love
    228 |
    Which teacheth thee that thou and I am one:
    229 |
    Shall we be sunder'd? shall we part, sweet girl?
    230 |
    No: let my father seek another heir.
    231 |
    Therefore devise with me how we may fly,
    232 |
    Whither to go and what to bear with us;
    233 |
    And do not seek to take your change upon you,
    234 |
    To bear your griefs yourself and leave me out;
    235 |
    For, by this heaven, now at our sorrows pale,
    236 |
    Say what thou canst, I'll go along with thee.
    237 |
    238 |
    ROSALIND
    239 |
    240 |
    Why, whither shall we go?
    241 |
    242 |
    CELIA
    243 |
    244 |
    To seek my uncle in the forest of Arden.
    245 |
    246 |
    ROSALIND
    247 |
    248 |
    Alas, what danger will it be to us,
    249 |
    Maids as we are, to travel forth so far!
    250 |
    Beauty provoketh thieves sooner than gold.
    251 |
    252 |
    CELIA
    253 |
    254 |
    I'll put myself in poor and mean attire
    255 |
    And with a kind of umber smirch my face;
    256 |
    The like do you: so shall we pass along
    257 |
    And never stir assailants.
    258 |
    259 |
    ROSALIND
    260 |
    261 |
    Were it not better,
    262 |
    Because that I am more than common tall,
    263 |
    That I did suit me all points like a man?
    264 |
    A gallant curtle-axe upon my thigh,
    265 |
    A boar-spear in my hand; and--in my heart
    266 |
    Lie there what hidden woman's fear there will--
    267 |
    We'll have a swashing and a martial outside,
    268 |
    As many other mannish cowards have
    269 |
    That do outface it with their semblances.
    270 |
    271 |
    CELIA
    272 |
    273 |
    What shall I call thee when thou art a man?
    274 |
    275 |
    ROSALIND
    276 |
    277 |
    I'll have no worse a name than Jove's own page;
    278 |
    And therefore look you call me Ganymede.
    279 |
    But what will you be call'd?
    280 |
    281 |
    CELIA
    282 |
    283 |
    Something that hath a reference to my state
    284 |
    No longer Celia, but Aliena.
    285 |
    286 |
    ROSALIND
    287 |
    288 |
    But, cousin, what if we assay'd to steal
    289 |
    The clownish fool out of your father's court?
    290 |
    Would he not be a comfort to our travel?
    291 |
    292 |
    CELIA
    293 |
    294 |
    He'll go along o'er the wide world with me;
    295 |
    Leave me alone to woo him. Let's away,
    296 |
    And get our jewels and our wealth together,
    297 |
    Devise the fittest time and safest way
    298 |
    To hide us from pursuit that will be made
    299 |
    After my flight. Now go we in content
    300 |
    To liberty and not to banishment.
    301 |
    Exeunt
    302 |
    303 |
    304 |
    305 |
    306 | 307 | 308 | -------------------------------------------------------------------------------- /tests/test_cssselect2.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Test suite for cssselect2. 4 | 5 | """ 6 | 7 | import xml.etree.ElementTree as etree # noqa: N813 8 | from pathlib import Path 9 | 10 | import pytest 11 | 12 | from cssselect2 import ElementWrapper, SelectorError, compile_selector_list 13 | 14 | from .w3_selectors import invalid_selectors, valid_selectors 15 | 16 | CURRENT_FOLDER = Path(__file__).parent 17 | IDS_ROOT = etree.parse(CURRENT_FOLDER / 'ids.html') 18 | ALL_IDS = [ 19 | element.etree_element.get('id', 'nil') for element in 20 | ElementWrapper.from_html_root(IDS_ROOT).query_all('*')] 21 | SHAKESPEARE_BODY = ( 22 | ElementWrapper.from_xml_root( 23 | etree.parse(CURRENT_FOLDER / 'shakespeare.html').find( 24 | './/{http://www.w3.org/1999/xhtml}body'))) 25 | 26 | 27 | def get_test_document(): 28 | document = etree.parse(CURRENT_FOLDER / 'content.xhtml') 29 | parent = document.find(".//*[@id='root']") 30 | 31 | # Setup namespace tests 32 | for id in ('any-namespace', 'no-namespace'): 33 | div = etree.SubElement(parent, '{http://www.w3.org/1999/xhtml}div') 34 | div.set('id', id) 35 | div1 = etree.SubElement(div, '{http://www.w3.org/1999/xhtml}div') 36 | div1.set('id', id + '-div1') 37 | div2 = etree.SubElement(div, '{http://www.w3.org/1999/xhtml}div') 38 | div2.set('id', id + '-div2') 39 | div3 = etree.SubElement(div, 'div') 40 | div3.set('id', id + '-div3') 41 | div4 = etree.SubElement(div, '{http://www.example.org/ns}div') 42 | div4.set('id', id + '-div4') 43 | 44 | return document 45 | 46 | 47 | TEST_DOCUMENT = get_test_document() 48 | 49 | 50 | # Remove unsuitable tests 51 | valid_selectors = [ 52 | test for test in valid_selectors 53 | if not set(test.get('exclude', ())) & {'document', 'xhtml'}] 54 | 55 | # Mark failing tests 56 | for failing in (2, 9, 104, 105, 111, 197, 198): 57 | valid_selectors[failing] = pytest.param( 58 | valid_selectors[failing], marks=pytest.mark.xfail) 59 | 60 | 61 | @pytest.mark.parametrize('test', invalid_selectors) 62 | def test_invalid_selectors(test): 63 | try: 64 | compile_selector_list(test['selector']) 65 | except SelectorError: 66 | pass 67 | else: # pragma: no cover 68 | raise AssertionError( 69 | f'Should be invalid: {test["selector"]!r} ({test["name"]})') 70 | 71 | 72 | @pytest.mark.parametrize('test', valid_selectors) 73 | def test_valid_selectors(test): 74 | root = ElementWrapper.from_xml_root(TEST_DOCUMENT) 75 | result = [element.id for element in root.query_all(test['selector'])] 76 | if result != test['expect']: # pragma: no cover 77 | raise AssertionError( 78 | f'{test["selector"]!r}: {result} != {test["expect"]} ({test["name"]})') 79 | 80 | 81 | def test_lang(): 82 | doc = etree.fromstring(''' 83 | 84 | ''') 85 | assert not ElementWrapper.from_xml_root(doc).matches(':lang(fr)') 86 | 87 | doc = etree.fromstring(''' 88 | 89 | 90 | 91 | ''') 92 | root = ElementWrapper.from_xml_root(doc, content_language='en') 93 | assert root.matches(':lang(fr)') 94 | 95 | doc = etree.fromstring(''' 96 | 97 | 98 | 99 | ''') 100 | root = ElementWrapper.from_xml_root(doc, content_language='en') 101 | assert root.matches(':lang(en)') 102 | 103 | doc = etree.fromstring('') 104 | root = ElementWrapper.from_xml_root(doc, content_language='en') 105 | assert root.matches(':lang(en)') 106 | 107 | root = ElementWrapper.from_xml_root(doc, content_language='en, es') 108 | assert not root.matches(':lang(en)') 109 | 110 | root = ElementWrapper.from_xml_root(doc) 111 | assert not root.matches(':lang(en)') 112 | 113 | doc = etree.fromstring('') 114 | root = ElementWrapper.from_html_root(doc) 115 | assert root.matches(':lang(en)') 116 | 117 | doc = etree.fromstring('') 118 | root = ElementWrapper.from_xml_root(doc) 119 | assert not root.matches(':lang(en)') 120 | 121 | 122 | @pytest.mark.parametrize('selector, result', ( 123 | ('*', ALL_IDS), 124 | ('div', ['outer-div', 'li-div', 'foobar-div']), 125 | ('div div', ['li-div']), 126 | ('div, div div', ['outer-div', 'li-div', 'foobar-div']), 127 | ('div , div div', ['outer-div', 'li-div', 'foobar-div']), 128 | ('a[name]', ['name-anchor']), 129 | ('a[rel]', ['tag-anchor', 'nofollow-anchor']), 130 | ('a[rel="tag"]', ['tag-anchor']), 131 | ('a[href*="localhost"]', ['tag-anchor']), 132 | ('a[href*=""]', []), 133 | ('a[href^="http"]', ['tag-anchor', 'nofollow-anchor']), 134 | ('a[href^="http:"]', ['tag-anchor']), 135 | ('a[href^=""]', []), 136 | ('a[href$="org"]', ['nofollow-anchor']), 137 | ('a[href$=""]', []), 138 | ('div[foobar~="bc"]', ['foobar-div']), 139 | ('div[foobar~="cde"]', ['foobar-div']), 140 | ('[foobar~="ab bc"]', []), 141 | ('[foobar~=""]', []), 142 | ('[foobar~=" \t"]', []), 143 | ('div[foobar~="cd"]', []), 144 | 145 | ('a[rel="tAg"]', []), 146 | ('a[rel="tAg" s]', []), 147 | ('a[rel="tAg" i]', ['tag-anchor']), 148 | ('a[href*="localHOST"]', []), 149 | ('a[href*="localHOST" s]', []), 150 | ('a[href*="localHOST" i]', ['tag-anchor']), 151 | ('a[href^="hTtp"]', []), 152 | ('a[href^="hTtp" s]', []), 153 | ('a[href^="hTtp" i]', ['tag-anchor', 'nofollow-anchor']), 154 | ('a[href$="Org"]', []), 155 | ('a[href$="Org" S]', []), 156 | ('a[href$="Org" I]', ['nofollow-anchor']), 157 | ('div[foobar~="BC"]', []), 158 | ('div[foobar~="BC" s]', []), 159 | ('div[foobar~="BC" i]', ['foobar-div']), 160 | 161 | # Attribute values are case sensitive… 162 | ('*[lang|="En"]', ['second-li']), 163 | ('[lang|="En-us"]', ['second-li']), 164 | ('*[lang|="en"]', []), 165 | ('[lang|="en-US"]', []), 166 | ('*[lang|="e"]', []), 167 | # … but :lang() is not. 168 | (':lang(EN)', ['second-li', 'li-div']), 169 | ('*:lang(en-US)', ['second-li', 'li-div']), 170 | (':lang(En)', ['second-li', 'li-div']), 171 | (':lang(e)', []), 172 | (':lang("en-US")', ['second-li', 'li-div']), 173 | pytest.param( 174 | ':lang("*-US")', ['second-li', 'li-div'], marks=pytest.mark.xfail), 175 | pytest.param( 176 | ':lang(\\*-US)', ['second-li', 'li-div'], marks=pytest.mark.xfail), 177 | (':lang(en /* English */, fr /* French */)', ['second-li', 'li-div']), 178 | 179 | ('li:nth-child(3)', ['third-li']), 180 | ('li:nth-child(10)', []), 181 | ('li:nth-child(2n)', ['second-li', 'fourth-li', 'sixth-li']), 182 | ('li:nth-child(even)', ['second-li', 'fourth-li', 'sixth-li']), 183 | ('li:nth-child(+2n+0)', ['second-li', 'fourth-li', 'sixth-li']), 184 | ('li:nth-child(2n+1)', ['first-li', 'third-li', 'fifth-li', 'seventh-li']), 185 | ('li:nth-child(odd)', ['first-li', 'third-li', 'fifth-li', 'seventh-li']), 186 | ('li:nth-child(2n+4)', ['fourth-li', 'sixth-li']), 187 | ('li:nth-child(3n+1)', ['first-li', 'fourth-li', 'seventh-li']), 188 | ('p > input:nth-child(2n of p input[type=checkbox])', [ 189 | 'checkbox-disabled', 'checkbox-disabled-checked']), 190 | ('li:nth-last-child(1)', ['seventh-li']), 191 | ('li:nth-last-child(0)', []), 192 | ('li:nth-last-child(2n+2)', ['second-li', 'fourth-li', 'sixth-li']), 193 | ('li:nth-last-child(even)', ['second-li', 'fourth-li', 'sixth-li']), 194 | ('li:nth-last-child(2n+4)', ['second-li', 'fourth-li']), 195 | (':nth-last-child(1 of [type=checkbox])', [ 196 | 'checkbox-disabled-checked', 'checkbox-fieldset-disabled']), 197 | ('ol:first-of-type', ['first-ol']), 198 | ('ol:nth-child(1)', []), 199 | ('ol:nth-of-type(2)', ['second-ol']), 200 | (':nth-of-type(1 of .e)', ['tag-anchor', 'first-ol']), 201 | ('ol:nth-last-of-type(2)', ['first-ol']), 202 | (':nth-last-of-type(1 of .e)', ['tag-anchor', 'second-ol']), 203 | ('span:only-child', ['foobar-span']), 204 | ('div:only-child', ['li-div']), 205 | ('div *:only-child', ['li-div', 'foobar-span']), 206 | ('p *:only-of-type', ['p-em', 'fieldset']), 207 | ('p:only-of-type', ['paragraph']), 208 | 209 | ('a:empty', ['name-anchor']), 210 | ('a:EMpty', ['name-anchor']), 211 | ('li:empty', ['third-li', 'fourth-li', 'fifth-li', 'sixth-li']), 212 | (':root', ['html']), 213 | ('html:root', ['html']), 214 | ('li:root', []), 215 | ('* :root', []), 216 | ('.a', ['first-ol']), 217 | ('.b', ['first-ol']), 218 | ('*.a', ['first-ol']), 219 | ('ol.a', ['first-ol']), 220 | ('.c', ['first-ol', 'third-li', 'fourth-li']), 221 | ('*.c', ['first-ol', 'third-li', 'fourth-li']), 222 | ('ol *.c', ['third-li', 'fourth-li']), 223 | ('ol li.c', ['third-li', 'fourth-li']), 224 | ('li ~ li.c', ['third-li', 'fourth-li']), 225 | ('ol > li.c', ['third-li', 'fourth-li']), 226 | ('#first-li', ['first-li']), 227 | ('li#first-li', ['first-li']), 228 | ('*#first-li', ['first-li']), 229 | ('li div', ['li-div']), 230 | ('li > div', ['li-div']), 231 | ('div div', ['li-div']), 232 | ('div > div', []), 233 | ('div>.c', ['first-ol']), 234 | ('div > .c', ['first-ol']), 235 | ('div + div', ['foobar-div']), 236 | ('a ~ a', ['tag-anchor', 'nofollow-anchor']), 237 | ('a[rel="tag"] ~ a', ['nofollow-anchor']), 238 | ('ol#first-ol li:last-child', ['seventh-li']), 239 | ('ol#first-ol *:last-child', ['li-div', 'seventh-li']), 240 | ('#outer-div:first-child', ['outer-div']), 241 | ('#outer-div :first-child', [ 242 | 'name-anchor', 'first-li', 'li-div', 'p-b', 243 | 'checkbox-fieldset-disabled', 'area-href']), 244 | ('a[href]', ['tag-anchor', 'nofollow-anchor']), 245 | (':not(*)', []), 246 | ('a:not([href])', ['name-anchor']), 247 | ('ol :Not([class])', [ 248 | 'first-li', 'second-li', 'li-div', 249 | 'fifth-li', 'sixth-li', 'seventh-li']), 250 | ('li:not(:nth-child(odd), #second-li)', ['fourth-li', 'sixth-li']), 251 | ('li:not(li)', []), 252 | (':is(*)', ALL_IDS), 253 | (':is(div)', ['outer-div', 'li-div', 'foobar-div']), 254 | (':is(div, fieldset)', ['outer-div', 'li-div', 'fieldset', 'foobar-div']), 255 | (':is(:::wrong)', []), 256 | (':is(div, :::wrong, fieldset)', [ 257 | 'outer-div', 'li-div', 'fieldset', 'foobar-div']), 258 | ('div :is(div, div)', ['li-div']), 259 | ('li:is(.c)', ['third-li', 'fourth-li']), 260 | ('input:is([type="text"])', ['text-checked']), 261 | ('div:is(:not(#outer-div))', ['li-div', 'foobar-div']), 262 | ('div:is(div::before)', []), 263 | (':where(*)', ALL_IDS), 264 | (':where(div)', ['outer-div', 'li-div', 'foobar-div']), 265 | (':where(div, fieldset)', [ 266 | 'outer-div', 'li-div', 'fieldset', 'foobar-div']), 267 | (':where(:::wrong)', []), 268 | (':where(div, :::wrong, fieldset)', [ 269 | 'outer-div', 'li-div', 'fieldset', 'foobar-div']), 270 | ('div :where(div, div)', ['li-div']), 271 | ('li:where(.c)', ['third-li', 'fourth-li']), 272 | ('input:where([type="text"])', ['text-checked']), 273 | ('div:where(:not(#outer-div))', ['li-div', 'foobar-div']), 274 | ('div:where(div::before)', []), 275 | ('p:has(input)', ['paragraph']), 276 | ('p:has(fieldset input)', ['paragraph']), 277 | ('p:has(> fieldset)', ['paragraph']), 278 | ('ol:has(> div)', []), 279 | ('ol:has(input, li)', ['first-ol']), 280 | ('ol:has(input, fieldset)', []), 281 | ('ol:has(+ p)', ['first-ol']), 282 | ('ol:has(~ ol)', ['first-ol']), 283 | ('ol:has(>a, ~ ol)', ['first-ol']), 284 | ('ol:has(a,ol, li )', ['first-ol']), 285 | ('ol:has(*)', ['first-ol']), 286 | ('ol:has(:not(li))', ['first-ol']), 287 | ('ol:has( > :not( li ))', []), 288 | ('ol:has(:not(li, div))', []), 289 | 290 | # Invalid characters in XPath element names, should not crash 291 | (r'di\a0 v', []), 292 | (r'div\[', []), 293 | (r'[h\a0 ref]', []), 294 | (r'[h\]ref]', []), 295 | 296 | (':link', ['link-href', 'tag-anchor', 'nofollow-anchor', 'area-href']), 297 | (':any-link', ['link-href', 'tag-anchor', 'nofollow-anchor', 'area-href']), 298 | (':local-link', ['link-href', 'area-href']), 299 | (':visited', []), 300 | (':hover', []), 301 | (':active', []), 302 | (':focus', []), 303 | (':target', []), 304 | (':enabled', [ 305 | 'link-href', 'tag-anchor', 'nofollow-anchor', 'checkbox-unchecked', 306 | 'text-checked', 'input-hidden', 'checkbox-checked', 'area-href']), 307 | (':disabled', [ 308 | 'checkbox-disabled', 'input-hidden-disabled', 309 | 'checkbox-disabled-checked', 'fieldset', 'checkbox-fieldset-disabled', 310 | 'hidden-fieldset-disabled']), 311 | (':checked', ['checkbox-checked', 'checkbox-disabled-checked']), 312 | 313 | ('a:not([href]), div div', ['name-anchor', 'li-div']), 314 | ('a:not([href]) /* test */, div div', ['name-anchor', 'li-div']), 315 | ('a:not([href]), /* test */ div div', ['name-anchor', 'li-div']), 316 | ('/* test */a:not([href]),div div', ['name-anchor', 'li-div']), 317 | ('a:not([href]) , div div/* test */', ['name-anchor', 'li-div']), 318 | ('/* test */a:not([href]), /* test */ div div', ['name-anchor', 'li-div']), 319 | ('/* test */a:not([href])/* test */,div div', ['name-anchor', 'li-div']), 320 | ('/* test */ a:not([href]), div/* test */ div', ['name-anchor', 'li-div']), 321 | ('a:not([href]) /* test */,/* test */div div', ['name-anchor', 'li-div']), 322 | )) 323 | def test_select(selector, result): 324 | xml_ids = [ 325 | element.etree_element.get('id', 'nil') for element in 326 | ElementWrapper.from_xml_root(IDS_ROOT).query_all(selector)] 327 | html_ids = [ 328 | element.etree_element.get('id', 'nil') for element in 329 | ElementWrapper.from_html_root(IDS_ROOT).query_all(selector)] 330 | assert xml_ids == html_ids == result 331 | 332 | 333 | @pytest.mark.parametrize('selector, result', ( 334 | ('DIV', ['outer-div', 'li-div', 'foobar-div']), 335 | ('a[NAme]', ['name-anchor']), 336 | ('HTML :link', [ 337 | 'link-href', 'tag-anchor', 'nofollow-anchor', 'area-href']), 338 | )) 339 | def test_html_select(selector, result): 340 | assert not [ 341 | element.etree_element.get('id', 'nil') for element in 342 | ElementWrapper.from_xml_root(IDS_ROOT).query_all(selector)] 343 | assert result == [ 344 | element.etree_element.get('id', 'nil') for element in 345 | ElementWrapper.from_html_root(IDS_ROOT).query_all(selector)] 346 | 347 | 348 | # Data borrowed from http://mootools.net/slickspeed/ 349 | @pytest.mark.parametrize('selector, result', ( 350 | # Changed from original because we’re only searching the body. 351 | # ('*', 252), 352 | ('*', 246), 353 | # ('div:contains(CELIA)', 26), 354 | ('div:only-child', 22), # ? 355 | ('div:nth-child(even)', 106), 356 | ('div:nth-child(2n)', 106), 357 | ('div:nth-child(odd)', 137), 358 | ('div:nth-child(2n+1)', 137), 359 | ('div:nth-child(n)', 243), 360 | ('div:last-child', 53), 361 | ('div:first-child', 51), 362 | ('div > div', 242), 363 | ('div + div', 190), 364 | ('div ~ div', 190), 365 | ('body', 1), 366 | ('body div', 243), 367 | ('div', 243), 368 | ('div div', 242), 369 | ('div div div', 241), 370 | ('div, div, div', 243), 371 | ('div, a, span', 243), 372 | ('.dialog', 51), 373 | ('div.dialog', 51), 374 | ('div .dialog', 51), 375 | ('div.character, div.dialog', 99), 376 | ('div.direction.dialog', 0), 377 | ('div.dialog.direction', 0), 378 | ('div.dialog.scene', 1), 379 | ('div.scene.scene', 1), 380 | ('div.scene .scene', 0), 381 | ('div.direction .dialog ', 0), 382 | ('div .dialog .direction', 4), 383 | ('div.dialog .dialog .direction', 4), 384 | ('#speech5', 1), 385 | ('div#speech5', 1), 386 | ('div #speech5', 1), 387 | ('div.scene div.dialog', 49), 388 | ('div#scene1 div.dialog div', 142), 389 | ('#scene1 #speech1', 1), 390 | ('div[class]', 103), 391 | ('div[class=dialog]', 50), 392 | ('div[class^=dia]', 51), 393 | ('div[class$=log]', 50), 394 | ('div[class*=sce]', 1), 395 | ('div[class|=dialog]', 50), # ? Seems right 396 | # assert count('div[class!=madeup]', 243), # ? Seems right 397 | ('div[class~=dialog]', 51), # ? Seems right 398 | )) 399 | def test_select_shakespeare(selector, result): 400 | assert sum(1 for _ in SHAKESPEARE_BODY.query_all(selector)) == result 401 | -------------------------------------------------------------------------------- /tests/w3_selectors.py: -------------------------------------------------------------------------------- 1 | # File generated by make_selectors.py, do not edit 2 | 3 | invalid_selectors = [ 4 | {'name': 'Empty String', 'selector': ''}, 5 | {'name': 'Invalid character', 'selector': '['}, 6 | {'name': 'Invalid character', 'selector': ']'}, 7 | {'name': 'Invalid character', 'selector': '('}, 8 | {'name': 'Invalid character', 'selector': ')'}, 9 | {'name': 'Invalid character', 'selector': '{'}, 10 | {'name': 'Invalid character', 'selector': '}'}, 11 | {'name': 'Invalid character', 'selector': '<'}, 12 | {'name': 'Invalid character', 'selector': '>'}, 13 | {'name': 'Invalid ID', 'selector': '#'}, 14 | {'name': 'Invalid group of selectors', 'selector': 'div,'}, 15 | {'name': 'Invalid class', 'selector': '.'}, 16 | {'name': 'Invalid class', 'selector': '.5cm'}, 17 | {'name': 'Invalid class', 'selector': '..test'}, 18 | {'name': 'Invalid class', 'selector': '.foo..quux'}, 19 | {'name': 'Invalid class', 'selector': '.bar.'}, 20 | {'name': 'Invalid combinator', 'selector': 'div & address, p'}, 21 | {'name': 'Invalid combinator', 'selector': 'div ++ address, p'}, 22 | {'name': 'Invalid combinator', 'selector': 'div ~~ address, p'}, 23 | {'name': 'Invalid [att=value] selector', 'selector': '[*=test]'}, 24 | {'name': 'Invalid [att=value] selector', 'selector': '[*|*=test]'}, 25 | { 26 | 'name': 'Invalid [att=value] selector', 27 | 'selector': '[class= space unquoted ]'}, 28 | {'name': 'Unknown pseudo-class', 'selector': 'div:example'}, 29 | {'name': 'Unknown pseudo-class', 'selector': ':example'}, 30 | {'name': 'Unknown pseudo-class', 'selector': 'div:linkexample'}, 31 | {'name': 'Unknown pseudo-element', 'selector': 'div::example'}, 32 | {'name': 'Unknown pseudo-element', 'selector': '::example'}, 33 | {'name': 'Invalid pseudo-element', 'selector': ':::before'}, 34 | {'name': 'Invalid pseudo-element', 'selector': ':: before'}, 35 | {'name': 'Undeclared namespace', 'selector': 'ns|div'}, 36 | {'name': 'Undeclared namespace', 'selector': ':not(ns|div)'}, 37 | {'name': 'Invalid arguments', 'selector': ':nth-child(3 of)'}, 38 | {'name': 'Invalid namespace', 'selector': '^|div'}, 39 | {'name': 'Invalid namespace', 'selector': '$|div'}, 40 | {'name': 'Relative selector', 'selector': '>*'}] 41 | 42 | valid_selectors = [ 43 | { 44 | 'exclude': ['element', 'fragment', 'detached'], 45 | 'expect': ['html'], 46 | 'level': 1, 47 | 'name': 'Type selector, matching html element', 48 | 'selector': 'html'}, 49 | { 50 | 'exclude': ['document'], 51 | 'expect': [], 52 | 'level': 1, 53 | 'name': 'Type selector, matching html element', 54 | 'selector': 'html'}, 55 | { 56 | 'exclude': ['element', 'fragment', 'detached'], 57 | 'expect': ['body'], 58 | 'level': 1, 59 | 'name': 'Type selector, matching body element', 60 | 'selector': 'body'}, 61 | { 62 | 'exclude': ['document'], 63 | 'expect': [], 64 | 'level': 1, 65 | 'name': 'Type selector, matching body element', 66 | 'selector': 'body'}, 67 | { 68 | 'expect': [ 69 | 'universal', 70 | 'universal-p1', 71 | 'universal-code1', 72 | 'universal-hr1', 73 | 'universal-pre1', 74 | 'universal-span1', 75 | 'universal-p2', 76 | 'universal-a1', 77 | 'universal-address1', 78 | 'universal-code2', 79 | 'universal-a2'], 80 | 'level': 2, 81 | 'name': 'Universal selector, matching all elements', 82 | 'selector': '*'}, 83 | { 84 | 'expect': [ 85 | 'universal-p1', 86 | 'universal-hr1', 87 | 'universal-pre1', 88 | 'universal-p2', 89 | 'universal-address1'], 90 | 'level': 2, 91 | 'name': 'Universal selector, matching all children of element with ' 92 | 'specified ID', 93 | 'selector': '#universal>*'}, 94 | { 95 | 'expect': [ 96 | 'universal-code1', 97 | 'universal-span1', 98 | 'universal-a1', 99 | 'universal-code2'], 100 | 'level': 2, 101 | 'name': 'Universal selector, matching all grandchildren of element ' 102 | 'with specified ID', 103 | 'selector': '#universal>*>*'}, 104 | { 105 | 'expect': [], 106 | 'level': 2, 107 | 'name': 'Universal selector, matching all children of empty element ' 108 | 'with specified ID', 109 | 'selector': '#empty>*'}, 110 | { 111 | 'expect': [ 112 | 'universal-p1', 113 | 'universal-code1', 114 | 'universal-hr1', 115 | 'universal-pre1', 116 | 'universal-span1', 117 | 'universal-p2', 118 | 'universal-a1', 119 | 'universal-address1', 120 | 'universal-code2', 121 | 'universal-a2'], 122 | 'level': 2, 123 | 'name': 'Universal selector, matching all descendants of element with ' 124 | 'specified ID', 125 | 'selector': '#universal *'}, 126 | { 127 | 'expect': ['attr-presence-div1'], 128 | 'level': 2, 129 | 'name': 'Attribute presence selector, matching align attribute with ' 130 | 'value', 131 | 'selector': '.attr-presence-div1[align]'}, 132 | { 133 | 'expect': ['attr-presence-div2'], 134 | 'level': 2, 135 | 'name': 'Attribute presence selector, matching align attribute with ' 136 | 'empty value', 137 | 'selector': '.attr-presence-div2[align]'}, 138 | { 139 | 'exclude': ['xhtml'], 140 | 'expect': [ 141 | 'attr-presence-a1', 142 | 'attr-presence-span1', 143 | 'attr-presence-i1'], 144 | 'level': 2, 145 | 'name': 'Attribute presence selector, matching title attribute, case ' 146 | 'insensitivity', 147 | 'selector': '#attr-presence [*|TiTlE]'}, 148 | { 149 | 'exclude': ['html'], 150 | 'expect': [], 151 | 'level': 2, 152 | 'name': 'Attribute presence selector, not matching title attribute, ' 153 | 'case sensitivity', 154 | 'selector': '#attr-presence [*|TiTlE]'}, 155 | { 156 | 'expect': ['attr-presence-pre1', 'attr-presence-blockquote1'], 157 | 'level': 2, 158 | 'name': 'Attribute presence selector, matching custom data-* ' 159 | 'attribute', 160 | 'selector': '[data-attr-presence]'}, 161 | { 162 | 'expect': [], 163 | 'level': 2, 164 | 'name': 'Attribute presence selector, not matching attribute with ' 165 | 'similar name', 166 | 'selector': '.attr-presence-div3[align], .attr-presence-div4[align]'}, 167 | { 168 | 'expect': ['attr-presence-ul1'], 169 | 'level': 2, 170 | 'name': 'Attribute presence selector, matching attribute with ' 171 | 'non-ASCII characters', 172 | 'selector': 'ul[data-中文]'}, 173 | { 174 | 'expect': [], 175 | 'level': 2, 176 | 'name': 'Attribute presence selector, not matching default option ' 177 | 'without selected attribute', 178 | 'selector': '#attr-presence-select1 option[selected]'}, 179 | { 180 | 'expect': ['attr-presence-select2-option4'], 181 | 'level': 2, 182 | 'name': 'Attribute presence selector, matching option with selected ' 183 | 'attribute', 184 | 'selector': '#attr-presence-select2 option[selected]'}, 185 | { 186 | 'expect': [ 187 | 'attr-presence-select3-option2', 188 | 'attr-presence-select3-option3'], 189 | 'level': 2, 190 | 'name': 'Attribute presence selector, matching multiple options with ' 191 | 'selected attributes', 192 | 'selector': '#attr-presence-select3 option[selected]'}, 193 | { 194 | 'expect': ['attr-value-div1'], 195 | 'level': 2, 196 | 'name': 'Attribute value selector, matching align attribute with ' 197 | 'value', 198 | 'selector': '#attr-value [align="center"]'}, 199 | { 200 | 'expect': ['attr-value-div1'], 201 | 'level': 2, 202 | 'name': 'Attribute value selector, matching align attribute with ' 203 | 'value, unclosed bracket', 204 | 'selector': '#attr-value [align="center"'}, 205 | { 206 | 'expect': ['attr-value-div2'], 207 | 'level': 2, 208 | 'name': 'Attribute value selector, matching align attribute with ' 209 | 'empty value', 210 | 'selector': '#attr-value [align=""]'}, 211 | { 212 | 'expect': [], 213 | 'level': 2, 214 | 'name': 'Attribute value selector, not matching align attribute with ' 215 | 'partial value', 216 | 'selector': '#attr-value [align="c"]'}, 217 | { 218 | 'expect': [], 219 | 'level': 2, 220 | 'name': 'Attribute value selector, not matching align attribute with ' 221 | 'incorrect value', 222 | 'selector': '#attr-value [align="centera"]'}, 223 | { 224 | 'expect': ['attr-value-div3'], 225 | 'level': 2, 226 | 'name': 'Attribute value selector, matching custom data-* attribute ' 227 | 'with unicode escaped value', 228 | 'selector': '[data-attr-value="\\e9"]'}, 229 | { 230 | 'expect': ['attr-value-div4'], 231 | 'level': 2, 232 | 'name': 'Attribute value selector, matching custom data-* attribute ' 233 | 'with escaped character', 234 | 'selector': '[data-attr-value_foo="\\e9"]'}, 235 | { 236 | 'expect': [ 237 | 'attr-value-input3', 238 | 'attr-value-input4', 239 | 'attr-value-input6', 240 | 'attr-value-input8', 241 | 'attr-value-input9'], 242 | 'level': 2, 243 | 'name': 'Attribute value selector with single-quoted value, matching ' 244 | 'multiple inputs with type attributes', 245 | 'selector': "#attr-value input[type='hidden'],#attr-value " 246 | "input[type='radio']"}, 247 | { 248 | 'expect': [ 249 | 'attr-value-input3', 250 | 'attr-value-input4', 251 | 'attr-value-input6', 252 | 'attr-value-input8', 253 | 'attr-value-input9'], 254 | 'level': 2, 255 | 'name': 'Attribute value selector with double-quoted value, matching ' 256 | 'multiple inputs with type attributes', 257 | 'selector': '#attr-value input[type="hidden"],#attr-value ' 258 | "input[type='radio']"}, 259 | { 260 | 'expect': [ 261 | 'attr-value-input3', 262 | 'attr-value-input4', 263 | 'attr-value-input6', 264 | 'attr-value-input8', 265 | 'attr-value-input9'], 266 | 'level': 2, 267 | 'name': 'Attribute value selector with unquoted value, matching ' 268 | 'multiple inputs with type attributes', 269 | 'selector': '#attr-value input[type=hidden],#attr-value ' 270 | 'input[type=radio]'}, 271 | { 272 | 'expect': ['attr-value-div5'], 273 | 'level': 2, 274 | 'name': 'Attribute value selector, matching attribute with value ' 275 | 'using non-ASCII characters', 276 | 'selector': '[data-attr-value=中文]'}, 277 | { 278 | 'expect': ['attr-whitespace-div1'], 279 | 'level': 2, 280 | 'name': 'Attribute whitespace-separated list selector, matching class ' 281 | 'attribute with value', 282 | 'selector': '#attr-whitespace [class~="div1"]'}, 283 | { 284 | 'expect': [], 285 | 'level': 2, 286 | 'name': 'Attribute whitespace-separated list selector, not matching ' 287 | 'class attribute with empty value', 288 | 'selector': '#attr-whitespace [class~=""]'}, 289 | { 290 | 'expect': [], 291 | 'level': 2, 292 | 'name': 'Attribute whitespace-separated list selector, not matching ' 293 | 'class attribute with partial value', 294 | 'selector': '[data-attr-whitespace~="div"]'}, 295 | { 296 | 'expect': ['attr-whitespace-div4'], 297 | 'level': 2, 298 | 'name': 'Attribute whitespace-separated list selector, matching ' 299 | 'custom data-* attribute with unicode escaped value', 300 | 'selector': '[data-attr-whitespace~="\\0000e9"]'}, 301 | { 302 | 'expect': ['attr-whitespace-div5'], 303 | 'level': 2, 304 | 'name': 'Attribute whitespace-separated list selector, matching ' 305 | 'custom data-* attribute with escaped character', 306 | 'selector': '[data-attr-whitespace_foo~="\\e9"]'}, 307 | { 308 | 'expect': [ 309 | 'attr-whitespace-a1', 310 | 'attr-whitespace-a2', 311 | 'attr-whitespace-a3', 312 | 'attr-whitespace-a5', 313 | 'attr-whitespace-a7'], 314 | 'level': 2, 315 | 'name': 'Attribute whitespace-separated list selector with ' 316 | 'single-quoted value, matching multiple links with rel ' 317 | 'attributes', 318 | 'selector': "#attr-whitespace a[rel~='bookmark'], #attr-whitespace " 319 | "a[rel~='nofollow']"}, 320 | { 321 | 'expect': [ 322 | 'attr-whitespace-a1', 323 | 'attr-whitespace-a2', 324 | 'attr-whitespace-a3', 325 | 'attr-whitespace-a5', 326 | 'attr-whitespace-a7'], 327 | 'level': 2, 328 | 'name': 'Attribute whitespace-separated list selector with ' 329 | 'double-quoted value, matching multiple links with rel ' 330 | 'attributes', 331 | 'selector': '#attr-whitespace a[rel~="bookmark"],#attr-whitespace ' 332 | "a[rel~='nofollow']"}, 333 | { 334 | 'expect': [ 335 | 'attr-whitespace-a1', 336 | 'attr-whitespace-a2', 337 | 'attr-whitespace-a3', 338 | 'attr-whitespace-a5', 339 | 'attr-whitespace-a7'], 340 | 'level': 2, 341 | 'name': 'Attribute whitespace-separated list selector with unquoted ' 342 | 'value, matching multiple links with rel attributes', 343 | 'selector': '#attr-whitespace a[rel~=bookmark], #attr-whitespace ' 344 | 'a[rel~=nofollow]'}, 345 | { 346 | 'expect': [], 347 | 'level': 2, 348 | 'name': 'Attribute whitespace-separated list selector with ' 349 | 'double-quoted value, not matching value with space', 350 | 'selector': '#attr-whitespace a[rel~="book mark"]'}, 351 | { 352 | 'expect': ['attr-whitespace-p1'], 353 | 'level': 2, 354 | 'name': 'Attribute whitespace-separated list selector, matching title ' 355 | 'attribute with value using non-ASCII characters', 356 | 'selector': '#attr-whitespace [title~=中文]'}, 357 | { 358 | 'expect': [], 359 | 'level': 2, 360 | 'name': 'Attribute hyphen-separated list selector, not matching ' 361 | 'unspecified lang attribute', 362 | 'selector': '#attr-hyphen-div1[lang|="en"]'}, 363 | { 364 | 'expect': ['attr-hyphen-div2'], 365 | 'level': 2, 366 | 'name': 'Attribute hyphen-separated list selector, matching lang ' 367 | 'attribute with exact value', 368 | 'selector': '#attr-hyphen-div2[lang|="fr"]'}, 369 | { 370 | 'expect': ['attr-hyphen-div3'], 371 | 'level': 2, 372 | 'name': 'Attribute hyphen-separated list selector, matching lang ' 373 | 'attribute with partial value', 374 | 'selector': '#attr-hyphen-div3[lang|="en"]'}, 375 | { 376 | 'expect': [], 377 | 'level': 2, 378 | 'name': 'Attribute hyphen-separated list selector, not matching ' 379 | 'incorrect value', 380 | 'selector': '#attr-hyphen-div4[lang|="es-AR"]'}, 381 | { 382 | 'expect': ['attr-begins-a1', 'attr-begins-a3'], 383 | 'level': 3, 384 | 'name': 'Attribute begins with selector, matching href attributes ' 385 | 'beginning with specified substring', 386 | 'selector': '#attr-begins a[href^="http://www"]'}, 387 | { 388 | 'expect': ['attr-begins-div2', 'attr-begins-div4'], 389 | 'level': 3, 390 | 'name': 'Attribute begins with selector, matching lang attributes ' 391 | 'beginning with specified substring, ', 392 | 'selector': '#attr-begins [lang^="en-"]'}, 393 | { 394 | 'expect': [], 395 | 'level': 3, 396 | 'name': 'Attribute begins with selector, not matching class attribute ' 397 | 'with empty value', 398 | 'selector': '#attr-begins [class^=""]'}, 399 | { 400 | 'expect': [], 401 | 'level': 3, 402 | 'name': 'Attribute begins with selector, not matching class attribute ' 403 | 'not beginning with specified substring', 404 | 'selector': '#attr-begins [class^=apple]'}, 405 | { 406 | 'expect': ['attr-begins-p1'], 407 | 'level': 3, 408 | 'name': 'Attribute begins with selector with single-quoted value, ' 409 | 'matching class attribute beginning with specified substring', 410 | 'selector': "#attr-begins [class^=' apple']"}, 411 | { 412 | 'expect': ['attr-begins-p1'], 413 | 'level': 3, 414 | 'name': 'Attribute begins with selector with double-quoted value, ' 415 | 'matching class attribute beginning with specified substring', 416 | 'selector': '#attr-begins [class^=" apple"]'}, 417 | { 418 | 'expect': [], 419 | 'level': 3, 420 | 'name': 'Attribute begins with selector with unquoted value, not ' 421 | 'matching class attribute not beginning with specified ' 422 | 'substring', 423 | 'selector': '#attr-begins [class^= apple]'}, 424 | { 425 | 'expect': ['attr-ends-a1', 'attr-ends-a3'], 426 | 'level': 3, 427 | 'name': 'Attribute ends with selector, matching href attributes ' 428 | 'ending with specified substring', 429 | 'selector': '#attr-ends a[href$=".org"]'}, 430 | { 431 | 'expect': ['attr-ends-div2', 'attr-ends-div4'], 432 | 'level': 3, 433 | 'name': 'Attribute ends with selector, matching lang attributes ' 434 | 'ending with specified substring, ', 435 | 'selector': '#attr-ends [lang$="-CH"]'}, 436 | { 437 | 'expect': [], 438 | 'level': 3, 439 | 'name': 'Attribute ends with selector, not matching class attribute ' 440 | 'with empty value', 441 | 'selector': '#attr-ends [class$=""]'}, 442 | { 443 | 'expect': [], 444 | 'level': 3, 445 | 'name': 'Attribute ends with selector, not matching class attribute ' 446 | 'not ending with specified substring', 447 | 'selector': '#attr-ends [class$=apple]'}, 448 | { 449 | 'expect': ['attr-ends-p1'], 450 | 'level': 3, 451 | 'name': 'Attribute ends with selector with single-quoted value, ' 452 | 'matching class attribute ending with specified substring', 453 | 'selector': "#attr-ends [class$='apple ']"}, 454 | { 455 | 'expect': ['attr-ends-p1'], 456 | 'level': 3, 457 | 'name': 'Attribute ends with selector with double-quoted value, ' 458 | 'matching class attribute ending with specified substring', 459 | 'selector': '#attr-ends [class$="apple "]'}, 460 | { 461 | 'expect': [], 462 | 'level': 3, 463 | 'name': 'Attribute ends with selector with unquoted value, not ' 464 | 'matching class attribute not ending with specified substring', 465 | 'selector': '#attr-ends [class$=apple ]'}, 466 | { 467 | 'expect': ['attr-contains-a1', 'attr-contains-a3'], 468 | 'level': 3, 469 | 'name': 'Attribute contains selector, matching href attributes ' 470 | 'beginning with specified substring', 471 | 'selector': '#attr-contains a[href*="http://www"]'}, 472 | { 473 | 'expect': ['attr-contains-a1', 'attr-contains-a2'], 474 | 'level': 3, 475 | 'name': 'Attribute contains selector, matching href attributes ending ' 476 | 'with specified substring', 477 | 'selector': '#attr-contains a[href*=".org"]'}, 478 | { 479 | 'expect': ['attr-contains-a1', 'attr-contains-a3'], 480 | 'level': 3, 481 | 'name': 'Attribute contains selector, matching href attributes ' 482 | 'containing specified substring', 483 | 'selector': '#attr-contains a[href*=".example."]'}, 484 | { 485 | 'expect': ['attr-contains-div2', 'attr-contains-div6'], 486 | 'level': 3, 487 | 'name': 'Attribute contains selector, matching lang attributes ' 488 | 'beginning with specified substring, ', 489 | 'selector': '#attr-contains [lang*="en-"]'}, 490 | { 491 | 'expect': ['attr-contains-div3', 'attr-contains-div5'], 492 | 'level': 3, 493 | 'name': 'Attribute contains selector, matching lang attributes ending ' 494 | 'with specified substring, ', 495 | 'selector': '#attr-contains [lang*="-CH"]'}, 496 | { 497 | 'expect': [], 498 | 'level': 3, 499 | 'name': 'Attribute contains selector, not matching class attribute ' 500 | 'with empty value', 501 | 'selector': '#attr-contains [class*=""]'}, 502 | { 503 | 'expect': ['attr-contains-p1'], 504 | 'level': 3, 505 | 'name': 'Attribute contains selector with single-quoted value, ' 506 | 'matching class attribute beginning with specified substring', 507 | 'selector': "#attr-contains [class*=' apple']"}, 508 | { 509 | 'expect': ['attr-contains-p1'], 510 | 'level': 3, 511 | 'name': 'Attribute contains selector with single-quoted value, ' 512 | 'matching class attribute ending with specified substring', 513 | 'selector': "#attr-contains [class*='orange ']"}, 514 | { 515 | 'expect': ['attr-contains-p1'], 516 | 'level': 3, 517 | 'name': 'Attribute contains selector with single-quoted value, ' 518 | 'matching class attribute containing specified substring', 519 | 'selector': "#attr-contains [class*='ple banana ora']"}, 520 | { 521 | 'expect': ['attr-contains-p1'], 522 | 'level': 3, 523 | 'name': 'Attribute contains selector with double-quoted value, ' 524 | 'matching class attribute beginning with specified substring', 525 | 'selector': '#attr-contains [class*=" apple"]'}, 526 | { 527 | 'expect': ['attr-contains-p1'], 528 | 'level': 3, 529 | 'name': 'Attribute contains selector with double-quoted value, ' 530 | 'matching class attribute ending with specified substring', 531 | 'selector': '#attr-contains [class*="orange "]'}, 532 | { 533 | 'expect': ['attr-contains-p1'], 534 | 'level': 3, 535 | 'name': 'Attribute contains selector with double-quoted value, ' 536 | 'matching class attribute containing specified substring', 537 | 'selector': '#attr-contains [class*="ple banana ora"]'}, 538 | { 539 | 'expect': ['attr-contains-p1'], 540 | 'level': 3, 541 | 'name': 'Attribute contains selector with unquoted value, matching ' 542 | 'class attribute beginning with specified substring', 543 | 'selector': '#attr-contains [class*= apple]'}, 544 | { 545 | 'expect': ['attr-contains-p1'], 546 | 'level': 3, 547 | 'name': 'Attribute contains selector with unquoted value, matching ' 548 | 'class attribute ending with specified substring', 549 | 'selector': '#attr-contains [class*=orange ]'}, 550 | { 551 | 'expect': ['attr-contains-p1'], 552 | 'level': 3, 553 | 'name': 'Attribute contains selector with unquoted value, matching ' 554 | 'class attribute containing specified substring', 555 | 'selector': '#attr-contains [class*= banana ]'}, 556 | { 557 | 'exclude': ['element', 'fragment', 'detached'], 558 | 'expect': ['html'], 559 | 'level': 3, 560 | 'name': ':root pseudo-class selector, matching document root element', 561 | 'selector': ':root'}, 562 | { 563 | 'exclude': ['document'], 564 | 'expect': [], 565 | 'level': 3, 566 | 'name': ':root pseudo-class selector, not matching document root ' 567 | 'element', 568 | 'selector': ':root'}, 569 | { 570 | 'expect': [ 571 | 'pseudo-nth-td3', 572 | 'pseudo-nth-td9', 573 | 'pseudo-nth-tr3', 574 | 'pseudo-nth-td15'], 575 | 'level': 3, 576 | 'name': ':nth-child selector, matching the third child element', 577 | 'selector': '#pseudo-nth-table1 :nth-child(3)'}, 578 | { 579 | 'expect': [ 580 | 'pseudo-nth-li3', 581 | 'pseudo-nth-li6', 582 | 'pseudo-nth-li9', 583 | 'pseudo-nth-li12'], 584 | 'level': 3, 585 | 'name': ':nth-child selector, matching every third child element', 586 | 'selector': '#pseudo-nth li:nth-child(3n)'}, 587 | { 588 | 'expect': [ 589 | 'pseudo-nth-li4', 590 | 'pseudo-nth-li6', 591 | 'pseudo-nth-li8', 592 | 'pseudo-nth-li10', 593 | 'pseudo-nth-li12'], 594 | 'level': 3, 595 | 'name': ':nth-child selector, matching every second child element, ' 596 | 'starting from the fourth', 597 | 'selector': '#pseudo-nth li:nth-child(2n+4)'}, 598 | { 599 | 'expect': ['pseudo-nth-em2', 'pseudo-nth-span3'], 600 | 'level': 3, 601 | 'name': ':nth-child selector, matching every fourth child element, ' 602 | 'starting from the third', 603 | 'selector': '#pseudo-nth-p1 :nth-child(4n-1)'}, 604 | { 605 | 'expect': [ 606 | 'pseudo-nth-tr1', 607 | 'pseudo-nth-td4', 608 | 'pseudo-nth-td10', 609 | 'pseudo-nth-td16'], 610 | 'level': 3, 611 | 'name': ':nth-last-child selector, matching the third last child ' 612 | 'element', 613 | 'selector': '#pseudo-nth-table1 :nth-last-child(3)'}, 614 | { 615 | 'expect': [ 616 | 'pseudo-nth-li1', 617 | 'pseudo-nth-li4', 618 | 'pseudo-nth-li7', 619 | 'pseudo-nth-li10'], 620 | 'level': 3, 621 | 'name': ':nth-last-child selector, matching every third child element ' 622 | 'from the end', 623 | 'selector': '#pseudo-nth li:nth-last-child(3n)'}, 624 | { 625 | 'expect': [ 626 | 'pseudo-nth-li1', 627 | 'pseudo-nth-li3', 628 | 'pseudo-nth-li5', 629 | 'pseudo-nth-li7', 630 | 'pseudo-nth-li9'], 631 | 'level': 3, 632 | 'name': ':nth-last-child selector, matching every second child ' 633 | 'element from the end, starting from the fourth last', 634 | 'selector': '#pseudo-nth li:nth-last-child(2n+4)'}, 635 | { 636 | 'expect': ['pseudo-nth-span2', 'pseudo-nth-span4'], 637 | 'level': 3, 638 | 'name': ':nth-last-child selector, matching every fourth element from ' 639 | 'the end, starting from the third last', 640 | 'selector': '#pseudo-nth-p1 :nth-last-child(4n-1)'}, 641 | { 642 | 'expect': ['pseudo-nth-em3'], 643 | 'level': 3, 644 | 'name': ':nth-of-type selector, matching the third em element', 645 | 'selector': '#pseudo-nth-p1 em:nth-of-type(3)'}, 646 | { 647 | 'expect': [ 648 | 'pseudo-nth-em2', 649 | 'pseudo-nth-span2', 650 | 'pseudo-nth-span4', 651 | 'pseudo-nth-strong2', 652 | 'pseudo-nth-em4'], 653 | 'level': 3, 654 | 'name': ':nth-of-type selector, matching every second element of ' 655 | 'their type', 656 | 'selector': '#pseudo-nth-p1 :nth-of-type(2n)'}, 657 | { 658 | 'expect': ['pseudo-nth-span1', 'pseudo-nth-span3'], 659 | 'level': 3, 660 | 'name': ':nth-of-type selector, matching every second elemetn of ' 661 | 'their type, starting from the first', 662 | 'selector': '#pseudo-nth-p1 span:nth-of-type(2n-1)'}, 663 | { 664 | 'expect': ['pseudo-nth-em2'], 665 | 'level': 3, 666 | 'name': ':nth-last-of-type selector, matching the third last em ' 667 | 'element', 668 | 'selector': '#pseudo-nth-p1 em:nth-last-of-type(3)'}, 669 | { 670 | 'expect': [ 671 | 'pseudo-nth-span1', 672 | 'pseudo-nth-em1', 673 | 'pseudo-nth-strong1', 674 | 'pseudo-nth-em3', 675 | 'pseudo-nth-span3'], 676 | 'level': 3, 677 | 'name': ':nth-last-of-type selector, matching every second last ' 678 | 'element of their type', 679 | 'selector': '#pseudo-nth-p1 :nth-last-of-type(2n)'}, 680 | { 681 | 'expect': ['pseudo-nth-span2', 'pseudo-nth-span4'], 682 | 'level': 3, 683 | 'name': ':nth-last-of-type selector, matching every second last ' 684 | 'element of their type, starting from the last', 685 | 'selector': '#pseudo-nth-p1 span:nth-last-of-type(2n-1)'}, 686 | { 687 | 'expect': ['pseudo-nth-em1'], 688 | 'level': 3, 689 | 'name': ':first-of-type selector, matching the first em element', 690 | 'selector': '#pseudo-nth-p1 em:first-of-type'}, 691 | { 692 | 'expect': ['pseudo-nth-span1', 'pseudo-nth-em1', 'pseudo-nth-strong1'], 693 | 'level': 3, 694 | 'name': ':first-of-type selector, matching the first of every type of ' 695 | 'element', 696 | 'selector': '#pseudo-nth-p1 :first-of-type'}, 697 | { 698 | 'expect': ['pseudo-nth-td1', 'pseudo-nth-td7', 'pseudo-nth-td13'], 699 | 'level': 3, 700 | 'name': ':first-of-type selector, matching the first td element in ' 701 | 'each table row', 702 | 'selector': '#pseudo-nth-table1 tr :first-of-type'}, 703 | { 704 | 'expect': ['pseudo-nth-em4'], 705 | 'level': 3, 706 | 'name': ':last-of-type selector, matching the last em elemnet', 707 | 'selector': '#pseudo-nth-p1 em:last-of-type'}, 708 | { 709 | 'expect': ['pseudo-nth-span4', 'pseudo-nth-strong2', 'pseudo-nth-em4'], 710 | 'level': 3, 711 | 'name': ':last-of-type selector, matching the last of every type of ' 712 | 'element', 713 | 'selector': '#pseudo-nth-p1 :last-of-type'}, 714 | { 715 | 'expect': ['pseudo-nth-td6', 'pseudo-nth-td12', 'pseudo-nth-td18'], 716 | 'level': 3, 717 | 'name': ':last-of-type selector, matching the last td element in each ' 718 | 'table row', 719 | 'selector': '#pseudo-nth-table1 tr :last-of-type'}, 720 | { 721 | 'expect': ['pseudo-first-child-div1'], 722 | 'level': 2, 723 | 'name': ':first-child pseudo-class selector, matching first child div ' 724 | 'element', 725 | 'selector': '#pseudo-first-child div:first-child'}, 726 | { 727 | 'expect': [], 728 | 'level': 2, 729 | 'name': ":first-child pseudo-class selector, doesn't match " 730 | 'non-first-child elements', 731 | 'selector': '.pseudo-first-child-div2:first-child, ' 732 | '.pseudo-first-child-div3:first-child'}, 733 | { 734 | 'expect': [ 735 | 'pseudo-first-child-span1', 736 | 'pseudo-first-child-span3', 737 | 'pseudo-first-child-span5'], 738 | 'level': 2, 739 | 'name': ':first-child pseudo-class selector, matching first-child of ' 740 | 'multiple elements', 741 | 'selector': '#pseudo-first-child span:first-child'}, 742 | { 743 | 'expect': ['pseudo-last-child-div3'], 744 | 'level': 3, 745 | 'name': ':last-child pseudo-class selector, matching last child div ' 746 | 'element', 747 | 'selector': '#pseudo-last-child div:last-child'}, 748 | { 749 | 'expect': [], 750 | 'level': 3, 751 | 'name': ":last-child pseudo-class selector, doesn't match " 752 | 'non-last-child elements', 753 | 'selector': '.pseudo-last-child-div1:last-child, ' 754 | '.pseudo-last-child-div2:first-child'}, 755 | { 756 | 'expect': [ 757 | 'pseudo-last-child-span2', 758 | 'pseudo-last-child-span4', 759 | 'pseudo-last-child-span6'], 760 | 'level': 3, 761 | 'name': ':last-child pseudo-class selector, matching first-child of ' 762 | 'multiple elements', 763 | 'selector': '#pseudo-last-child span:last-child'}, 764 | { 765 | 'expect': ['pseudo-only-span1'], 766 | 'level': 3, 767 | 'name': ':pseudo-only-child pseudo-class selector, matching all ' 768 | 'only-child elements', 769 | 'selector': '#pseudo-only :only-child'}, 770 | { 771 | 'expect': [], 772 | 'level': 3, 773 | 'name': ':pseudo-only-child pseudo-class selector, matching ' 774 | 'only-child em elements', 775 | 'selector': '#pseudo-only em:only-child'}, 776 | { 777 | 'expect': ['pseudo-only-span1', 'pseudo-only-em1'], 778 | 'level': 3, 779 | 'name': ':pseudo-only-of-type pseudo-class selector, matching all ' 780 | 'elements with no siblings of the same type', 781 | 'selector': '#pseudo-only :only-of-type'}, 782 | { 783 | 'expect': ['pseudo-only-em1'], 784 | 'level': 3, 785 | 'name': ':pseudo-only-of-type pseudo-class selector, matching em ' 786 | 'elements with no siblings of the same type', 787 | 'selector': '#pseudo-only em:only-of-type'}, 788 | { 789 | 'expect': ['pseudo-empty-p1', 'pseudo-empty-p2'], 790 | 'level': 3, 791 | 'name': ':empty pseudo-class selector, matching empty p elements', 792 | 'selector': '#pseudo-empty p:empty'}, 793 | { 794 | 'expect': ['pseudo-empty-p1', 'pseudo-empty-p2', 'pseudo-empty-span1'], 795 | 'level': 3, 796 | 'name': ':empty pseudo-class selector, matching all empty elements', 797 | 'selector': '#pseudo-empty :empty'}, 798 | { 799 | 'expect': ['pseudo-link-a1', 'pseudo-link-a2', 'pseudo-link-area1'], 800 | 'level': 1, 801 | 'name': ':link and :visited pseudo-class selectors, matching a and ' 802 | 'area elements with href attributes', 803 | 'selector': '#pseudo-link :link, #pseudo-link :visited'}, 804 | { 805 | 'exclude': ['element', 'fragment', 'detached'], 806 | 'expect': [], 807 | 'level': 1, 808 | 'name': ':link and :visited pseudo-class selectors, matching no ' 809 | 'elements', 810 | 'selector': '#head :link, #head :visited'}, 811 | { 812 | 'exclude': ['document'], 813 | 'expect': [], 814 | 'level': 1, 815 | 'name': ':link and :visited pseudo-class selectors, not matching link ' 816 | 'elements with href attributes', 817 | 'selector': '#head :link, #head :visited'}, 818 | { 819 | 'exclude': ['document'], 820 | 'expect': [], 821 | 'level': 1, 822 | 'name': ':link and :visited pseudo-class selectors, chained, mutually ' 823 | 'exclusive pseudo-classes match nothing', 824 | 'selector': ':link:visited'}, 825 | { 826 | 'exclude': ['document', 'element'], 827 | 'expect': [], 828 | 'level': 3, 829 | 'name': ':target pseudo-class selector, matching the element ' 830 | 'referenced by the URL fragment identifier', 831 | 'selector': ':target'}, 832 | { 833 | 'exclude': ['fragment', 'detached'], 834 | 'expect': ['target'], 835 | 'level': 3, 836 | 'name': ':target pseudo-class selector, matching the element ' 837 | 'referenced by the URL fragment identifier', 838 | 'selector': ':target'}, 839 | { 840 | 'exclude': ['detached', 'fragment'], 841 | 'expect': ['pseudo-lang-div1'], 842 | 'level': 2, 843 | 'name': ':lang pseudo-class selector, matching inherited language', 844 | 'selector': '#pseudo-lang-div1:lang(en)'}, 845 | { 846 | 'exclude': ['document', 'element'], 847 | 'expect': [], 848 | 'level': 2, 849 | 'name': ':lang pseudo-class selector, not matching element with no ' 850 | 'inherited language', 851 | 'selector': '#pseudo-lang-div1:lang(en)'}, 852 | { 853 | 'expect': ['pseudo-lang-div2'], 854 | 'level': 2, 855 | 'name': ':lang pseudo-class selector, matching specified language ' 856 | 'with exact value', 857 | 'selector': '#pseudo-lang-div2:lang(fr)'}, 858 | { 859 | 'expect': ['pseudo-lang-div3'], 860 | 'level': 2, 861 | 'name': ':lang pseudo-class selector, matching specified language ' 862 | 'with partial value', 863 | 'selector': '#pseudo-lang-div3:lang(en)'}, 864 | { 865 | 'expect': [], 866 | 'level': 2, 867 | 'name': ':lang pseudo-class selector, not matching incorrect language', 868 | 'selector': '#pseudo-lang-div4:lang(es-AR)'}, 869 | { 870 | 'expect': [ 871 | 'pseudo-ui-input1', 872 | 'pseudo-ui-input2', 873 | 'pseudo-ui-input3', 874 | 'pseudo-ui-input4', 875 | 'pseudo-ui-input5', 876 | 'pseudo-ui-input6', 877 | 'pseudo-ui-input7', 878 | 'pseudo-ui-input8', 879 | 'pseudo-ui-input9', 880 | 'pseudo-ui-textarea1', 881 | 'pseudo-ui-button1'], 882 | 'level': 3, 883 | 'name': ':enabled pseudo-class selector, matching all enabled form ' 884 | 'controls', 885 | 'selector': '#pseudo-ui :enabled'}, 886 | { 887 | 'expect': [], 888 | 'level': 3, 889 | 'name': ':enabled pseudo-class selector, not matching link elements', 890 | 'selector': '#pseudo-link :enabled', 891 | 'unexpected': [ 892 | 'pseudo-link-a1', 893 | 'pseudo-link-a2', 894 | 'pseudo-link-a3', 895 | 'pseudo-link-map1', 896 | 'pseudo-link-area1', 897 | 'pseudo-link-area2']}, 898 | { 899 | 'expect': [ 900 | 'pseudo-ui-input10', 901 | 'pseudo-ui-input11', 902 | 'pseudo-ui-input12', 903 | 'pseudo-ui-input13', 904 | 'pseudo-ui-input14', 905 | 'pseudo-ui-input15', 906 | 'pseudo-ui-input16', 907 | 'pseudo-ui-input17', 908 | 'pseudo-ui-input18', 909 | 'pseudo-ui-textarea2', 910 | 'pseudo-ui-button2'], 911 | 'level': 3, 912 | 'name': ':disabled pseudo-class selector, matching all disabled form ' 913 | 'controls', 914 | 'selector': '#pseudo-ui :disabled'}, 915 | { 916 | 'expect': [], 917 | 'level': 3, 918 | 'name': ':disabled pseudo-class selector, not matching link elements', 919 | 'selector': '#pseudo-link :disabled', 920 | 'unexpected': [ 921 | 'pseudo-link-a1', 922 | 'pseudo-link-a2', 923 | 'pseudo-link-a3', 924 | 'pseudo-link-map1', 925 | 'pseudo-link-area1', 926 | 'pseudo-link-area2']}, 927 | { 928 | 'expect': [ 929 | 'pseudo-ui-input4', 930 | 'pseudo-ui-input6', 931 | 'pseudo-ui-input13', 932 | 'pseudo-ui-input15'], 933 | 'level': 3, 934 | 'name': ':checked pseudo-class selector, matching checked radio ' 935 | 'buttons and checkboxes', 936 | 'selector': '#pseudo-ui :checked'}, 937 | { 938 | 'expect': ['not-p1', 'not-p2', 'not-p3'], 939 | 'level': 3, 940 | 'name': ':not pseudo-class selector, matching ', 941 | 'selector': '#not>:not(div)'}, 942 | { 943 | 'expect': ['not-em1', 'not-em2', 'not-em3'], 944 | 'level': 3, 945 | 'name': ':not pseudo-class selector, matching ', 946 | 'selector': '#not * :not(:first-child)'}, 947 | { 948 | 'expect': [], 949 | 'level': 3, 950 | 'name': ':not pseudo-class selector, matching nothing', 951 | 'selector': ':not(*)'}, 952 | { 953 | 'expect': [], 954 | 'level': 3, 955 | 'name': ':not pseudo-class selector, matching nothing', 956 | 'selector': ':not(*|*)'}, 957 | { 958 | 'expect': ['not-p1', 'not-p2', 'not-p3'], 959 | 'level': 3, 960 | 'name': ':not pseudo-class selector argument surrounded by spaces, ' 961 | 'matching ', 962 | 'selector': '#not>:not( div )'}, 963 | { 964 | 'expect': [], 965 | 'level': 2, 966 | 'name': ':first-line pseudo-element (one-colon syntax) selector, not ' 967 | 'matching any elements', 968 | 'selector': '#pseudo-element:first-line'}, 969 | { 970 | 'expect': [], 971 | 'level': 3, 972 | 'name': '::first-line pseudo-element (two-colon syntax) selector, not ' 973 | 'matching any elements', 974 | 'selector': '#pseudo-element::first-line'}, 975 | { 976 | 'expect': [], 977 | 'level': 2, 978 | 'name': ':first-letter pseudo-element (one-colon syntax) selector, ' 979 | 'not matching any elements', 980 | 'selector': '#pseudo-element:first-letter'}, 981 | { 982 | 'expect': [], 983 | 'level': 3, 984 | 'name': '::first-letter pseudo-element (two-colon syntax) selector, ' 985 | 'not matching any elements', 986 | 'selector': '#pseudo-element::first-letter'}, 987 | { 988 | 'expect': [], 989 | 'level': 2, 990 | 'name': ':before pseudo-element (one-colon syntax) selector, not ' 991 | 'matching any elements', 992 | 'selector': '#pseudo-element:before'}, 993 | { 994 | 'expect': [], 995 | 'level': 3, 996 | 'name': '::before pseudo-element (two-colon syntax) selector, not ' 997 | 'matching any elements', 998 | 'selector': '#pseudo-element::before'}, 999 | { 1000 | 'expect': [], 1001 | 'level': 2, 1002 | 'name': ':after pseudo-element (one-colon syntax) selector, not ' 1003 | 'matching any elements', 1004 | 'selector': '#pseudo-element:after'}, 1005 | { 1006 | 'expect': [], 1007 | 'level': 3, 1008 | 'name': '::after pseudo-element (two-colon syntax) selector, not ' 1009 | 'matching any elements', 1010 | 'selector': '#pseudo-element::after'}, 1011 | { 1012 | 'expect': ['class-p1', 'class-p2', 'class-p3'], 1013 | 'level': 1, 1014 | 'name': 'Class selector, matching element with specified class', 1015 | 'selector': '.class-p'}, 1016 | { 1017 | 'expect': [ 1018 | 'class-div1', 1019 | 'class-div2', 1020 | 'class-p4', 1021 | 'class-div3', 1022 | 'class-p6', 1023 | 'class-div4'], 1024 | 'level': 1, 1025 | 'name': 'Class selector, chained, matching only elements with all ' 1026 | 'specified classes', 1027 | 'selector': '#class .apple.orange.banana'}, 1028 | { 1029 | 'expect': ['class-div1', 'class-div2', 'class-div3', 'class-div4'], 1030 | 'level': 1, 1031 | 'name': 'Class Selector, chained, with type selector', 1032 | 'selector': 'div.apple.banana.orange'}, 1033 | { 1034 | 'expect': ['class-span1'], 1035 | 'level': 1, 1036 | 'name': 'Class selector, matching element with class value using ' 1037 | 'non-ASCII characters (1)', 1038 | 'selector': '.台北Táiběi'}, 1039 | { 1040 | 'expect': ['class-span1', 'class-span2'], 1041 | 'level': 1, 1042 | 'name': 'Class selector, matching multiple elements with class value ' 1043 | 'using non-ASCII characters', 1044 | 'selector': '.台北'}, 1045 | { 1046 | 'expect': ['class-span1'], 1047 | 'level': 1, 1048 | 'name': 'Class selector, chained, matching element with multiple ' 1049 | 'class values using non-ASCII characters (1)', 1050 | 'selector': '.台北Táiběi.台北'}, 1051 | { 1052 | 'expect': ['class-span3'], 1053 | 'level': 1, 1054 | 'name': 'Class selector, matching element with class with escaped ' 1055 | 'character', 1056 | 'selector': '.foo\\:bar'}, 1057 | { 1058 | 'expect': ['class-span4'], 1059 | 'level': 1, 1060 | 'name': 'Class selector, matching element with class with escaped ' 1061 | 'character', 1062 | 'selector': '.test\\.foo\\[5\\]bar'}, 1063 | { 1064 | 'expect': ['id-div1'], 1065 | 'level': 1, 1066 | 'name': 'ID selector, matching element with specified id', 1067 | 'selector': '#id #id-div1'}, 1068 | { 1069 | 'expect': ['id-div1'], 1070 | 'level': 1, 1071 | 'name': 'ID selector, chained, matching element with specified id', 1072 | 'selector': '#id-div1, #id-div1'}, 1073 | { 1074 | 'expect': ['id-div1', 'id-div2'], 1075 | 'level': 1, 1076 | 'name': 'ID selector, chained, matching element with specified id', 1077 | 'selector': '#id-div1, #id-div2'}, 1078 | { 1079 | 'expect': ['id-div1', 'id-div2'], 1080 | 'level': 1, 1081 | 'name': 'ID Selector, chained, with type selector', 1082 | 'selector': 'div#id-div1, div#id-div2'}, 1083 | { 1084 | 'expect': [], 1085 | 'level': 1, 1086 | 'name': 'ID selector, not matching non-existent descendant', 1087 | 'selector': '#id #none'}, 1088 | { 1089 | 'expect': [], 1090 | 'level': 1, 1091 | 'name': 'ID selector, not matching non-existent ancestor', 1092 | 'selector': '#none #id-div1'}, 1093 | { 1094 | 'expect': [ 1095 | 'id-li-duplicate', 1096 | 'id-li-duplicate', 1097 | 'id-li-duplicate', 1098 | 'id-li-duplicate'], 1099 | 'level': 1, 1100 | 'name': 'ID selector, matching multiple elements with duplicate id', 1101 | 'selector': '#id-li-duplicate'}, 1102 | { 1103 | 'expect': ['台北Táiběi'], 1104 | 'level': 1, 1105 | 'name': 'ID selector, matching id value using non-ASCII characters ' 1106 | '(1)', 1107 | 'selector': '#台北Táiběi'}, 1108 | { 1109 | 'expect': ['台北'], 1110 | 'level': 1, 1111 | 'name': 'ID selector, matching id value using non-ASCII characters ' 1112 | '(2)', 1113 | 'selector': '#台北'}, 1114 | { 1115 | 'expect': ['台北Táiběi', '台北'], 1116 | 'level': 1, 1117 | 'name': 'ID selector, matching id values using non-ASCII characters ' 1118 | '(1)', 1119 | 'selector': '#台北Táiběi, #台北'}, 1120 | { 1121 | 'expect': ['#foo:bar'], 1122 | 'level': 1, 1123 | 'name': 'ID selector, matching element with id with escaped character', 1124 | 'selector': '#\\#foo\\:bar'}, 1125 | { 1126 | 'expect': ['test.foo[5]bar'], 1127 | 'level': 1, 1128 | 'name': 'ID selector, matching element with id with escaped character', 1129 | 'selector': '#test\\.foo\\[5\\]bar'}, 1130 | { 1131 | 'expect': [ 1132 | 'any-namespace-div1', 1133 | 'any-namespace-div2', 1134 | 'any-namespace-div3', 1135 | 'any-namespace-div4'], 1136 | 'level': 3, 1137 | 'name': 'Namespace selector, matching element with any namespace', 1138 | 'selector': '#any-namespace *|div'}, 1139 | { 1140 | 'expect': ['no-namespace-div3'], 1141 | 'level': 3, 1142 | 'name': 'Namespace selector, matching div elements in no namespace ' 1143 | 'only', 1144 | 'selector': '#no-namespace |div'}, 1145 | { 1146 | 'expect': ['no-namespace-div3'], 1147 | 'level': 3, 1148 | 'name': 'Namespace selector, matching any elements in no namespace ' 1149 | 'only', 1150 | 'selector': '#no-namespace |*'}, 1151 | { 1152 | 'expect': [ 1153 | 'descendant-div1', 1154 | 'descendant-div2', 1155 | 'descendant-div3', 1156 | 'descendant-div4'], 1157 | 'level': 1, 1158 | 'name': 'Descendant combinator, matching element that is a descendant ' 1159 | 'of an element with id', 1160 | 'selector': '#descendant div'}, 1161 | { 1162 | 'exclude': ['detached', 'fragment'], 1163 | 'expect': ['descendant-div1'], 1164 | 'level': 1, 1165 | 'name': 'Descendant combinator, matching element with id that is a ' 1166 | 'descendant of an element', 1167 | 'selector': 'body #descendant-div1'}, 1168 | { 1169 | 'expect': ['descendant-div1'], 1170 | 'level': 1, 1171 | 'name': 'Descendant combinator, matching element with id that is a ' 1172 | 'descendant of an element', 1173 | 'selector': 'div #descendant-div1'}, 1174 | { 1175 | 'expect': ['descendant-div2'], 1176 | 'level': 1, 1177 | 'name': 'Descendant combinator, matching element with id that is a ' 1178 | 'descendant of an element with id', 1179 | 'selector': '#descendant #descendant-div2'}, 1180 | { 1181 | 'expect': ['descendant-div2'], 1182 | 'level': 1, 1183 | 'name': 'Descendant combinator, matching element with class that is a ' 1184 | 'descendant of an element with id', 1185 | 'selector': '#descendant .descendant-div2'}, 1186 | { 1187 | 'expect': ['descendant-div3'], 1188 | 'level': 1, 1189 | 'name': 'Descendant combinator, matching element with class that is a ' 1190 | 'descendant of an element with class', 1191 | 'selector': '.descendant-div1 .descendant-div3'}, 1192 | { 1193 | 'expect': [], 1194 | 'level': 1, 1195 | 'name': 'Descendant combinator, not matching element with id that is ' 1196 | 'not a descendant of an element with id', 1197 | 'selector': '#descendant-div1 #descendant-div4'}, 1198 | { 1199 | 'expect': ['descendant-div2'], 1200 | 'level': 1, 1201 | 'name': 'Descendant combinator, whitespace characters', 1202 | 'selector': '#descendant\t\r\n#descendant-div2'}, 1203 | { 1204 | 'expect': ['child-div1', 'child-div4'], 1205 | 'level': 2, 1206 | 'name': 'Child combinator, matching element that is a child of an ' 1207 | 'element with id', 1208 | 'selector': '#child>div'}, 1209 | { 1210 | 'expect': ['child-div1'], 1211 | 'level': 2, 1212 | 'name': 'Child combinator, matching element with id that is a child ' 1213 | 'of an element', 1214 | 'selector': 'div>#child-div1'}, 1215 | { 1216 | 'expect': ['child-div1'], 1217 | 'level': 2, 1218 | 'name': 'Child combinator, matching element with id that is a child ' 1219 | 'of an element with id', 1220 | 'selector': '#child>#child-div1'}, 1221 | { 1222 | 'expect': ['child-div2'], 1223 | 'level': 2, 1224 | 'name': 'Child combinator, matching element with id that is a child ' 1225 | 'of an element with class', 1226 | 'selector': '#child-div1>.child-div2'}, 1227 | { 1228 | 'expect': ['child-div2'], 1229 | 'level': 2, 1230 | 'name': 'Child combinator, matching element with class that is a ' 1231 | 'child of an element with class', 1232 | 'selector': '.child-div1>.child-div2'}, 1233 | { 1234 | 'expect': [], 1235 | 'level': 2, 1236 | 'name': 'Child combinator, not matching element with id that is not a ' 1237 | 'child of an element with id', 1238 | 'selector': '#child>#child-div3'}, 1239 | { 1240 | 'expect': [], 1241 | 'level': 2, 1242 | 'name': 'Child combinator, not matching element with id that is not a ' 1243 | 'child of an element with class', 1244 | 'selector': '#child-div1>.child-div3'}, 1245 | { 1246 | 'expect': [], 1247 | 'level': 2, 1248 | 'name': 'Child combinator, not matching element with class that is ' 1249 | 'not a child of an element with class', 1250 | 'selector': '.child-div1>.child-div3'}, 1251 | { 1252 | 'expect': ['child-div2'], 1253 | 'level': 2, 1254 | 'name': 'Child combinator, surrounded by whitespace', 1255 | 'selector': '#child-div1\t\r\n>\t\r\n#child-div2'}, 1256 | { 1257 | 'expect': ['child-div2'], 1258 | 'level': 2, 1259 | 'name': 'Child combinator, whitespace after', 1260 | 'selector': '#child-div1>\t\r\n#child-div2'}, 1261 | { 1262 | 'expect': ['child-div2'], 1263 | 'level': 2, 1264 | 'name': 'Child combinator, whitespace before', 1265 | 'selector': '#child-div1\t\r\n>#child-div2'}, 1266 | { 1267 | 'expect': ['child-div2'], 1268 | 'level': 2, 1269 | 'name': 'Child combinator, no whitespace', 1270 | 'selector': '#child-div1>#child-div2'}, 1271 | { 1272 | 'expect': ['adjacent-div4'], 1273 | 'level': 2, 1274 | 'name': 'Adjacent sibling combinator, matching element that is an ' 1275 | 'adjacent sibling of an element with id', 1276 | 'selector': '#adjacent-div2+div'}, 1277 | { 1278 | 'expect': ['adjacent-div4'], 1279 | 'level': 2, 1280 | 'name': 'Adjacent sibling combinator, matching element with id that ' 1281 | 'is an adjacent sibling of an element', 1282 | 'selector': 'div+#adjacent-div4'}, 1283 | { 1284 | 'expect': ['adjacent-div4'], 1285 | 'level': 2, 1286 | 'name': 'Adjacent sibling combinator, matching element with id that ' 1287 | 'is an adjacent sibling of an element with id', 1288 | 'selector': '#adjacent-div2+#adjacent-div4'}, 1289 | { 1290 | 'expect': ['adjacent-div4'], 1291 | 'level': 2, 1292 | 'name': 'Adjacent sibling combinator, matching element with class ' 1293 | 'that is an adjacent sibling of an element with id', 1294 | 'selector': '#adjacent-div2+.adjacent-div4'}, 1295 | { 1296 | 'expect': ['adjacent-div4'], 1297 | 'level': 2, 1298 | 'name': 'Adjacent sibling combinator, matching element with class ' 1299 | 'that is an adjacent sibling of an element with class', 1300 | 'selector': '.adjacent-div2+.adjacent-div4'}, 1301 | { 1302 | 'expect': ['adjacent-p2'], 1303 | 'level': 2, 1304 | 'name': 'Adjacent sibling combinator, matching p element that is an ' 1305 | 'adjacent sibling of a div element', 1306 | 'selector': '#adjacent div+p'}, 1307 | { 1308 | 'expect': [], 1309 | 'level': 2, 1310 | 'name': 'Adjacent sibling combinator, not matching element with id ' 1311 | 'that is not an adjacent sibling of an element with id', 1312 | 'selector': '#adjacent-div2+#adjacent-p2, ' 1313 | '#adjacent-div2+#adjacent-div1'}, 1314 | { 1315 | 'expect': ['adjacent-p3'], 1316 | 'level': 2, 1317 | 'name': 'Adjacent sibling combinator, surrounded by whitespace', 1318 | 'selector': '#adjacent-p2\t\r\n+\t\r\n#adjacent-p3'}, 1319 | { 1320 | 'expect': ['adjacent-p3'], 1321 | 'level': 2, 1322 | 'name': 'Adjacent sibling combinator, whitespace after', 1323 | 'selector': '#adjacent-p2+\t\r\n#adjacent-p3'}, 1324 | { 1325 | 'expect': ['adjacent-p3'], 1326 | 'level': 2, 1327 | 'name': 'Adjacent sibling combinator, whitespace before', 1328 | 'selector': '#adjacent-p2\t\r\n+#adjacent-p3'}, 1329 | { 1330 | 'expect': ['adjacent-p3'], 1331 | 'level': 2, 1332 | 'name': 'Adjacent sibling combinator, no whitespace', 1333 | 'selector': '#adjacent-p2+#adjacent-p3'}, 1334 | { 1335 | 'expect': ['sibling-div4', 'sibling-div6'], 1336 | 'level': 3, 1337 | 'name': 'General sibling combinator, matching element that is a ' 1338 | 'sibling of an element with id', 1339 | 'selector': '#sibling-div2~div'}, 1340 | { 1341 | 'expect': ['sibling-div4'], 1342 | 'level': 3, 1343 | 'name': 'General sibling combinator, matching element with id that is ' 1344 | 'a sibling of an element', 1345 | 'selector': 'div~#sibling-div4'}, 1346 | { 1347 | 'expect': ['sibling-div4'], 1348 | 'level': 3, 1349 | 'name': 'General sibling combinator, matching element with id that is ' 1350 | 'a sibling of an element with id', 1351 | 'selector': '#sibling-div2~#sibling-div4'}, 1352 | { 1353 | 'expect': ['sibling-div4', 'sibling-div6'], 1354 | 'level': 3, 1355 | 'name': 'General sibling combinator, matching element with class that ' 1356 | 'is a sibling of an element with id', 1357 | 'selector': '#sibling-div2~.sibling-div'}, 1358 | { 1359 | 'expect': ['sibling-p2', 'sibling-p3'], 1360 | 'level': 3, 1361 | 'name': 'General sibling combinator, matching p element that is a ' 1362 | 'sibling of a div element', 1363 | 'selector': '#sibling div~p'}, 1364 | { 1365 | 'expect': [], 1366 | 'level': 3, 1367 | 'name': 'General sibling combinator, not matching element with id ' 1368 | 'that is not a sibling after a p element', 1369 | 'selector': '#sibling>p~div'}, 1370 | { 1371 | 'expect': [], 1372 | 'level': 3, 1373 | 'name': 'General sibling combinator, not matching element with id ' 1374 | 'that is not a sibling after an element with id', 1375 | 'selector': '#sibling-div2~#sibling-div3, ' 1376 | '#sibling-div2~#sibling-div1'}, 1377 | { 1378 | 'expect': ['sibling-p3'], 1379 | 'level': 3, 1380 | 'name': 'General sibling combinator, surrounded by whitespace', 1381 | 'selector': '#sibling-p2\t\r\n~\t\r\n#sibling-p3'}, 1382 | { 1383 | 'expect': ['sibling-p3'], 1384 | 'level': 3, 1385 | 'name': 'General sibling combinator, whitespace after', 1386 | 'selector': '#sibling-p2~\t\r\n#sibling-p3'}, 1387 | { 1388 | 'expect': ['sibling-p3'], 1389 | 'level': 3, 1390 | 'name': 'General sibling combinator, whitespace before', 1391 | 'selector': '#sibling-p2\t\r\n~#sibling-p3'}, 1392 | { 1393 | 'expect': ['sibling-p3'], 1394 | 'level': 3, 1395 | 'name': 'General sibling combinator, no whitespace', 1396 | 'selector': '#sibling-p2~#sibling-p3'}, 1397 | { 1398 | 'expect': ['group-em1', 'group-strong1'], 1399 | 'level': 1, 1400 | 'name': 'Syntax, group of selectors separator, surrounded by ' 1401 | 'whitespace', 1402 | 'selector': '#group em\t\r \n,\t\r \n#group strong'}, 1403 | { 1404 | 'expect': ['group-em1', 'group-strong1'], 1405 | 'level': 1, 1406 | 'name': 'Syntax, group of selectors separator, whitespace after', 1407 | 'selector': '#group em,\t\r\n#group strong'}, 1408 | { 1409 | 'expect': ['group-em1', 'group-strong1'], 1410 | 'level': 1, 1411 | 'name': 'Syntax, group of selectors separator, whitespace before', 1412 | 'selector': '#group em\t\r\n,#group strong'}, 1413 | { 1414 | 'expect': ['group-em1', 'group-strong1'], 1415 | 'level': 1, 1416 | 'name': 'Syntax, group of selectors separator, no whitespace', 1417 | 'selector': '#group em,#group strong'}, 1418 | { 1419 | 'expect': [], 1420 | 'level': 3, 1421 | 'name': 'Slotted selector', 1422 | 'selector': '::slotted(foo)'}, 1423 | { 1424 | 'expect': [], 1425 | 'level': 3, 1426 | 'name': 'Slotted selector (no matching closing paren)', 1427 | 'selector': '::slotted(foo'}] 1428 | --------------------------------------------------------------------------------