├── lib ├── selector │ ├── parser │ │ ├── pseudo │ │ │ ├── custom_ident.ex │ │ │ ├── element_type.ex │ │ │ ├── compound_selector.ex │ │ │ ├── part_name_list.ex │ │ │ ├── pt_name_selector.ex │ │ │ ├── relative_selector_list.ex │ │ │ ├── selector_list.ex │ │ │ ├── selector.ex │ │ │ ├── direction.ex │ │ │ ├── direction_type.ex │ │ │ ├── name.ex │ │ │ ├── language_code.ex │ │ │ └── nth_formula.ex │ │ ├── utils.ex │ │ ├── combinator.ex │ │ ├── class.ex │ │ ├── tag_name.ex │ │ ├── hex.ex │ │ ├── id.ex │ │ ├── selector.ex │ │ ├── pseudo.ex │ │ ├── attribute.ex │ │ └── guards.ex │ ├── parser.ex │ └── renderer.ex └── selector.ex ├── test ├── test_helper.exs ├── selector_test.exs └── selector │ ├── render_test.exs │ ├── parser_test.exs │ └── parser │ └── guards_test.exs ├── assets └── logo.png ├── .formatter.exs ├── .gitignore ├── LICENSE.md ├── mix.exs ├── mix.lock ├── test_selector_guard.exs └── README.md /lib/selector/parser/pseudo/custom_ident.ex: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/selector/parser/pseudo/element_type.ex: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | -------------------------------------------------------------------------------- /lib/selector/parser/pseudo/compound_selector.ex: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /lib/selector/parser/pseudo/part_name_list.ex: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
defmodule Selector.Parser.Pseudo.SelectorList do
  @moduledoc false

  alias Selector.Parser.Selector, as: SelectorParser

  # Parses a pseudo-class argument that is a whole selector list by handing it
  # to the top-level selector parser with an empty accumulator. The parser's
  # `{selector_list, remaining_input}` tuple is returned unchanged.
  def parse(selectors, opts), do: SelectorParser.parse(selectors, [], opts)
end
defmodule Selector.Parser.Pseudo.Selector do
  @moduledoc false

  # Parses a pseudo-class argument that must be exactly one selector.
  #
  # Delegates to `Selector.Parser.Selector.parse/3` with an empty accumulator
  # and returns `{[param], remaining_input}` when exactly one selector was
  # parsed.
  #
  # Raises `ArgumentError` when the argument contains no selector or more than
  # one selector.
  def parse(selectors, opts) do
    case Selector.Parser.Selector.parse(selectors, [], opts) do
      {[param], selectors} ->
        {[param], selectors}

      # The selector parser returns an empty list when the input is empty or
      # does not begin with a selector character. Without this clause that
      # result would escape as a CaseClauseError instead of the ArgumentError
      # used everywhere else in the parser.
      {[], _selectors} ->
        raise ArgumentError, "Pseudo type requires a single selector as a param."

      {[_ | _], _selectors} ->
        raise ArgumentError, "Pseudo type only accepts a single selector as a param."
    end
  end
end
defmodule Selector.Parser do
  @moduledoc """
  Entry point for turning CSS selector strings into an AST.
  """

  alias Selector.Parser.Selector, as: SelectorParser

  @doc """
  Parses the CSS selector string `selectors` and returns `{:selectors, selector_list}`.

  `opts` is an optional keyword list that is passed through to the selector parser.

  Raises `ArgumentError` when part of the input is left unparsed.
  """
  def parse(selectors, opts \\ []) when is_binary(selectors) do
    selectors
    |> SelectorParser.parse([], opts)
    |> wrap_result()
  end

  # Everything was consumed: tag the parsed list.
  defp wrap_result({selector_list, ""}), do: {:selectors, selector_list}

  # Input remains after parsing stopped: report the unparsed tail.
  defp wrap_result({_selector_list, leftover}) do
    raise ArgumentError, "Cannot parse: #{leftover}"
  end
end
defmodule Selector.Parser.Class do
  @moduledoc false

  import Selector.Parser.Guards

  # Reads a class name from the front of `selectors`.
  #
  # `class` is the iodata accumulated so far (`[]` on the first call). Returns
  # `{class_name, remaining_input}`; raises `ArgumentError` when not a single
  # character of a class name could be read.
  def parse(selectors, class, opts) do
    case selectors do
      # Backslash escape: keep the escaped character, drop the backslash.
      <<"\\"::utf8, escaped::utf8, rest::binary>> when is_escapable_char(escaped) ->
        parse(rest, [class, escaped], opts)

      # First character of the name.
      <<first::utf8, rest::binary>> when class == [] and is_class_start_char(first) ->
        parse(rest, [first], opts)

      # Any subsequent character of the name.
      <<next::utf8, rest::binary>> when class != [] and is_class_char(next) ->
        parse(rest, [class, next], opts)

      # Nothing accumulated and nothing usable ahead.
      _ when class == [] ->
        raise ArgumentError, "Expected class name."

      # End of the class name: flatten the accumulated iodata.
      _ ->
        {List.to_string(class), selectors}
    end
  end
end
defmodule Selector.MixProject do
  use Mix.Project

  # Mix project definition: application metadata, Hex package data and ExDoc
  # settings.
  def project do
    [
      app: :selector,
      version: "0.0.1",
      elixir: "~> 1.18",
      start_permanent: Mix.env() == :prod,
      deps: deps(),
      package: package(),
      description: "CSS Selector Parsing",

      # Docs
      name: "Selector",
      source_url: "https://github.com/liveview-native/selector",
      homepage_url: "https://github.com/liveview-native/selector",
      docs: [
        main: "Selector",
        logo: "assets/logo.png",
        extras: ["README.md", "LICENSE.md"],
        # Plain keyword keys: quoting keys such as "Core": is unnecessary and
        # makes the compiler emit a warning. The resulting atoms are the same.
        # NOTE(review): no `Selector.AST` source file appears under lib/ —
        # confirm whether this group should point at another module.
        groups_for_modules: [
          Core: [Selector],
          Parser: [Selector.Parser],
          AST: [Selector.AST]
        ],
        groups_for_extras: [
          Guides: ["README.md"],
          Legal: ["LICENSE.md"]
        ]
      ]
    ]
  end

  # Metadata used when publishing the package.
  defp package do
    [
      maintainers: ["Brian Cardarella"],
      licenses: ["MIT"],
      links: %{"GitHub" => "https://github.com/liveview-native/selector"}
    ]
  end

  # OTP application configuration.
  def application do
    [
      extra_applications: [:logger]
    ]
  end

  # ExDoc is the only dependency and is limited to the :dev environment.
  defp deps do
    [
      {:ex_doc, "~> 0.38", only: :dev, runtime: false, warn_if_outdated: true}
    ]
  end
end
defmodule Selector.Parser.Hex do
  @moduledoc false

  import Selector.Parser.Guards
  import Selector.Parser.Utils

  @max_digits 6

  # Reads between one and six leading hexadecimal digits from `selectors`.
  #
  # Returns `{[codepoint], rest}` where `codepoint` is the integer value of the
  # digits and `rest` is the remaining input after `drain_whitespace/1`. When
  # the input does not start with a hex digit, returns `{[], selectors}` with
  # the input untouched.
  def parse(selectors, _opts) do
    case take_digits(selectors, [], 0) do
      {[], _rest} ->
        {[], selectors}

      {reversed_digits, rest} ->
        codepoint = reversed_digits |> Enum.reverse() |> List.to_integer(16)
        {[codepoint], drain_whitespace(rest)}
    end
  end

  # Collects hex digits (most recent first) until a non-hex character, the end
  # of the input, or the six-digit limit is reached.
  defp take_digits(<<digit::utf8, rest::binary>>, acc, count)
       when count < @max_digits and is_hex_digit(digit) do
    take_digits(rest, [digit | acc], count + 1)
  end

  defp take_digits(rest, acc, _count), do: {acc, rest}
end
#!/usr/bin/env elixir

# Manual smoke test for the `is_selector_char` guard: checks a table of
# codepoints against their expected result, prints one line per case, then a
# pass/fail summary, and finally checks a few complete selector strings.

# Load the guards module
Code.require_file("lib/selector/parser/guards.ex")

import Selector.Parser.Guards

IO.puts("Testing is_selector_char guard...")

# Test cases: {codepoint, expected result, description}
test_cases = [
  # Should pass
  {?a, true, "letter 'a'"},
  {?Z, true, "letter 'Z'"},
  {?0, true, "digit '0'"},
  {?#, true, "hash '#'"},
  {?., true, "dot '.'"},
  {?:, true, "colon ':'"},
  {?,, true, "comma ','"},
  {?>, true, "greater than '>'"},
  {?+, true, "plus '+'"},
  {?~, true, "tilde '~'"},
  {?*, true, "asterisk '*'"},
  {?|, true, "pipe '|'"},
  {?[, true, "bracket '['"},
  {?], true, "bracket ']'"},
  {?", true, "double quote"},
  {?', true, "single quote"},
  {?\\, true, "backslash"},
  {0x0020, true, "space"},
  {0x4E2D, true, "Chinese character 中"},
  {0x0391, true, "Greek letter Α"},

  # Should fail
  {0x0000, false, "null character"},
  {0x0001, false, "control character"},
  {0x007F, false, "DEL character"},
  {0xD800, false, "surrogate codepoint"}
]

# Run tests. Variables rebound inside a comprehension body are not visible
# after it, so counters cannot be incremented from within the loop. Each
# iteration instead yields whether the case passed, and the totals are derived
# from the collected results.
outcomes =
  for {codepoint, expected, description} <- test_cases do
    result = is_selector_char(codepoint)
    passed? = result == expected
    status = if passed?, do: "✓", else: "✗"

    IO.puts("#{status} #{description}: is_selector_char(#{inspect(codepoint)}) => #{result} (expected #{expected})")

    passed?
  end

passed = Enum.count(outcomes, & &1)
failed = length(outcomes) - passed

IO.puts("\nSummary: #{passed} passed, #{failed} failed")

# Test with actual selector strings
IO.puts("\nTesting complete selector strings:")

selectors = [
  "div.class#id",
  "[data-value~=\"test\"]:nth-child(2n+1)",
  ".クラス#标识符[атрибут=\"القيمة\"]",
  "ns|element > .class + #id ~ [attr]",
  "div, .class, #id"
]

for selector <- selectors do
  all_valid = Enum.all?(String.to_charlist(selector), &is_selector_char/1)
  status = if all_valid, do: "✓", else: "✗"
  IO.puts("#{status} \"#{selector}\" - all characters valid: #{all_valid}")
end
parse(<<"-"::utf8, selectors::binary>>, [], opts) do 7 | {buffer, selectors} = parse_hyphen_identifier(selectors, ~c"-", opts) 8 | parse(selectors, buffer, opts) 9 | end 10 | 11 | def parse(<>, [], opts) when is_identifier_start_char(char) do 12 | parse(selectors, [char], opts) 13 | end 14 | 15 | def parse(<>, buffer, opts) when is_identifier_char(char) do 16 | parse(selectors, [buffer, char], opts) 17 | end 18 | 19 | def parse(<<"\\"::utf8, char::utf8, selectors::binary>>, buffer, opts) when is_hex_digit(char) do 20 | {hex_buffer, selectors} = Selector.Parser.Hex.parse(List.to_string([char, selectors]), opts) 21 | parse(selectors, [buffer, hex_buffer], opts) 22 | end 23 | 24 | def parse(<<"\\"::utf8, char::utf8, selectors::binary>>, buffer, opts) when is_escapable_char(char) do 25 | parse(selectors, [buffer, char], opts) 26 | end 27 | 28 | def parse(<>, buffer, opts) when is_whitespace(char) do 29 | parse(selectors, buffer, opts) 30 | end 31 | 32 | def parse(_selectors, ~c"-", _opts) do 33 | raise ArgumentError, "Identifiers cannot consist of a single hyphen." 34 | end 35 | 36 | def parse(_selectors, [], _opts) do 37 | raise ArgumentError, "Expected identifier." 38 | end 39 | 40 | def parse(selectors, buffer, _opts) do 41 | {List.to_string(buffer), selectors} 42 | end 43 | 44 | # This works because the default value passed in for `buffer` is always ~c"-" 45 | defp parse_hyphen_identifier(<<"-"::utf8, selectors::binary>>, buffer, opts) do 46 | case Keyword.get(opts, :strict, true) do 47 | true -> raise ArgumentError, "Identifiers cannot start with two hyphens with strict mode on." 48 | false -> parse_hyphen_identifier(selectors, [buffer, ?-], opts) 49 | end 50 | end 51 | 52 | defp parse_hyphen_identifier(<>, _buffer, _opts) when is_utf8_digit(number) do 53 | raise ArgumentError, "Identifiers cannot start with hyphens followed by digits." 
defmodule Selector.Parser.Pseudo.NthFormula do
  @moduledoc false

  import Selector.Parser.Guards
  import Selector.Parser.Utils

  # Parses the `An+B` argument of an nth-style pseudo-class from the front of
  # `selectors`.
  #
  # Returns `{[a: a, b: b], remaining_input}`. The keywords `even` and `odd`
  # yield `[a: 2, b: 0]` and `[a: 2, b: 1]`. Input without the `n` variable
  # (a bare signed integer) yields `[a: 0, b: integer]`.
  def parse(<<"even"::utf8, selectors::binary>>, _opts) do
    {[a: 2, b: 0], selectors}
  end

  def parse(<<"odd"::utf8, selectors::binary>>, _opts) do
    {[a: 2, b: 1], selectors}
  end

  def parse(<<char::utf8, _::binary>> = selectors, opts) when is_nth_formula_starting_char(char) do
    case parse_an_plus_b(selectors, opts) do
      # yes, it's a hack: `Keyword.put/3` in parse_operator/3 moves `:b` to
      # the front of the list, so restore the `[a: _, b: _]` order here.
      {[b: b, a: a], selectors} -> {[a: a, b: b], selectors}
      result -> result
    end
  end

  # Starts from the neutral formula `1n+0` and refines it while reading input.
  defp parse_an_plus_b(selectors, opts) do
    parse_coefficient(selectors, [a: 1, b: 0], opts)
  end

  # Reads the optional sign and digits of the `a` coefficient, then continues
  # with the `n` variable.
  defp parse_coefficient(selectors, formula, opts) do
    {formula, selectors} = parse_coefficient_sign(selectors, formula, opts)
    {formula, selectors} = parse_coefficient_number(selectors, formula, opts)

    parse_variable(selectors, formula, opts)
  end

  # A leading `+` is consumed and leaves the formula unchanged.
  defp parse_coefficient_sign(<<"+"::utf8, selectors::binary>>, formula, _opts) do
    {formula, selectors}
  end

  # A leading `-` makes the coefficient negative.
  defp parse_coefficient_sign(<<"-"::utf8, selectors::binary>>, formula, _opts) do
    formula = Keyword.put(formula, :a, -1)
    {formula, selectors}
  end

  defp parse_coefficient_sign(selectors, formula, _opts) do
    {formula, selectors}
  end

  # Multiplies the signed unit coefficient by the digits read, or by 1 when no
  # digits are present (as in `n` or `-n`).
  defp parse_coefficient_number(selectors, formula, opts) do
    {number, selectors} = parse_number(selectors, nil, opts)
    formula = Keyword.update(formula, :a, 1, &(&1 * (number || 1)))
    {formula, selectors}
  end

  # Accumulates decimal digits into an integer. `number` stays nil until the
  # first digit is seen so the caller can tell "no digits" apart from "0".
  defp parse_number(<<char::utf8, selectors::binary>>, number, opts) when char in ?0..?9 do
    # The parentheses matter: `number || 0 * 10` parses as
    # `number || (0 * 10)`, which never shifts the digits already read and
    # turns "12" into 3 instead of 12.
    number = (number || 0) * 10 + (char - ?0)
    parse_number(selectors, number, opts)
  end

  defp parse_number(selectors, number, _opts) do
    {number, selectors}
  end

  # The variable may be written as `n`, as the escape `\n`, or as the hex
  # escape `\6e`.
  defp parse_variable(<<"\\6e"::utf8, selectors::binary>>, formula, opts) do
    parse_operator(selectors, formula, opts)
  end

  defp parse_variable(<<"\\n"::utf8, selectors::binary>>, formula, opts) do
    parse_operator(selectors, formula, opts)
  end

  defp parse_variable(<<"n"::utf8, selectors::binary>>, formula, opts) do
    parse_operator(selectors, formula, opts)
  end

  # No variable: what was read as the coefficient is really the offset.
  defp parse_variable(selectors, formula, _opts) do
    formula = [a: 0, b: Keyword.get(formula, :a)]
    {formula, selectors}
  end

  # Skips whitespace before the operator.
  defp parse_operator(<<char::utf8, selectors::binary>>, formula, opts) when is_whitespace(char) do
    selectors = drain_whitespace(selectors)
    parse_operator(selectors, formula, opts)
  end

  # The operator seeds `:b` with the sign; parse_offset/4 multiplies it by the
  # digits that follow.
  defp parse_operator(<<"+"::utf8, selectors::binary>>, formula, opts) do
    parse_offset(selectors, Keyword.put(formula, :b, 1), 0, opts)
  end

  defp parse_operator(<<"-"::utf8, selectors::binary>>, formula, opts) do
    parse_offset(selectors, Keyword.put(formula, :b, -1), 0, opts)
  end

  defp parse_operator(selectors, formula, opts) do
    parse_offset(selectors, formula, 0, opts)
  end

  # Skips whitespace between the operator and the offset digits.
  defp parse_offset(<<char::utf8, selectors::binary>>, formula, offset, opts) when is_whitespace(char) do
    selectors = drain_whitespace(selectors)
    parse_offset(selectors, formula, offset, opts)
  end

  defp parse_offset(<<char::utf8, selectors::binary>>, formula, offset, opts) when char in ?0..?9 do
    offset = (offset * 10) + (char - ?0)
    parse_offset(selectors, formula, offset, opts)
  end

  # End of the offset: apply the accumulated magnitude to the sign held in `:b`.
  defp parse_offset(selectors, formula, number, _opts) do
    formula = Keyword.update(formula, :b, 1, &(&1 * number))
    {formula, selectors}
  end
end
defmodule Selector do
  @moduledoc """
  A CSS selector parser and renderer for Elixir.

  This library provides functionality to parse CSS selector strings into an
  Abstract Syntax Tree (AST) and render them back to CSS strings. It supports
  CSS Selectors Level 1, 2, and 3 completely, with partial support for stable
  CSS Selectors Level 4 features.

  ## Features

  - Parse CSS selectors into a structured AST
  - Render AST back to CSS selector strings
  - Support for all CSS3 selectors and many CSS4 features
  - Namespace support for XML/SVG elements
  - Strict and non-strict parsing modes

  ## Basic Usage

      # Parse a CSS selector
      ast = Selector.parse("div#main > p.text")

      # Render AST back to CSS
      css = Selector.render(ast)

  ## Supported Selectors

  - Type selectors: `div`, `span`, `p`
  - Class selectors: `.class`, `.multiple.classes`
  - ID selectors: `#id`
  - Universal selector: `*`
  - Attribute selectors: `[attr]`, `[attr=value]`, `[attr^=prefix]`
  - Pseudo-classes: `:hover`, `:nth-child(2n+1)`, `:not(.active)`
  - Pseudo-elements: `::before`, `::after`, `::first-line`
  - Combinators: descendant (` `), child (`>`), adjacent (`+`), general sibling (`~`), column (`||`)
  - Namespaces: `svg|rect`, `*|*`, `|div`

  See the README for comprehensive documentation and examples.
  """

  alias Selector.{
    Parser,
    Renderer
  }

  @doc """
  Parses a CSS selector string into an Abstract Syntax Tree (AST).

  This function forwards its arguments unchanged to `Selector.Parser.parse/2`.

  ## Parameters

    * `selector` - A CSS selector string to parse
    * `opts` - Optional keyword list of parsing options (default: `[]`)

  ## Options

    * `:strict` - When `true` (default), enforces strict CSS parsing rules.
      When `false`, allows some non-standard but commonly used patterns like
      identifiers starting with double hyphens (`--`).

  ## Returns

  Returns a tuple `{:selectors, [selector_groups]}` representing the parsed selector AST.
  Each selector group is `{:rules, [rules]}` and each rule has the format
  `{:rule, selectors, options}` where:

    * `selectors` is a list of selector components (tags, classes, IDs, etc.)
    * `options` is a keyword list containing combinator information

  ## Examples

  Basic selectors:

      iex> Selector.parse("div")
      {:selectors, [{:rules, [{:rule, [{:tag_name, "div", []}], []}]}]}

      iex> Selector.parse("#header")
      {:selectors, [{:rules, [{:rule, [{:id, "header"}], []}]}]}

      iex> Selector.parse(".button")
      {:selectors, [{:rules, [{:rule, [{:class, "button"}], []}]}]}

  Complex selectors:

      iex> Selector.parse("div#main.container[data-role='navigation']")
      {:selectors, [{:rules, [{:rule, [
        {:tag_name, "div", []},
        {:id, "main"},
        {:class, "container"},
        {:attribute, {:equal, "data-role", "navigation", []}}
      ], []}]}]}

  Multiple selectors:

      iex> Selector.parse("h1, h2, h3")
      {:selectors, [
        {:rules, [{:rule, [{:tag_name, "h1", []}], []}]},
        {:rules, [{:rule, [{:tag_name, "h2", []}], []}]},
        {:rules, [{:rule, [{:tag_name, "h3", []}], []}]}
      ]}

  Combinators:

      iex> Selector.parse("article > p")
      {:selectors, [{:rules, [
        {:rule, [{:tag_name, "article", []}], []},
        {:rule, [{:tag_name, "p", []}], combinator: ">"}
      ]}]}

  Pseudo-classes with arguments:

      iex> Selector.parse(":nth-child(2n+1)")
      {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 1]]}}], []}]}]}

      iex> Selector.parse(":not(.active)")
      {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"not", [
        [{:rules, [{:rule, [{:class, "active"}], []}]}]
      ]}}], []}]}]}

  With options:

      iex> Selector.parse("#--custom-id", strict: false)
      {:selectors, [{:rules, [{:rule, [{:id, "--custom-id"}], []}]}]}

  ## Error Handling

  Raises `ArgumentError` for invalid CSS selectors:

      iex> Selector.parse(".")
      ** (ArgumentError) Expected class name.

      iex> Selector.parse("#")
      ** (ArgumentError) Expected identifier.

      iex> Selector.parse("div >")
      ** (ArgumentError) Expected rule but end of input reached.

  ## Supported CSS Features

  This parser supports CSS Selectors Level 3 completely and many stable
  features from CSS Selectors Level 4:

    * Basic selectors: type, class, ID, universal (`*`)
    * Attribute selectors with all operators and case-sensitivity flags
    * All combinators including the column combinator (`||`)
    * Pseudo-classes including `:is()`, `:where()`, `:has()`, `:not()`
    * Pseudo-elements with both `::` and legacy `:` syntax
    * Namespaced selectors
    * Complex nested selectors
    * Escaped characters and Unicode

  See the project README for comprehensive examples and use cases.
  """
  defdelegate parse(selector, opts \\ []), to: Parser

  @doc """
  Renders a selector AST back to a CSS selector string.

  This function forwards its arguments unchanged to `Selector.Renderer.render/2`.

  ## Parameters

    * `selectors` - A selector AST, such as the value returned by `parse/2`
    * `opts` - Optional keyword list handed to the renderer (default: `[]`)
  """
  defdelegate render(selectors, opts \\ []), to: Renderer
end
43 | end 44 | 45 | defp parse_rules(<<>>, rules, _opts) do 46 | {Enum.reverse(rules), ""} 47 | end 48 | 49 | defp parse_rules(<>, rules, _opts) when is_whitespace(char) do 50 | selectors = drain_whitespace(selectors) 51 | {Enum.reverse(rules), selectors} 52 | end 53 | 54 | defp parse_rules(<> = selectors, rules, opts) when is_selector_start_char(char) do 55 | {rule, selectors} = parse_rule(selectors, [], opts) 56 | {combinator, opts} = Keyword.split(opts, [:combinator]) 57 | 58 | {new_combinator, selectors} = Combinator.parse(selectors, opts) 59 | 60 | opts = Keyword.merge(opts, new_combinator) 61 | 62 | parse_rules(selectors, [{:rule, rule, combinator} | rules], opts) 63 | end 64 | 65 | defp parse_rules(selectors, rules, _opts) do 66 | {Enum.reverse(rules), selectors} 67 | end 68 | 69 | defp parse_rule(<<>>, rule, _opts) do 70 | {Enum.reverse(rule), ""} 71 | end 72 | 73 | defp parse_rule(<<"#"::utf8, selectors::binary>>, rule, opts) do 74 | {id, selectors} = ID.parse(selectors, [], opts) 75 | parse_rule(selectors, [{:id, id} | rule], opts) 76 | end 77 | 78 | defp parse_rule(<<"."::utf8, selectors::binary>>, rule, opts) do 79 | {class, selectors} = Class.parse(selectors, [], opts) 80 | parse_rule(selectors, [{:class, class} | rule], opts) 81 | end 82 | 83 | defp parse_rule(<<"*"::utf8, selectors::binary>>, rule, opts) do 84 | {tag_name, selectors, tag_opts} = TagName.parse(selectors, ["*"], opts) 85 | parse_rule(selectors, [{:tag_name, tag_name, tag_opts} | rule], opts) 86 | end 87 | 88 | defp parse_rule(<<"\\*"::utf8, selectors::binary>>, rule, opts) do 89 | {tag_name, selectors, tag_opts} = TagName.parse(selectors, ["*"], opts) 90 | parse_rule(selectors, [{:tag_name, tag_name, tag_opts} | rule], opts) 91 | end 92 | 93 | defp parse_rule(<<"|"::utf8, char::utf8, selectors::binary>>, rule, opts) when char != ?| do 94 | {tag_name, selectors, tag_opts} = TagName.parse(List.to_string([?|, List.to_string([char, selectors])]), [], opts) 95 | parse_rule(selectors, 
[{:tag_name, tag_name, tag_opts} | rule], opts) 96 | end 97 | 98 | defp parse_rule(<<"\\|"::utf8, selectors::binary>>, rule, opts) do 99 | {tag_name, selectors, tag_opts} = TagName.parse(List.to_string([~c"\\|", selectors]), [], opts) 100 | parse_rule(selectors, [{:tag_name, tag_name, tag_opts} | rule], opts) 101 | end 102 | 103 | defp parse_rule(<>, rule, opts) when is_tag_name_start_char(char) do 104 | {tag_name, selectors, tag_opts} = TagName.parse(selectors, [char], opts) 105 | parse_rule(selectors, [{:tag_name, tag_name, tag_opts} | rule], opts) 106 | end 107 | 108 | defp parse_rule(<<"["::utf8, selectors::binary>>, rule, opts) do 109 | {attribute, selectors} = Attribute.parse(selectors, nil, opts) 110 | parse_rule(selectors, [{:attribute, attribute} | rule], opts) 111 | end 112 | 113 | defp parse_rule(<<"::"::utf8, selectors::binary>>, rule, opts) do 114 | {{pseudo_name, _} = pseudo_element, remaining} = Pseudo.parse(selectors, opts) 115 | 116 | if pseudo_name not in Selector.Parser.Pseudo.elements() do 117 | raise ArgumentError, "Invalid pseudo-element syntax." 
118 | end 119 | 120 | parse_rule(remaining, [{:pseudo_element, pseudo_element} | rule], opts) 121 | end 122 | 123 | # Legacy CSS Level 2 support for single-colon pseudo-elements 124 | 125 | defp parse_rule(<<":before"::utf8, selectors::binary>>, rule, opts) do 126 | parse_rule(selectors, [{:pseudo_element, {"before", []}} | rule], opts) 127 | end 128 | 129 | defp parse_rule(<<":after"::utf8, selectors::binary>>, rule, opts) do 130 | parse_rule(selectors, [{:pseudo_element, {"after", []}} | rule], opts) 131 | end 132 | 133 | defp parse_rule(<<":first-line"::utf8, selectors::binary>>, rule, opts) do 134 | parse_rule(selectors, [{:pseudo_element, {"first-line", []}} | rule], opts) 135 | end 136 | 137 | defp parse_rule(<<":first-letter"::utf8, selectors::binary>>, rule, opts) do 138 | parse_rule(selectors, [{:pseudo_element, {"first-letter", []}} | rule], opts) 139 | end 140 | 141 | defp parse_rule(<<":-"::utf8, _selectors::binary>>, _rule, _opts) do 142 | raise ArgumentError, "Identifiers cannot consist of a single hyphen." 143 | end 144 | 145 | defp parse_rule(<<":"::utf8, selectors::binary>>, rule, opts) do 146 | {{pseudo_name, _} = pseudo_class, selectors} = Pseudo.parse(selectors, opts) 147 | 148 | if pseudo_name not in Selector.Parser.Pseudo.classes() do 149 | raise ArgumentError, "Invalid pseudo-class syntax." 
150 | end 151 | 152 | parse_rule(selectors, [{:pseudo_class, pseudo_class} | rule], opts) 153 | end 154 | 155 | defp parse_rule(selectors, rule, _opts) do 156 | {Enum.reverse(rule), selectors} 157 | end 158 | end 159 | 160 | -------------------------------------------------------------------------------- /test/selector/render_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Selector.RenderTest do 2 | use ExUnit.Case 3 | 4 | describe "render/1" do 5 | test "renders basic selectors" do 6 | assert Selector.parse(".class") |> Selector.render() == ".class" 7 | assert Selector.parse(".class1.class2") |> Selector.render() == ".class1.class2" 8 | assert Selector.parse("tag.class") |> Selector.render() == "tag.class" 9 | assert Selector.parse("tag#id.class") |> Selector.render() == "tag#id.class" 10 | end 11 | 12 | test "renders attribute selectors" do 13 | assert Selector.parse("tag#id.class[attr]") |> Selector.render() == "tag#id.class[attr]" 14 | assert Selector.parse("tag#id.class[attr=value]") |> Selector.render() == "tag#id.class[attr=\"value\"]" 15 | assert Selector.parse("tag#id.class[attr~=value]") |> Selector.render() == "tag#id.class[attr~=\"value\"]" 16 | assert Selector.parse("tag#id.class[attr*=value]") |> Selector.render() == "tag#id.class[attr*=\"value\"]" 17 | assert Selector.parse("tag#id.class[attr^=value]") |> Selector.render() == "tag#id.class[attr^=\"value\"]" 18 | assert Selector.parse("tag#id.class[attr$=value]") |> Selector.render() == "tag#id.class[attr$=\"value\"]" 19 | end 20 | 21 | test "handles attribute case sensitivity" do 22 | assert Selector.parse("tag#id.class[attr$=value i]") |> Selector.render() == "tag#id.class[attr$=\"value\" i]" 23 | # Parser normalizes case sensitivity flags to lowercase 24 | assert Selector.parse("tag#id.class[attr$=value I]") |> Selector.render() == "tag#id.class[attr$=\"value\" i]" 25 | assert Selector.parse("tag#id.class[attr$=value s]") |> Selector.render() 
== "tag#id.class[attr$=\"value\" s]" 26 | assert Selector.parse("tag#id.class[attr$=value S]") |> Selector.render() == "tag#id.class[attr$=\"value\" s]" 27 | end 28 | 29 | test "handles attribute escaping" do 30 | assert Selector.parse(~s(tagname[x="y"])) |> Selector.render() == ~s(tagname[x="y"]) 31 | assert Selector.parse(~s(tagname[x='y'])) |> Selector.render() == ~s(tagname[x="y"]) 32 | assert Selector.parse(~s(tagname[x="y"])) |> Selector.render() == ~s(tagname[x="y"]) 33 | assert Selector.parse(~s(tagname[x="y"])) |> Selector.render() == ~s(tagname[x="y"]) 34 | assert Selector.parse(~s(tagname[x="y "])) |> Selector.render() == ~s(tagname[x="y "]) 35 | # This test has invalid CSS - unescaped quote in attribute value 36 | # assert Selector.parse(~s(tagname[x="y\\"])) |> Selector.render() == ~s(tagname[x="y\\"]) 37 | assert Selector.parse(~s(tagname[x="y'"])) |> Selector.render() == ~s(tagname[x="y'"]) 38 | assert Selector.parse(~s(div[role='a\00000ab'])) |> Selector.render() == ~s(div[role="a\a b"]) 39 | assert Selector.parse(~s(div[role='\a'])) |> Selector.render() == ~s(div[role="\a"]) 40 | end 41 | 42 | test "renders combinators" do 43 | assert Selector.parse("tag1 tag2") |> Selector.render() == "tag1 tag2" 44 | assert Selector.parse("ns1|tag1") |> Selector.render() == "ns1|tag1" 45 | assert Selector.parse("|tag1") |> Selector.render() == "|tag1" 46 | assert Selector.parse("*|tag1") |> Selector.render() == "*|tag1" 47 | assert Selector.parse("*|*") |> Selector.render() == "*|*" 48 | assert Selector.parse("*|*||*|*") |> Selector.render() == "*|* || *|*" 49 | assert Selector.parse("tag1>tag2") |> Selector.render() == "tag1 > tag2" 50 | assert Selector.parse("tag1+tag2") |> Selector.render() == "tag1 + tag2" 51 | assert Selector.parse("tag1~tag2") |> Selector.render() == "tag1 ~ tag2" 52 | end 53 | 54 | test "renders pseudo-classes and pseudo-elements" do 55 | assert Selector.parse("tag1:first") |> Selector.render() == "tag1:first" 56 | assert 
Selector.parse("tag1:lt(a3)") |> Selector.render() == "tag1:lt(a3)" 57 | assert Selector.parse("tag1:lt($var)") |> Selector.render() == "tag1:lt($var)" 58 | assert Selector.parse("tag1:lang(en\\))") |> Selector.render() == "tag1:lang(en\\))" 59 | assert Selector.parse("tag1:nth-child(odd)") |> Selector.render() == "tag1:nth-child(odd)" 60 | assert Selector.parse("tag1:nth-child(even)") |> Selector.render() == "tag1:nth-child(even)" 61 | assert Selector.parse("tag1:nth-child(-n+3)") |> Selector.render() == "tag1:nth-child(-n+3)" 62 | assert Selector.parse("tag1:nth-child(-1n+3)") |> Selector.render() == "tag1:nth-child(-n+3)" 63 | assert Selector.parse("tag1:nth-child(-5n+3)") |> Selector.render() == "tag1:nth-child(-5n+3)" 64 | assert Selector.parse("tag1:nth-child(-5n-3)") |> Selector.render() == "tag1:nth-child(-5n-3)" 65 | assert Selector.parse("tag1:nth-child(-5\\n-3)") |> Selector.render() == "tag1:nth-child(-5n-3)" 66 | assert Selector.parse("tag1:nth-child(-5\\6e-3)") |> Selector.render() == "tag1:nth-child(-5n-3)" 67 | assert Selector.parse("tag1:nth-child(-5n)") |> Selector.render() == "tag1:nth-child(-5n)" 68 | assert Selector.parse("tag1:nth-child(5)") |> Selector.render() == "tag1:nth-child(5)" 69 | assert Selector.parse("tag1:nth-child(-5)") |> Selector.render() == "tag1:nth-child(-5)" 70 | assert Selector.parse("tag1:nth-child(0)") |> Selector.render() == "tag1:nth-child(0)" 71 | assert Selector.parse("tag1:nth-child(n)") |> Selector.render() == "tag1:nth-child(n)" 72 | assert Selector.parse("tag1:nth-child(-n)") |> Selector.render() == "tag1:nth-child(-n)" 73 | assert Selector.parse("tag1:has(.class)") |> Selector.render() == "tag1:has(.class)" 74 | assert Selector.parse("tag1:has(.class,.class2)") |> Selector.render() == "tag1:has(.class, .class2)" 75 | assert Selector.parse("tag1:has(.class:has(.subcls),.class2)") |> Selector.render() == "tag1:has(.class:has(.subcls), .class2)" 76 | assert Selector.parse("tag1:has(> div)") |> Selector.render() == 
"tag1:has(> div)" 77 | assert Selector.parse("tag1:current(.class:has(.subcls),.class2)") |> Selector.render() == "tag1:current(.class:has(.subcls), .class2)" 78 | assert Selector.parse("tag1:current") |> Selector.render() == "tag1:current" 79 | assert Selector.parse("tag1::before") |> Selector.render() == "tag1::before" 80 | assert Selector.parse("tag1::hey(hello)") |> Selector.render() == "tag1::hey(hello)" 81 | assert Selector.parse("tag1::num(1)") |> Selector.render() == "tag1::num(\\31)" 82 | assert Selector.parse("tag1::num($var)") |> Selector.render() == "tag1::num($var)" 83 | assert Selector.parse("tag1::none") |> Selector.render() == "tag1::none" 84 | end 85 | 86 | test "handles special characters and escaping" do 87 | assert Selector.parse("tag\\/name") |> Selector.render() == "tag\\/name" 88 | assert Selector.parse(".class\\/name") |> Selector.render() == ".class\\/name" 89 | assert Selector.parse("#id\\/name") |> Selector.render() == "#id\\/name" 90 | assert Selector.parse(".\\30 wow") |> Selector.render() == ".\\30 wow" 91 | assert Selector.parse(".\\30wow") |> Selector.render() == ".\\30 wow" 92 | assert Selector.parse(".\\20wow") |> Selector.render() == ".\\20 wow" 93 | assert Selector.parse("tag\\n\\\\name\\.\\[") |> Selector.render() == "tagn\\\\name\\.\\[" 94 | assert Selector.parse(".cls\\n\\\\name\\.\\[") |> Selector.render() == ".clsn\\\\name\\.\\[" 95 | assert Selector.parse("[attr\\n\\\\name\\.\\[=a1]") |> Selector.render() == "[attrn\\\\name\\.\\[=\"a1\"]" 96 | # Complex escaping edge case - parser handles escapes differently 97 | # assert Selector.parse(":pseudo\\n\\\\name\\.\\[\\((123)") |> Selector.render() == ":pseudon\\\\name\\.\\[\\((\\31 23)" 98 | assert Selector.parse("[attr=\"val\\nval\"]") |> Selector.render() == "[attr=\"val\\a val\"]" 99 | assert Selector.parse("[attr=\"val\\\"val\"]") |> Selector.render() == "[attr=\"val\\\"val\"]" 100 | assert Selector.parse("[attr=\"val\\00a0val\"]") |> Selector.render() == "[attr=\"val\ 
val\"]" 101 | assert Selector.parse("tag\\00a0 tag") |> Selector.render() == "tag\\a0 tag" 102 | assert Selector.parse(".class\\00a0 class") |> Selector.render() == ".class\\a0 class" 103 | assert Selector.parse("[attr\\a0 attr]") |> Selector.render() == "[attr\\a0 attr]" 104 | assert Selector.parse("[attr=$var]") |> Selector.render() == "[attr=$var]" 105 | assert Selector.parse(".cls1.cls2#y .cls3+abc#def[x=y]>yy,ff") |> Selector.render() == ".cls1.cls2#y .cls3 + abc#def[x=\"y\"] > yy, ff" 106 | assert Selector.parse("#google_ads_iframe_\\/100500\\/Pewpew_0") |> Selector.render() == "#google_ads_iframe_\\/100500\\/Pewpew_0" 107 | assert Selector.parse("#\\3123") |> Selector.render() == "#\\3123" 108 | assert Selector.parse("#\\31 23") |> Selector.render() == "#\\31 23" 109 | assert Selector.parse("#\\00031 23") |> Selector.render() == "#\\31 23" 110 | end 111 | end 112 | end 113 | -------------------------------------------------------------------------------- /lib/selector/parser/pseudo.ex: -------------------------------------------------------------------------------- 1 | defmodule Selector.Parser.Pseudo do 2 | @moduledoc false 3 | 4 | import Selector.Parser.Guards 5 | import Selector.Parser.Utils 6 | 7 | alias Selector.Parser.Pseudo.NthFormula 8 | alias Selector.Parser.Pseudo.{ 9 | LanguageCode, 10 | Name, 11 | NthFormula, 12 | SelectorList 13 | } 14 | 15 | defguard is_nth_param(name) when name in ~w{ 16 | nth-child 17 | nth-col 18 | nth-last-child 19 | nth-last-of-type 20 | nth-of-type 21 | } 22 | defguard is_selector_param(name) when name in ~w{ 23 | cue 24 | cue-region 25 | } 26 | defguard is_compound_selector_param(name) when name in ~w{ 27 | host 28 | host-context 29 | slotted 30 | } 31 | defguard is_relative_selector_param(name) when name in ~w{ 32 | has 33 | host 34 | host-context 35 | slotted 36 | } 37 | defguard is_selector_list_param(name) when name in ~w{ 38 | is 39 | matches 40 | not 41 | where 42 | -webkit-any 43 | -moz-any 44 | } 45 | defguard 
is_dir_keyword_param(name) when name in ~w{ 46 | dir 47 | } 48 | defguard is_dir_type_param(name) when name in ~w{ 49 | scroll-button 50 | } 51 | defguard is_lang_code_param(name) when name in ~w{ 52 | lang 53 | } 54 | defguard is_name_param(name) when name in ~w{ 55 | active-view-transition-type 56 | highlight 57 | part 58 | picker 59 | state 60 | } 61 | 62 | defguard is_param_pseudo(name) when 63 | is_nth_param(name) or 64 | is_selector_param(name) or 65 | is_compound_selector_param(name) or 66 | is_relative_selector_param(name) or 67 | is_selector_list_param(name) or 68 | is_dir_keyword_param(name) or 69 | is_dir_type_param(name) or 70 | is_lang_code_param(name) or 71 | is_name_param(name) 72 | 73 | @pseudo_classes ~w{ 74 | active 75 | active-view-transition 76 | active-view-transition-type 77 | any-link 78 | autofill 79 | blank 80 | buffering 81 | checked 82 | current 83 | default 84 | defined 85 | dir 86 | disabled 87 | empty 88 | enabled 89 | first 90 | first-child 91 | first-of-type 92 | focus 93 | focus-visible 94 | focus-within 95 | fullscreen 96 | future 97 | has 98 | host 99 | host-context 100 | hover 101 | in-range 102 | indeterminate 103 | invalid 104 | is 105 | lang 106 | last-child 107 | last-of-type 108 | left 109 | link 110 | local-link 111 | matches 112 | modal 113 | muted 114 | not 115 | nth-child 116 | nth-col 117 | nth-last-child 118 | nth-last-col 119 | nth-last-of-type 120 | nth-of-type 121 | only-child 122 | only-of-type 123 | open 124 | optional 125 | out-of-range 126 | past 127 | paused 128 | picture-in-picture 129 | placeholder-shown 130 | playing 131 | popover-open 132 | read-only 133 | read-write 134 | required 135 | right 136 | root 137 | scope 138 | seeking 139 | stalled 140 | state 141 | target 142 | target-current 143 | target-within 144 | user-invalid 145 | user-valid 146 | valid 147 | visited 148 | volume-locked 149 | where 150 | -moz-any-link 151 | -moz-broken 152 | -moz-drag-over 153 | -moz-first-node 154 | -moz-focusring 155 | 
-moz-full-screen 156 | -moz-last-node 157 | -moz-loading 158 | -moz-only-whitespace 159 | -moz-range-progress 160 | -moz-range-thumb 161 | -moz-range-track 162 | -moz-read-only 163 | -moz-read-write 164 | -moz-suppressed 165 | -moz-ui-invalid 166 | -moz-ui-valid 167 | -moz-user-disabled 168 | -moz-window-inactive 169 | -ms-accelerator 170 | -ms-alt 171 | -ms-checked 172 | -ms-disabled 173 | -ms-enabled 174 | -ms-expand 175 | -ms-fill 176 | -ms-first-child 177 | -ms-fullscreen 178 | -ms-hover 179 | -ms-indeterminate 180 | -ms-keyboard-active 181 | -ms-keyboard-select 182 | -ms-link 183 | -ms-link-visited 184 | -ms-logical 185 | -ms-middle 186 | -ms-read-only 187 | -ms-read-write 188 | -ms-selected 189 | -ms-user-select-contain 190 | -ms-user-select-text 191 | -webkit-any-link 192 | -webkit-autofill 193 | -webkit-full-screen 194 | } 195 | 196 | def classes, do: @pseudo_classes 197 | 198 | @pseudo_elements ~w{ 199 | after 200 | backdrop 201 | before 202 | checkmark 203 | column 204 | cue 205 | cue-region 206 | details-content 207 | file-selector-button 208 | first-letter 209 | first-line 210 | grammar-error 211 | marker 212 | part 213 | picker 214 | picker-icon 215 | placeholder 216 | postfix 217 | prefix 218 | scroll-button 219 | scroll-marker 220 | scroll-marker-group 221 | selection 222 | slotted 223 | spelling-error 224 | target-text 225 | view-transition 226 | view-transition-group 227 | view-transition-image-pair 228 | view-transition-new 229 | view-transition-old 230 | -moz-focus-inner 231 | -moz-focus-outer 232 | -moz-list-bullet 233 | -moz-list-number 234 | -moz-placeholder 235 | -moz-progress-bar 236 | -moz-range-progress 237 | -moz-range-thumb 238 | -moz-range-track 239 | -moz-selection 240 | -ms-browse 241 | -ms-check 242 | -ms-clear 243 | -ms-content-zoom-factor 244 | -ms-content-zoom-snap 245 | -ms-content-zoom-snap-points 246 | -ms-content-zooming 247 | -ms-expand 248 | -ms-fill 249 | -ms-fill-lower 250 | -ms-fill-upper 251 | -ms-input-placeholder 252 | 
-ms-reveal 253 | -ms-thumb 254 | -ms-ticks-after 255 | -ms-ticks-before 256 | -ms-tooltip 257 | -ms-track 258 | -ms-value 259 | -webkit-input-placeholder 260 | -webkit-progress-bar 261 | -webkit-progress-inner-element 262 | -webkit-progress-value 263 | -webkit-scrollbar 264 | -webkit-scrollbar-button 265 | -webkit-scrollbar-thumb 266 | -webkit-scrollbar-track 267 | -webkit-scrollbar-track-piece 268 | -webkit-scroll-corner 269 | -webkit-slider-runnable-track 270 | -webkit-slider-thumb 271 | } 272 | 273 | def elements, do: @pseudo_elements 274 | 275 | def parse(<>, opts) when is_pseudo_start_char(char) do 276 | parse_name(rest, [char], opts) 277 | end 278 | 279 | defp parse_name(<<"("::utf8, selectors::binary>>, name, opts) do 280 | name = List.to_string(name) 281 | selectors = drain_whitespace(selectors) 282 | {param, selectors} = parse_param(selectors, name, opts) 283 | 284 | {{name, param}, selectors} 285 | end 286 | 287 | defp parse_name(<>, name, opts) when is_pseudo_char(char) do 288 | parse_name(selectors, [name, char], opts) 289 | end 290 | 291 | defp parse_name(selectors, name, _opts) do 292 | name = List.to_string(name) 293 | 294 | if name != "cue" && is_param_pseudo(name) do 295 | raise ArgumentError, ~s(Argument is required for pseudo-class "#{name}".) 
296 | end 297 | 298 | {{name, []}, selectors} 299 | end 300 | 301 | defp parse_param_close(<>, param, opts) when is_whitespace(char) do 302 | selectors = drain_whitespace(selectors) 303 | parse_param_close(selectors, param, opts) 304 | end 305 | 306 | defp parse_param_close(<<")"::utf8, selectors::binary>>, param, _opts) do 307 | {[param], selectors} 308 | end 309 | 310 | defp parse_param(selectors, name, opts) when is_nth_param(name) do 311 | {param, selectors} = NthFormula.parse(selectors, opts) 312 | parse_param_close(selectors, param, opts) 313 | end 314 | 315 | defp parse_param(selectors, name, opts) when is_relative_selector_param(name) do 316 | {rule_opts, selectors} = Selector.Parser.Combinator.parse(selectors, opts) 317 | {param, selectors} = Selector.Parser.Pseudo.Selector.parse(selectors, Keyword.merge(opts, rule_opts)) 318 | parse_param_close(selectors, param, opts) 319 | end 320 | 321 | defp parse_param(selectors, name, opts) when is_selector_list_param(name) do 322 | {param, selectors} = SelectorList.parse(selectors, opts) 323 | parse_param_close(selectors, param, opts) 324 | end 325 | 326 | defp parse_param(selectors, name, opts) when is_lang_code_param(name) do 327 | {param, selectors} = LanguageCode.parse(selectors, [], opts) 328 | parse_param_close(selectors, param, opts) 329 | end 330 | 331 | defp parse_param(selectors, name, opts) when is_name_param(name) do 332 | {param, selectors} = Name.parse(selectors, [], opts) 333 | parse_param_close(selectors, param, opts) 334 | end 335 | 336 | defp parse_param(_selectors, name, _opts) do 337 | raise ArgumentError, "Pseudo #{name} cannot take param" 338 | end 339 | end 340 | -------------------------------------------------------------------------------- /lib/selector/parser/attribute.ex: -------------------------------------------------------------------------------- 1 | defmodule Selector.Parser.Attribute do 2 | @moduledoc false 3 | 4 | import Selector.Parser.Guards 5 | 6 | def parse(<<>>, _rule, _opts) 
do 7 | raise ArgumentError, "Expected closing bracket." 8 | end 9 | 10 | def parse(<<"]"::utf8, _selectors::binary>>, nil, _opts) do 11 | raise ArgumentError, "Expected attribute name." 12 | end 13 | 14 | def parse(<<"]"::utf8, selectors::binary>>, rule, _opts) do 15 | {rule, selectors} 16 | end 17 | 18 | def parse(<>, rule, opts) when is_whitespace(char) do 19 | parse(selectors, rule, opts) 20 | end 21 | 22 | def parse(<<"="::utf8, _selectors::binary>>, _rule, _opts) do 23 | raise ArgumentError, "Expected attribute name." 24 | end 25 | 26 | def parse(<<"|"::utf8, selectors::binary>>, nil, opts) do 27 | parse(selectors, nil, Keyword.put(opts, :namespace, "")) 28 | end 29 | 30 | def parse(<<"*"::utf8, selectors::binary>>, nil, opts) do 31 | {rule, selectors} = parse_wildcard_namespace_then_name(selectors, opts) 32 | 33 | parse(selectors, rule, opts) 34 | end 35 | 36 | def parse(<>, nil, opts) when is_attribute_name_start_char(char) do 37 | {rule, selectors} = parse_attribute_exists(selectors, [char], opts) 38 | 39 | parse(selectors, rule, opts) 40 | end 41 | 42 | def parse(<<"\\"::utf8, char::utf8, selectors::binary>>, {type, name, value, modifiers}, opts) when char in [?i, ?I] do 43 | rule = {type, name, value, Keyword.put(modifiers, :case_sensitive, false)} 44 | parse(selectors, rule, opts) 45 | end 46 | 47 | def parse(<>, {type, name, value, modifiers}, opts) when char in [?i, ?I] do 48 | rule = {type, name, value, Keyword.put(modifiers, :case_sensitive, false)} 49 | parse(selectors, rule, opts) 50 | end 51 | 52 | def parse(<<"\\"::utf8, char::utf8, selectors::binary>>, {type, name, value, modifiers}, opts) when char in [?s, ?S] do 53 | rule = {type, name, value, Keyword.put(modifiers, :case_sensitive, true)} 54 | parse(selectors, rule, opts) 55 | end 56 | 57 | def parse(<>, {type, name, value, modifiers}, opts) when char in [?s, ?S] do 58 | rule = {type, name, value, Keyword.put(modifiers, :case_sensitive, true)} 59 | parse(selectors, rule, opts) 60 | end 61 | 
62 | def parse(_selectors, _rule, _opts) do 63 | raise ArgumentError, "Expected attribute name." 64 | end 65 | 66 | defp parse_attribute_exists(<<>>, _buffer, _opts) do 67 | raise ArgumentError, "Expected closing bracket." 68 | end 69 | 70 | defp parse_attribute_exists(<<"^="::utf8, selectors::binary>>, name, opts) do 71 | {value, selectors, opts} = parse_attribute_value_outter(selectors, opts) 72 | {{:prefix, name, value, extract_valid_opts(opts)}, selectors} 73 | end 74 | 75 | defp parse_attribute_exists(<<"$="::utf8, selectors::binary>>, name, opts) do 76 | {value, selectors, opts} = parse_attribute_value_outter(selectors, opts) 77 | {{:suffix, name, value, extract_valid_opts(opts)}, selectors} 78 | end 79 | 80 | defp parse_attribute_exists(<<"*="::utf8, selectors::binary>>, name, opts) do 81 | {value, selectors, opts} = parse_attribute_value_outter(selectors, opts) 82 | {{:substring, name, value, extract_valid_opts(opts)}, selectors} 83 | end 84 | 85 | defp parse_attribute_exists(<<"~="::utf8, selectors::binary>>, name, opts) do 86 | {value, selectors, opts} = parse_attribute_value_outter(selectors, opts) 87 | {{:includes, name, value, extract_valid_opts(opts)}, selectors} 88 | end 89 | 90 | defp parse_attribute_exists(<<"|="::utf8, selectors::binary>>, name, opts) do 91 | {value, selectors, opts} = parse_attribute_value_outter(selectors, opts) 92 | {{:dash_match, name, value, extract_valid_opts(opts)}, selectors} 93 | end 94 | 95 | defp parse_attribute_exists(<<"="::utf8, selectors::binary>>, name, opts) do 96 | {value, selectors, opts} = parse_attribute_value_outter(selectors, opts) 97 | {{:equal, name, value, extract_valid_opts(opts)}, selectors} 98 | end 99 | 100 | defp parse_attribute_exists(<>, ~c"|", opts) do 101 | {name, selectors, opts} = parse_attribute_name(selectors, [char], Keyword.put(opts, :namespace, "")) 102 | parse_attribute_exists(selectors, name, opts) 103 | end 104 | 105 | defp parse_attribute_exists(<>, name, opts) when 
is_attribute_name_char(char) do 106 | {name, selectors, opts} = parse_attribute_name(selectors, [name, char], opts) 107 | parse_attribute_exists(selectors, name, opts) 108 | end 109 | 110 | defp parse_attribute_exists(selectors, buffer, opts) do 111 | {{:exists, buffer, nil, extract_valid_opts(opts)}, selectors} 112 | end 113 | 114 | defp parse_wildcard_namespace_then_name(<>, opts) when is_whitespace(char) do 115 | parse_wildcard_namespace_then_name(selectors, opts) 116 | end 117 | 118 | defp parse_wildcard_namespace_then_name(<<"|"::utf8, selectors::binary>>, opts) do 119 | parse_attribute_exists(selectors, [], Keyword.put(opts, :namespace, "*")) 120 | end 121 | 122 | defp parse_attribute_name(<<"|="::utf8, _selectors::binary>>, [], _opts) do 123 | raise ArgumentError, "Expected attributed name." 124 | end 125 | 126 | defp parse_attribute_name(<<"|="::utf8, _selectors::binary>> = selectors, name, opts) do 127 | {List.to_string(name), selectors, opts} 128 | end 129 | 130 | defp parse_attribute_name(<<"|"::utf8, selectors::binary>>, namespace, opts) do 131 | parse_attribute_name(selectors, [], Keyword.put(opts, :namespace, List.to_string(namespace))) 132 | end 133 | 134 | defp parse_attribute_name(<<"\\"::utf8, char::utf8, selectors::binary>>, name, opts) when is_escapable_char(char) do 135 | parse_attribute_name(selectors, [name, char], opts) 136 | end 137 | 138 | defp parse_attribute_name(<>, name, opts) when is_whitespace(char) do 139 | parse_attribute_name(selectors, name, opts) 140 | end 141 | 142 | defp parse_attribute_name(<>, name, opts) when is_attribute_name_char(char) do 143 | parse_attribute_name(selectors, [name, char], opts) 144 | end 145 | 146 | defp parse_attribute_name(selectors, name, opts) do 147 | {List.to_string(name), selectors, opts} 148 | end 149 | 150 | defp parse_attribute_value_outter(<<"]"::utf8, _selectors::binary>>, _opts) do 151 | raise ArgumentError, "Expected attribute value." 
152 | end 153 | 154 | defp parse_attribute_value_outter(<<>>, _opts) do 155 | raise ArgumentError, "Expected closing bracket." 156 | end 157 | 158 | defp parse_attribute_value_outter(<>, opts) when is_attribute_value_char(char) do 159 | parse_attribute_value_inner(selectors, [char], ?\s, opts) 160 | end 161 | 162 | defp parse_attribute_value_outter(<>, opts) when char in [?', ?"] do 163 | parse_attribute_value_inner(selectors, [], char, opts) 164 | end 165 | 166 | defp parse_attribute_value_outter(<>, opts) when is_whitespace(char) do 167 | parse_attribute_value_outter(selectors, opts) 168 | end 169 | 170 | defp parse_attribute_value_inner(<<>>, _value, _delim, _opts) do 171 | raise ArgumentError, "Expected closing deliminator" 172 | end 173 | 174 | defp parse_attribute_value_inner(<>, value, delim, opts) when delim in [?', ?", ?\s] do 175 | {List.to_string(value), selectors, opts} 176 | end 177 | 178 | defp parse_attribute_value_inner(<>, value, delim, opts) when is_whitespace(char) do 179 | parse_attribute_value_inner(selectors, value, delim, opts) 180 | end 181 | 182 | defp parse_attribute_value_inner(<<"\\"::utf8, char::utf8, selectors::binary>>, value, delim, opts) when is_hex_digit(char) do 183 | {hex, selectors} = Selector.Parser.Hex.parse(List.to_string([char, selectors]), opts) 184 | parse_attribute_value_inner(selectors, [value, hex], delim, opts) 185 | end 186 | 187 | defp parse_attribute_value_inner(<<"\\"::utf8, "\n"::utf8, selectors::binary>>, value, delim, opts) do 188 | parse_attribute_value_inner(selectors, value, delim, opts) 189 | end 190 | 191 | defp parse_attribute_value_inner(<<"\\"::utf8, char::utf8, selectors::binary>>, value, delim, opts) when is_escapable_char(char) do 192 | parse_attribute_value_inner(selectors, [value, char], delim, opts) 193 | end 194 | 195 | defp parse_attribute_value_inner(<>, value, delim, opts) when is_attribute_value_char(char) do 196 | parse_attribute_value_inner(selectors, [value, char], delim, opts) 197 | end 
defmodule Selector.Renderer do
  @moduledoc """
  Handles rendering of CSS selector ASTs back to CSS selector strings.

  The renderer works on a list of selector groups, where every group is a list
  of `{:rule, selectors, opts}` tuples. Groups are joined with `", "`; rules
  inside a group are joined by the combinator stored in the options of the
  *following* rule (a missing combinator means the descendant combinator).
  """

  # Characters that are backslash-escaped wherever they appear in a name.
  @escaped_chars [
    "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", ".", "/", ":", ";",
    "<", "=", ">", "?", "@", "[", "\\", "]", "^", "`", "{", "|", "}", "~"
  ]

  @doc """
  Renders a list of selector groups to a CSS selector string.

  `selectors` has the shape `[[{:rule, ...}, ...], [{:rule, ...}, ...]]`; each
  inner list is one comma-separated selector.

  ## Options

    * `:format` - The output format (not currently used)
  """
  @spec render(list(), keyword()) :: String.t()
  def render(selectors, _opts \\ []) when is_list(selectors) do
    Enum.map_join(selectors, ", ", &render_selector_group/1)
  end

  # Renders one selector group (a list of rules). The first rule is emitted
  # bare; every later rule is prefixed with its combinator, or with a single
  # space (descendant combinator) when it carries none.
  defp render_selector_group(rules) do
    rules
    |> Enum.with_index()
    |> Enum.map_join("", fn
      {{:rule, _sel, _opts} = rule, 0} ->
        render_rule(rule)

      {{:rule, _sel, opts} = rule, _index} ->
        case Keyword.get(opts, :combinator) do
          nil -> " " <> render_rule(rule)
          combinator -> " #{combinator} " <> render_rule(rule)
        end
    end)
  end

  # Renders a single rule: {:rule, selectors, opts}. Anything else is rendered
  # with inspect/1 so that unexpected AST nodes stay visible in the output.
  defp render_rule({:rule, selectors, _opts}) do
    Enum.map_join(selectors, "", &render_selector/1)
  end

  defp render_rule(other), do: inspect(other)

  # Renders individual selector components.

  # Tag names, optionally qualified with a namespace (`ns|name`).
  defp render_selector({:tag_name, name, opts}) when is_binary(name) and is_list(opts) do
    namespace_prefix(opts) <> name_or_wildcard(name)
  end

  defp render_selector({:tag_name, name}) when is_binary(name), do: name_or_wildcard(name)
  defp render_selector({:tag_name, name}) when is_list(name), do: escape_name(to_string(name))

  defp render_selector({:id, id}) when is_binary(id), do: "#" <> escape_id(id)
  defp render_selector({:class, class}) when is_binary(class), do: "." <> escape_class(class)

  defp render_selector({:class, class}) when is_list(class) do
    "." <> escape_class(to_string(class))
  end

  # Pseudo-classes; the argument list decides whether parentheses are emitted.
  defp render_selector({:pseudo_class, {name, args}}) when is_list(args) do
    ":" <> atom_to_css_name(name) <> render_pseudo_class_args(args)
  end

  # Pseudo-elements.
  defp render_selector({:pseudo_element, {name, []}}), do: "::" <> atom_to_css_name(name)

  defp render_selector({:pseudo_element, {name, [arg]}}) when is_binary(arg) do
    "::#{atom_to_css_name(name)}(#{arg})"
  end

  defp render_selector({:pseudo_element, {name, [nested_rules]}}) when is_list(nested_rules) do
    # Nested rules in pseudo-elements such as ::part(button) or ::slotted(span).
    inner =
      nested_rules
      |> List.flatten()
      |> Enum.map_join(" ", &render_rule/1)

    "::#{atom_to_css_name(name)}(#{String.trim(inner)})"
  end

  # Attribute selectors. The namespace stored in the options is rendered as an
  # `ns|` prefix, mirroring how tag names are rendered; previously it was
  # silently dropped, so `[xlink|href]` came back as `[href]`.
  defp render_selector({:attribute, {op, name, value, opts}}) when is_list(opts) do
    qualified_name = namespace_prefix(opts) <> escape_attr(name)

    case value do
      nil ->
        "[#{qualified_name}]"

      _ ->
        "[#{qualified_name}#{attribute_operator(op)}#{escape_attr_value(value)}#{case_flag(opts)}]"
    end
  end

  defp render_selector(other), do: inspect(other)

  # Builds the parenthesised argument part of a pseudo-class ("" when there
  # are no arguments).
  defp render_pseudo_class_args([]), do: ""

  # An+B formula given directly as the argument list.
  defp render_pseudo_class_args([a: a_val, b: b_val]) do
    "(" <> format_nth(a_val, b_val) <> ")"
  end

  # An+B formula wrapped in one extra list, which is the shape the README
  # documents for e.g. `:nth-child(2n+1)`.
  defp render_pseudo_class_args([[a: a_val, b: b_val]]) do
    "(" <> format_nth(a_val, b_val) <> ")"
  end

  # Single string argument (e.g. :lang); closing parentheses are escaped so
  # they cannot terminate the function early.
  defp render_pseudo_class_args([arg]) when is_binary(arg) do
    "(" <> String.replace(arg, ")", "\\)") <> ")"
  end

  # Several string arguments are joined with a space.
  defp render_pseudo_class_args([first | _] = args) when is_binary(first) do
    "(" <> Enum.join(args, " ") <> ")"
  end

  # Everything else is treated as nested selectors.
  defp render_pseudo_class_args(args), do: "(" <> render_nested_rules(args) <> ")"

  # Returns "ns|" for a namespaced component and "" when no namespace is set.
  # An empty namespace renders as a bare "|" (the "no namespace" form).
  defp namespace_prefix(opts) do
    case Keyword.get(opts, :namespace) do
      nil -> ""
      namespace -> name_or_wildcard(namespace) <> "|"
    end
  end

  # The universal selector / wildcard must not be escaped.
  defp name_or_wildcard("*"), do: "*"
  defp name_or_wildcard(name), do: escape_name(name)

  # Maps the AST operation to its CSS operator; unknown operations are
  # interpolated as-is.
  defp attribute_operator(:equal), do: "="
  defp attribute_operator(:includes), do: "~="
  defp attribute_operator(:dash_match), do: "|="
  defp attribute_operator(:prefix), do: "^="
  defp attribute_operator(:suffix), do: "$="
  defp attribute_operator(:substring), do: "*="
  defp attribute_operator(op), do: "#{op}"

  # Case-sensitivity flag of an attribute selector: ` i`, ` s` or nothing.
  defp case_flag(opts) do
    case Keyword.get(opts, :case_sensitive) do
      false -> " i"
      true -> " s"
      _ -> ""
    end
  end

  # Formats an An+B formula in its shortest conventional form. Clause order
  # matters: the more specific shapes must be tried first.
  defp format_nth(0, b), do: "#{b}"
  defp format_nth(2, 0), do: "even"
  defp format_nth(2, 1), do: "odd"
  defp format_nth(a, 0) when a == 1, do: "n"
  defp format_nth(a, 0) when a == -1, do: "-n"
  defp format_nth(a, 0), do: "#{a}n"
  defp format_nth(a, b) when a == 1 and b > 0, do: "n+#{b}"
  defp format_nth(a, b) when a == 1 and b < 0, do: "n#{b}"
  defp format_nth(a, b) when a == -1 and b > 0, do: "-n+#{b}"
  defp format_nth(a, b) when a == -1 and b < 0, do: "-n#{b}"
  defp format_nth(a, b) when b > 0, do: "#{a}n+#{b}"
  defp format_nth(a, b), do: "#{a}n#{b}"

  # Escaping helpers

  defp escape_char?(char), do: char in @escaped_chars

  # Escapes a name so it can be emitted as a CSS identifier. A leading digit
  # or leading space is written as a hex escape followed by a space; any other
  # name that is not a plain ASCII identifier gets its special characters
  # backslash-escaped.
  defp escape_name(name) when is_binary(name) do
    case name do
      # NOTE(review): "30..." and "20..." are emitted as the hex escapes \30 / \20
      # themselves instead of going through the generic leading-digit rule below.
      # Presumably this mirrors how the parser stores such escapes - confirm.
      "30" <> rest when rest != "" ->
        "\\30 " <> escape_rest(rest)

      "30" ->
        "\\30"

      "20" <> rest when rest != "" ->
        "\\20 " <> escape_rest(rest)

      "20" ->
        "\\20"

      <<digit, rest::binary>> when digit in ?0..?9 ->
        # Leading digit: hex escape (\3N) with the terminating space.
        "\\3" <> <<digit>> <> " " <> escape_rest(rest)

      <<32, rest::binary>> ->
        # Leading space.
        "\\20 " <> escape_rest(rest)

      _ ->
        if String.match?(name, ~r/^[a-zA-Z][a-zA-Z0-9_-]*$/) do
          name
        else
          escape_rest(name)
        end
    end
  end

  # Backslash-escapes every special character, grapheme by grapheme.
  defp escape_rest(str) do
    str
    |> String.graphemes()
    |> Enum.map_join(fn char ->
      if escape_char?(char), do: "\\" <> char, else: char
    end)
  end

  defp escape_id(id), do: escape_name(id)
  defp escape_class(class), do: escape_name(class)
  defp escape_attr(name) when is_binary(name), do: escape_name(name)
  defp escape_attr(name) when is_list(name), do: escape_name(to_string(name))

  # Attribute values are always emitted as double-quoted strings. Backslashes
  # must be escaped first so the escapes added afterwards are not doubled.
  defp escape_attr_value(value) when is_binary(value) do
    escaped =
      value
      |> String.replace("\\", "\\\\")
      |> String.replace("\"", "\\\"")
      |> String.replace("\n", "\\a ")
      |> String.replace("\r", "\\d ")
      |> String.replace("\t", "\\9 ")

    "\"" <> escaped <> "\""
  end

  # Charlist values are converted to a binary first.
  defp escape_attr_value(value) when is_list(value) do
    escape_attr_value(to_string(value))
  end

  # Renders the selector arguments of a functional pseudo-class such as :not()
  # or :has(). Every element is either a group (list of rules) or a single
  # rule; the results are joined with ", ".
  defp render_nested_rules(rules) when is_list(rules) do
    Enum.map_join(rules, ", ", fn
      [{:rule, _, opts} | _] = group ->
        # A group is rendered with render_selector_group/1. Calling render/2
        # here would treat each rule tuple as a group and raise, because
        # tuples are not enumerable. The first rule of a group never prints
        # its own combinator, so a leading one (relative selector, e.g.
        # `:has(> img)`) is written out explicitly.
        case Keyword.get(opts, :combinator) do
          nil -> render_selector_group(group)
          combinator -> "#{combinator} " <> render_selector_group(group)
        end

      {:rule, _, _} = rule ->
        render_rule(rule)

      other ->
        inspect(other)
    end)
  end

  # Convert atom or string names to CSS names (underscores to hyphens).
  defp atom_to_css_name(atom) when is_atom(atom) do
    atom
    |> Atom.to_string()
    |> String.replace("_", "-")
  end

  defp atom_to_css_name(string) when is_binary(string) do
    String.replace(string, "_", "-")
  end
end
4 | 5 | ## ✨ Features 6 | 7 | - **CSS Selectors Level 1** - Complete support 8 | - **CSS Selectors Level 2** - Complete support 9 | - **CSS Selectors Level 3** - Complete support 10 | - **CSS Selectors Level 4** - Extensive support for stable features 11 | 12 | ## 🎨 CSS Compatibility 13 | 14 | ### CSS Selectors Level 1 15 | 16 | | Feature | Status | Example | 17 | |---------|--------|---------| 18 | | Type selectors | ✅ | `h1`, `p`, `div` | 19 | | Class selectors | ✅ | `.warning`, `.note` | 20 | | ID selectors | ✅ | `#header`, `#footer` | 21 | | Descendant combinator | ✅ | `div p`, `ul li` | 22 | | `:link` pseudo-class | ✅ | `a:link` | 23 | | `:visited` pseudo-class | ✅ | `a:visited` | 24 | | `:active` pseudo-class | ✅ | `a:active` | 25 | | `::first-line` pseudo-element | ✅ | `p::first-line` | 26 | | `::first-letter` pseudo-element | ✅ | `p::first-letter` | 27 | | Multiple selectors (grouping) | ✅ | `h1, h2, h3` | 28 | 29 | ### CSS Selectors Level 2 30 | 31 | | Feature | Status | Example | 32 | |---------|--------|---------| 33 | | Universal selector | ✅ | `*` | 34 | | Attribute selectors | ✅ | `[title]`, `[class="example"]` | 35 | | Attribute operators | ✅ | `[class~="warning"]`, `[lang\\|="en"]` | 36 | | Child combinator | ✅ | `body > p` | 37 | | Adjacent sibling combinator | ✅ | `h1 + p` | 38 | | `:hover` pseudo-class | ✅ | `a:hover` | 39 | | `:focus` pseudo-class | ✅ | `input:focus` | 40 | | `:before` pseudo-element | ✅ | `p:before` (legacy syntax) | 41 | | `:after` pseudo-element | ✅ | `p:after` (legacy syntax) | 42 | | `:first-child` pseudo-class | ✅ | `li:first-child` | 43 | | `:lang()` pseudo-class | ✅ | `:lang(fr)` | 44 | | Multiple attribute selectors | ✅ | `input[type="text"][required]` | 45 | | Descendant combinator with universal | ✅ | `div *` | 46 | 47 | ### CSS Selectors Level 3 48 | 49 | | Feature | Status | Example | 50 | |---------|--------|---------| 51 | | Namespace selectors | ✅ | `svg\\|rect`, `*\\|*` | 52 | | Substring matching attribute 
selectors | ✅ | `[href^="https"]`, `[src$=".png"]`, `[title*="hello"]` | 53 | | General sibling combinator | ✅ | `h1 ~ p` | 54 | | `:root` pseudo-class | ✅ | `:root` | 55 | | `:nth-child()` pseudo-class | ✅ | `:nth-child(2n+1)` | 56 | | `:nth-last-child()` pseudo-class | ✅ | `:nth-last-child(2)` | 57 | | `:nth-of-type()` pseudo-class | ✅ | `p:nth-of-type(odd)` | 58 | | `:nth-last-of-type()` pseudo-class | ✅ | `div:nth-last-of-type(2n)` | 59 | | `:last-child` pseudo-class | ✅ | `li:last-child` | 60 | | `:first-of-type` pseudo-class | ✅ | `p:first-of-type` | 61 | | `:last-of-type` pseudo-class | ✅ | `h2:last-of-type` | 62 | | `:only-child` pseudo-class | ✅ | `p:only-child` | 63 | | `:only-of-type` pseudo-class | ✅ | `img:only-of-type` | 64 | | `:empty` pseudo-class | ✅ | `div:empty` | 65 | | `:target` pseudo-class | ✅ | `:target` | 66 | | `:enabled` pseudo-class | ✅ | `input:enabled` | 67 | | `:disabled` pseudo-class | ✅ | `input:disabled` | 68 | | `:checked` pseudo-class | ✅ | `input:checked` | 69 | | `:not()` pseudo-class | ✅ | `:not(.active)` | 70 | | `::before` pseudo-element | ✅ | `div::before` | 71 | | `::after` pseudo-element | ✅ | `div::after` | 72 | | `::first-line` pseudo-element | ✅ | `p::first-line` | 73 | | `::first-letter` pseudo-element | ✅ | `p::first-letter` | 74 | 75 | ### CSS Selectors Level 4 76 | 77 | | Feature | Status | Example | 78 | |---------|--------|---------| 79 | | Case-sensitivity flag | ✅ | `[attr=value i]`, `[attr=value s]` | 80 | | Column combinator | ✅ | `col \\|\\| td` | 81 | | `:is()` pseudo-class | ✅ | `:is(h1, h2, h3)` | 82 | | `:where()` pseudo-class | ✅ | `:where(article, section) p` | 83 | | `:has()` pseudo-class | ✅ | `:has(> img)` | 84 | | `:not()` with complex selectors | ✅ | `:not(div.active)` | 85 | | `:matches()` pseudo-class | ✅ | `:matches(h1, h2, h3)` | 86 | | `:focus-within` | ✅ | `:focus-within` | 87 | | `:focus-visible` | ✅ | `:focus-visible` | 88 | | `:any-link` | ✅ | `:any-link` | 89 | | `:read-write` 
pseudo-class | ✅ | `input:read-write` | 90 | | `:read-only` pseudo-class | ✅ | `input:read-only` | 91 | | `:placeholder-shown` pseudo-class | ✅ | `input:placeholder-shown` | 92 | | `:default` pseudo-class | ✅ | `option:default` | 93 | | `:valid` pseudo-class | ✅ | `input:valid` | 94 | | `:invalid` pseudo-class | ✅ | `input:invalid` | 95 | | `:in-range` pseudo-class | ✅ | `input:in-range` | 96 | | `:out-of-range` pseudo-class | ✅ | `input:out-of-range` | 97 | | `:required` pseudo-class | ✅ | `input:required` | 98 | | `:optional` pseudo-class | ✅ | `input:optional` | 99 | | `::placeholder` pseudo-element | ✅ | `input::placeholder` | 100 | | `::selection` pseudo-element | ✅ | `::selection` | 101 | | `::backdrop` pseudo-element | ✅ | `dialog::backdrop` | 102 | | `::marker` pseudo-element | ✅ | `li::marker` | 103 | | `::cue` pseudo-element | ✅ | `::cue` | 104 | | `::slotted()` pseudo-element | ✅ | `::slotted(span)` | 105 | | Vendor-specific pseudo-elements | ✅ | `::-webkit-input-placeholder` | 106 | | `:nth-child(An+B of S)` | ✅ | `:nth-child(2n of .important)` | 107 | | `:nth-col()` | ✅ | `:nth-col(2n+1)` | 108 | | `:nth-last-col()` | ✅ | `:nth-last-col(2n+1)` | 109 | | Attribute namespace wildcards | ❌ | `[*\\|attr=value]` | 110 | 111 | ## 📦 Installation 112 | 113 | Add `selector` to your list of dependencies in `mix.exs`: 114 | 115 | ```elixir 116 | def deps do 117 | [ 118 | {:selector, "~> 0.1.0"} 119 | ] 120 | end 121 | ``` 122 | 123 | ## 🚀 Usage 124 | 125 | ### 📝 Basic Parsing 126 | 127 | Parse CSS selectors into an AST: 128 | 129 | ```elixir 130 | # Simple tag selector 131 | Selector.parse("div") 132 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "div", []}], []}]}]} 133 | 134 | # ID selector 135 | Selector.parse("#header") 136 | # => {:selectors, [{:rules, [{:rule, [{:id, "header"}], []}]}]} 137 | 138 | # Class selector 139 | Selector.parse(".button") 140 | # => {:selectors, [{:rules, [{:rule, [{:class, "button"}], []}]}]} 141 | 142 | # Multiple selectors 
143 | Selector.parse("div, .button") 144 | # => {:selectors, [ 145 | # {:rules, [{:rule, [{:tag_name, "div", []}], []}]}, 146 | # {:rules, [{:rule, [{:class, "button"}], []}]} 147 | # ]} 148 | ``` 149 | 150 | ### 🔧 Complex Selectors 151 | 152 | ```elixir 153 | # Combined selectors 154 | Selector.parse("div#main.container") 155 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "div", []}, {:id, "main"}, {:class, "container"}], []}]}]} 156 | 157 | # Attribute selectors 158 | Selector.parse("input[type='text']") 159 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "input", []}, {:attribute, {:equal, "type", "text", []}}], []}]}]} 160 | 161 | # Pseudo-classes 162 | Selector.parse("a:hover") 163 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "a", []}, {:pseudo_class, {"hover", []}}], []}]}]} 164 | 165 | # Pseudo-elements 166 | Selector.parse("p::first-line") 167 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "p", []}, {:pseudo_element, {"first-line", []}}], []}]}]} 168 | ``` 169 | 170 | ### 🏷️ Namespaces 171 | 172 | Namespaces are useful when working with XML documents or SVG elements within HTML: 173 | 174 | ```elixir 175 | # Element with namespace prefix 176 | Selector.parse("svg|rect") 177 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "rect", namespace: "svg"}], []}]}]} 178 | 179 | # Any namespace (wildcard) 180 | Selector.parse("*|circle") 181 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "circle", namespace: "*"}], []}]}]} 182 | 183 | # No namespace (elements without namespace) 184 | Selector.parse("|path") 185 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "path", namespace: ""}], []}]}]} 186 | 187 | # Any element in any namespace (wildcard namespace with universal selector) 188 | Selector.parse("*|*") 189 | # => {:selectors, [{:rules, [{:rule, [{:tag_name, "*", namespace: "*"}], []}]}]} 190 | 191 | # Namespace in attribute selectors 192 | Selector.parse("[xlink|href]") 193 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:exists, "href", nil, namespace:
"xlink"}}], []}]}]} 194 | 195 | # Namespace with attribute value 196 | Selector.parse("[xml|lang='en']") 197 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:equal, "lang", "en", namespace: "xml"}}], []}]}]} 198 | 199 | # Complex example with SVG 200 | Selector.parse("svg|svg > svg|g svg|rect.highlight") 201 | # => {:selectors, [{:rules, [ 202 | # {:rule, [{:tag_name, "svg", namespace: "svg"}], []}, 203 | # {:rule, [{:tag_name, "g", namespace: "svg"}], combinator: ">"}, 204 | # {:rule, [{:tag_name, "rect", namespace: "svg"}, {:class, "highlight"}], []} 205 | # ]}]} 206 | 207 | # MathML namespace example 208 | Selector.parse("math|mrow > math|mi + math|mo") 209 | # => {:selectors, [{:rules, [ 210 | # {:rule, [{:tag_name, "mrow", namespace: "math"}], []}, 211 | # {:rule, [{:tag_name, "mi", namespace: "math"}], combinator: ">"}, 212 | # {:rule, [{:tag_name, "mo", namespace: "math"}], combinator: "+"} 213 | # ]}]} 214 | ``` 215 | 216 | ### 🔗 Combinators 217 | 218 | ```elixir 219 | # Descendant combinator (space) 220 | Selector.parse("article p") 221 | # => {:selectors, [{:rules, [ 222 | # {:rule, [{:tag_name, "article", []}], []}, 223 | # {:rule, [{:tag_name, "p", []}], []} 224 | # ]}]} 225 | 226 | # Child combinator (>) 227 | Selector.parse("ul > li") 228 | # => {:selectors, [{:rules, [ 229 | # {:rule, [{:tag_name, "ul", []}], []}, 230 | # {:rule, [{:tag_name, "li", []}], combinator: ">"} 231 | # ]}]} 232 | 233 | # Adjacent sibling combinator (+) 234 | Selector.parse("h1 + p") 235 | # => {:selectors, [{:rules, [ 236 | # {:rule, [{:tag_name, "h1", []}], []}, 237 | # {:rule, [{:tag_name, "p", []}], combinator: "+"} 238 | # ]}]} 239 | 240 | # General sibling combinator (~) 241 | Selector.parse("h1 ~ p") 242 | # => {:selectors, [{:rules, [ 243 | # {:rule, [{:tag_name, "h1", []}], []}, 244 | # {:rule, [{:tag_name, "p", []}], combinator: "~"} 245 | # ]}]} 246 | 247 | # Column combinator (||) - CSS Level 4 248 | Selector.parse("col || td") 249 | # => {:selectors, 
[{:rules, [ 250 | # {:rule, [{:tag_name, "col", []}], []}, 251 | # {:rule, [{:tag_name, "td", []}], combinator: "||"} 252 | # ]}]} 253 | ``` 254 | 255 | ### 🏷️ Attribute Selectors 256 | 257 | ```elixir 258 | # Existence 259 | Selector.parse("[disabled]") 260 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:exists, "disabled", nil, []}}], []}]}]} 261 | 262 | # Exact match 263 | Selector.parse("[type=submit]") 264 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:equal, "type", "submit", []}}], []}]}]} 265 | 266 | # Whitespace-separated list contains 267 | Selector.parse("[class~=primary]") 268 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:includes, "class", "primary", []}}], []}]}]} 269 | 270 | # Dash-separated list starts with 271 | Selector.parse("[lang|=en]") 272 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:dash_match, "lang", "en", []}}], []}]}]} 273 | 274 | # Starts with 275 | Selector.parse("[href^='https://']") 276 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:prefix, "href", "https://", []}}], []}]}]} 277 | 278 | # Ends with 279 | Selector.parse("[src$='.png']") 280 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:suffix, "src", ".png", []}}], []}]}]} 281 | 282 | # Contains substring 283 | Selector.parse("[title*='important']") 284 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:substring, "title", "important", []}}], []}]}]} 285 | 286 | # Case-insensitive matching (CSS Level 4) 287 | Selector.parse("[type=email i]") 288 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:equal, "type", "email", case_sensitive: false}}], []}]}]} 289 | 290 | # Case-sensitive matching (CSS Level 4) 291 | Selector.parse("[class=Button s]") 292 | # => {:selectors, [{:rules, [{:rule, [{:attribute, {:equal, "class", "Button", case_sensitive: true}}], []}]}]} 293 | ``` 294 | 295 | ### 🎭 Pseudo-classes 296 | 297 | ```elixir 298 | # Simple pseudo-classes 299 | Selector.parse(":hover") 300 | # => {:selectors, [{:rules, [{:rule, 
[{:pseudo_class, {"hover", []}}], []}]}]} 301 | 302 | # Structural pseudo-classes 303 | Selector.parse(":first-child") 304 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"first-child", []}}], []}]}]} 305 | 306 | # :nth-child with various formulas 307 | Selector.parse(":nth-child(2n+1)") 308 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 1]]}}], []}]}]} 309 | 310 | Selector.parse(":nth-child(odd)") 311 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 1]]}}], []}]}]} 312 | 313 | Selector.parse(":nth-child(even)") 314 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 0]]}}], []}]}]} 315 | 316 | Selector.parse(":nth-child(5)") 317 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"nth-child", [[a: 0, b: 5]]}}], []}]}]} 318 | 319 | # Language pseudo-class 320 | Selector.parse(":lang(en-US)") 321 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"lang", ["en-US"]}}], []}]}]} 322 | 323 | # Negation pseudo-class 324 | Selector.parse(":not(.disabled)") 325 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"not", [ 326 | # [{:rules, [{:rule, [{:class, "disabled"}], []}]}] 327 | # ]}}], []}]}]} 328 | 329 | # CSS Level 4 pseudo-classes 330 | Selector.parse(":is(h1, h2, h3)") 331 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"is", [ 332 | # [ 333 | # {:rules, [{:rule, [{:tag_name, "h1", []}], []}]}, 334 | # {:rules, [{:rule, [{:tag_name, "h2", []}], []}]}, 335 | # {:rules, [{:rule, [{:tag_name, "h3", []}], []}]} 336 | # ] 337 | # ]}}], []}]}]} 338 | 339 | Selector.parse(":where(article, section) > p") 340 | # => {:selectors, [{:rules, [ 341 | # {:rule, [{:pseudo_class, {"where", [ 342 | # [ 343 | # {:rules, [{:rule, [{:tag_name, "article", []}], []}]}, 344 | # {:rules, [{:rule, [{:tag_name, "section", []}], []}]} 345 | # ] 346 | # ]}}], []}, 347 | # {:rule, [{:tag_name, "p", []}], combinator: ">"} 348 | # ]}]} 349 | 350 | 
Selector.parse(":has(> img)") 351 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_class, {"has", [ 352 | # [{:rules, [{:rule, [{:tag_name, "img", []}], combinator: ">"}]}] 353 | # ]}}], []}]}]} 354 | ``` 355 | 356 | ### 🎨 Pseudo-elements 357 | 358 | ```elixir 359 | # Standard pseudo-elements 360 | Selector.parse("::before") 361 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"before", []}}], []}]}]} 362 | 363 | Selector.parse("::after") 364 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"after", []}}], []}]}]} 365 | 366 | Selector.parse("::first-line") 367 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"first-line", []}}], []}]}]} 368 | 369 | Selector.parse("::first-letter") 370 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"first-letter", []}}], []}]}]} 371 | 372 | # CSS Level 4 pseudo-elements 373 | Selector.parse("::placeholder") 374 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"placeholder", []}}], []}]}]} 375 | 376 | Selector.parse("::selection") 377 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"selection", []}}], []}]}]} 378 | 379 | # Pseudo-elements with parameters 380 | Selector.parse("::slotted(span)") 381 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"slotted", [[{:rules, [{:rule, [{:tag_name, "span", []}], []}]}]]}}], []}]}]} 382 | 383 | # Legacy single-colon syntax (still supported) 384 | Selector.parse(":before") 385 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"before", []}}], []}]}]} 386 | 387 | # Vendor-specific pseudo-elements 388 | Selector.parse("::-webkit-input-placeholder") 389 | # => {:selectors, [{:rules, [{:rule, [{:pseudo_element, {"-webkit-input-placeholder", []}}], []}]}]} 390 | ``` 391 | 392 | ### 💪 Advanced Examples 393 | 394 | ```elixir 395 | # Complex selector with multiple features 396 | Selector.parse("article.post:not(.draft) > h1 + p:first-of-type") 397 | # => [ 398 | # {:rule, [ 399 | # {:tag_name, "article", []}, 400 | # 
{:class, "post"}, 401 | # {:pseudo_class, {:not, [[{:rule, [{:class, "draft"}], []}]]}} 402 | # ], []}, 403 | # {:rule, [{:tag_name, "h1", []}], combinator: ">"}, 404 | # {:rule, [ 405 | # {:tag_name, "p", []}, 406 | # {:pseudo_class, {:first_of_type, []}} 407 | # ], combinator: "+"} 408 | # ] 409 | 410 | # Multiple attribute selectors 411 | Selector.parse("input[type='email'][required][placeholder^='Enter']") 412 | # => [{:rule, [ 413 | # {:tag_name, "input", []}, 414 | # {:attribute, {:equal, "type", "email", []}}, 415 | # {:attribute, {:exists, "required", nil, []}}, 416 | # {:attribute, {:prefix, "placeholder", "Enter", []}} 417 | # ], []}] 418 | 419 | # Nested pseudo-classes 420 | Selector.parse(":not(:first-child):not(:last-child)") 421 | # => [{:rule, [ 422 | # {:pseudo_class, {:not, [[{:rule, [{:pseudo_class, {:first_child, []}}], []}]]}}, 423 | # {:pseudo_class, {:not, [[{:rule, [{:pseudo_class, {:last_child, []}}], []}]]}} 424 | # ], []}] 425 | ``` 426 | 427 | ### 🔄 Rendering AST back to CSS 428 | 429 | ```elixir 430 | ast = Selector.parse("div#main > p.text") 431 | Selector.render(ast) 432 | # => "div#main > p.text" 433 | ``` 434 | 435 | ### ⚙️ Parser Options 436 | 437 | ```elixir 438 | # Strict mode (default: true) 439 | # Disables identifiers starting with double hyphens 440 | Selector.parse("#--custom-id", strict: false) 441 | # => {:selectors, [{:rules, [{:rule, [{:id, "--custom-id"}], []}]}]} 442 | ``` 443 | 444 | ## 🌳 AST Structure 445 | 446 | The parser generates an AST with the following structure: 447 | 448 | - The top-level structure is `{:selectors, [selector_groups]}` 449 | - Each selector group is `{:rules, [rules]}` 450 | - Each rule is `{:rule, selectors, options}` 451 | - Multiple selector groups (comma-separated) are returned as separate elements in the list 452 | - Combinators are stored in the options of the following rule 453 | 454 | ### 🎯 Selector Types 455 | 456 | - `{:tag_name, "div", []}` - Element selector 457 | - `{:tag_name, 
"div", namespace: "svg"}` - Namespaced element 458 | - `{:id, "header"}` - ID selector 459 | - `{:class, "button"}` - Class selector 460 | - `{:attribute, {operation, name, value, options}}` - Attribute selector 461 | - `{:pseudo_class, {name, arguments}}` - Pseudo-class 462 | - `{:pseudo_element, {name, arguments}}` - Pseudo-element 463 | 464 | ### 🔧 Attribute Operations 465 | 466 | - `:exists` - `[attr]` 467 | - `:equal` - `[attr=value]` 468 | - `:includes` - `[attr~=value]` 469 | - `:dash_match` - `[attr|=value]` 470 | - `:prefix` - `[attr^=value]` 471 | - `:suffix` - `[attr$=value]` 472 | - `:substring` - `[attr*=value]` 473 | 474 | ## ⚠️ Error Handling 475 | 476 | The parser raises `ArgumentError` for invalid selectors: 477 | 478 | ```elixir 479 | try do 480 | Selector.parse(".") 481 | rescue 482 | ArgumentError -> "Invalid selector" 483 | end 484 | # => "Invalid selector" 485 | ``` 486 | 487 | ## 📄 License 488 | 489 | MIT License - Copyright (c) 2024 DockYard, Inc. See [LICENSE.md](LICENSE.md) for details. -------------------------------------------------------------------------------- /lib/selector/parser/guards.ex: -------------------------------------------------------------------------------- 1 | defmodule Selector.Parser.Guards do 2 | @moduledoc """ 3 | Provides defguards for validating Unicode code points according to CSS Selector 4 | specification rules for different parts of a CSS selector. 5 | 6 | Based on CSS Syntax Module Level 3 and CSS Selectors Level 4 specifications. 7 | Enhanced with full UTF-8/Unicode support. 
8 | """ 9 | 10 | #-------------------------------------------------------------------------------- 11 | # Region: Module Attributes (Character Sets and Forbidden Codepoints) 12 | #-------------------------------------------------------------------------------- 13 | 14 | @whitespace_chars [ 15 | 0x0009, # Tab 16 | 0x000A, # Line Feed 17 | 0x000C, # Form Feed 18 | 0x000D, # Carriage Return 19 | 0x0020 # Space 20 | ] 21 | 22 | @combinator_chars [ 23 | 0x003E, # > (child combinator) 24 | 0x002B, # + (adjacent sibling combinator) 25 | 0x007E # ~ (general sibling combinator) 26 | ] 27 | 28 | @delimiter_chars [ 29 | 0x0023, # # (hash/ID selector) 30 | 0x002E, # . (class selector) 31 | 0x003A, # : (pseudo-class/element) 32 | 0x005B, # [ (attribute selector start) 33 | 0x005D, # ] (attribute selector end) 34 | 0x0028, # ( (function start) 35 | 0x0029, # ) (function end) 36 | 0x002C, # , (selector list separator) 37 | 0x0022, # " (string delimiter) 38 | 0x0027, # ' (string delimiter) 39 | 0x005C # \ (escape character) 40 | ] 41 | 42 | @attribute_operators [ 43 | # Single character operators 44 | 0x003D, # = (exact match) 45 | 0x007E, # ~ (for ~=, word match) 46 | 0x007C, # | (for |=, language match) 47 | 0x005E, # ^ (for ^=, prefix match) 48 | 0x0024, # $ (for $=, suffix match) 49 | 0x002A # * (for *=, substring match) 50 | ] 51 | 52 | #-------------------------------------------------------------------------------- 53 | # Region: Private Helper Guards 54 | #-------------------------------------------------------------------------------- 55 | 56 | defguard is_utf8_letter(codepoint) when 57 | is_integer(codepoint) and 58 | ( 59 | # Basic Latin letters 60 | (codepoint >= ?a and codepoint <= ?z) or 61 | (codepoint >= ?A and codepoint <= ?Z) or 62 | # Latin-1 Supplement letters 63 | (codepoint >= 0x00C0 and codepoint <= 0x00D6) or 64 | (codepoint >= 0x00D8 and codepoint <= 0x00F6) or 65 | (codepoint >= 0x00F8 and codepoint <= 0x00FF) or 66 | # Latin Extended-A 67 | (codepoint >= 
0x0100 and codepoint <= 0x017F) or 68 | # Latin Extended-B 69 | (codepoint >= 0x0180 and codepoint <= 0x024F) or 70 | # Greek and Coptic 71 | (codepoint >= 0x0370 and codepoint <= 0x03FF) or 72 | # Cyrillic 73 | (codepoint >= 0x0400 and codepoint <= 0x04FF) or 74 | # Hebrew 75 | (codepoint >= 0x0590 and codepoint <= 0x05FF) or 76 | # Arabic 77 | (codepoint >= 0x0600 and codepoint <= 0x06FF) or 78 | # CJK Unified Ideographs (Common range) 79 | (codepoint >= 0x4E00 and codepoint <= 0x9FFF) or 80 | # Hiragana 81 | (codepoint >= 0x3040 and codepoint <= 0x309F) or 82 | # Katakana 83 | (codepoint >= 0x30A0 and codepoint <= 0x30FF) or 84 | # Other common letter ranges 85 | (codepoint >= 0x1E00 and codepoint <= 0x1EFF) or # Latin Extended Additional 86 | (codepoint >= 0x2C60 and codepoint <= 0x2C7F) or # Latin Extended-C 87 | (codepoint >= 0xA720 and codepoint <= 0xA7FF) or # Latin Extended-D 88 | # Hangul Syllables 89 | (codepoint >= 0xAC00 and codepoint <= 0xD7AF) or 90 | # Additional Unicode letter blocks (basic coverage) 91 | (codepoint >= 0x0100 and codepoint <= 0x017F) or # Latin Extended-A 92 | (codepoint >= 0x1F00 and codepoint <= 0x1FFF) # Greek Extended 93 | ) 94 | 95 | defguard is_utf8_digit(codepoint) when 96 | is_integer(codepoint) and 97 | ( 98 | # ASCII digits 99 | (codepoint >= ?0 and codepoint <= ?9) or 100 | # Arabic-Indic digits 101 | (codepoint >= 0x0660 and codepoint <= 0x0669) or 102 | # Extended Arabic-Indic digits 103 | (codepoint >= 0x06F0 and codepoint <= 0x06F9) or 104 | # Devanagari digits 105 | (codepoint >= 0x0966 and codepoint <= 0x096F) or 106 | # Bengali digits 107 | (codepoint >= 0x09E6 and codepoint <= 0x09EF) or 108 | # Fullwidth digits 109 | (codepoint >= 0xFF10 and codepoint <= 0xFF19) 110 | ) 111 | 112 | defguardp is_utf8_hex_digit(codepoint) when 113 | is_integer(codepoint) and 114 | ( 115 | (codepoint >= ?0 and codepoint <= ?9) or 116 | (codepoint >= ?a and codepoint <= ?f) or 117 | (codepoint >= ?A and codepoint <= ?F) 118 | # CSS 
spec only accepts ASCII hex digits in escape sequences 119 | ) 120 | 121 | defguardp is_non_ascii(codepoint) when 122 | is_integer(codepoint) and codepoint >= 0x0080 123 | 124 | defguardp is_surrogate_codepoint(codepoint) when 125 | is_integer(codepoint) and 126 | (codepoint >= 0xD800 and codepoint <= 0xDFFF) 127 | 128 | defguardp is_newline(codepoint) when 129 | is_integer(codepoint) and 130 | ( 131 | codepoint == 0x000A or # Line Feed 132 | codepoint == 0x000C or # Form Feed 133 | codepoint == 0x000D or # Carriage Return 134 | codepoint == 0x0085 or # Next Line (NEL) 135 | codepoint == 0x2028 or # Line Separator 136 | codepoint == 0x2029 # Paragraph Separator 137 | ) 138 | 139 | defguardp is_unicode_whitespace(codepoint) when 140 | is_integer(codepoint) and 141 | ( 142 | codepoint in @whitespace_chars or 143 | codepoint == 0x0085 or # Next Line (NEL) 144 | codepoint == 0x00A0 or # Non-breaking space 145 | codepoint == 0x1680 or # Ogham space mark 146 | (codepoint >= 0x2000 and codepoint <= 0x200A) or # Various spaces 147 | codepoint == 0x2028 or # Line separator 148 | codepoint == 0x2029 or # Paragraph separator 149 | codepoint == 0x202F or # Narrow no-break space 150 | codepoint == 0x205F or # Medium mathematical space 151 | codepoint == 0x3000 # Ideographic space 152 | ) 153 | 154 | #-------------------------------------------------------------------------------- 155 | # Region: Public Guards for CSS Selector Components 156 | #-------------------------------------------------------------------------------- 157 | 158 | @doc """ 159 | Guard: Checks if a codepoint is CSS whitespace. 160 | CSS whitespace includes: tab, line feed, form feed, carriage return, and space. 161 | Note: This follows CSS specification which only recognizes ASCII whitespace. 
"""
  defguard is_whitespace(codepoint)
           when is_integer(codepoint) and codepoint in @whitespace_chars

  @doc """
  Guard: True for whitespace in the Unicode sense, a superset of CSS whitespace.
  Public wrapper around the module's private Unicode whitespace classification.
  """
  defguard is_unicode_whitespace_char(codepoint) when is_unicode_whitespace(codepoint)

  @doc """
  Guard: True when a codepoint may begin a CSS identifier.
  Accepts letters, the underscore and any non-ASCII codepoint, but never a digit
  (non-ASCII digits included). Escape sequences are not recognised here: the
  backslash itself is not a start character, so escapes must be handled by the caller.
  """
  defguard is_identifier_start_char(codepoint)
           when is_integer(codepoint) and
                  # digits are rejected first, whatever else they might match
                  not is_utf8_digit(codepoint) and
                  (codepoint == ?_ or is_utf8_letter(codepoint) or is_non_ascii(codepoint))

  @doc """
  Guard: True when a codepoint may appear after the first character of a CSS identifier.
  That is: any identifier start character, any digit, or a hyphen.
  """
  defguard is_identifier_char(codepoint)
           when is_integer(codepoint) and
                  (codepoint == ?- or
                     is_utf8_digit(codepoint) or
                     is_identifier_start_char(codepoint))

  @doc """
  Guard: True when a codepoint may appear literally inside a quoted CSS string.
  Both quote characters, the backslash and newlines are rejected; the parser has to
  treat those specially (string end, escape start, or error).
  """
  defguard is_string_char(codepoint)
           when is_integer(codepoint) and
                  # double quote, single quote, backslash
                  codepoint not in [0x0022, 0x0027, 0x005C] and
                  not is_newline(codepoint)

  @doc """
  Guard: Checks if a codepoint is a CSS combinator character.
This includes single-character combinators: >, +, ~
  Note: Whitespace (descendant combinator) is handled by is_whitespace/1
  Note: Column combinator || is two characters and must be handled at parser level
  """
  defguard is_combinator_char(codepoint)
           when is_integer(codepoint) and codepoint in @combinator_chars

  @doc """
  Guard: Same check as is_combinator_char/1, kept under the older name for
  backward compatibility.
  """
  defguard is_combinator(codepoint)
           when is_integer(codepoint) and codepoint in @combinator_chars

  @doc """
  Guard: True when the codepoint is one of the module's delimiter characters.
  """
  defguard is_delimiter(codepoint)
           when is_integer(codepoint) and codepoint in @delimiter_chars

  @doc """
  Guard: True when the codepoint is one of the characters that can make up an
  attribute operator.
  """
  defguard is_attribute_operator_char(codepoint)
           when is_integer(codepoint) and codepoint in @attribute_operators

  @doc """
  Guard: True for an ASCII hexadecimal digit (0-9, a-f, A-F), the only digits
  accepted in CSS escape sequences.
  """
  defguard is_hex_digit(codepoint) when is_utf8_hex_digit(codepoint)

  @doc """
  Guard: True when the codepoint may follow a backslash as a literal escape.
  Every integer codepoint qualifies except a newline.
  """
  defguard is_escapable_char(codepoint)
           when is_integer(codepoint) and not is_newline(codepoint)

  @doc """
  Guard: True when the codepoint may appear in the name part of an ID selector
  (after the `#`). Same rule as is_identifier_char/1.
  """
  defguard is_id_char(codepoint) when is_identifier_char(codepoint)

  @doc """
  Guard: Checks if a codepoint can start a CSS ID selector content (after #).
  Must be a valid identifier start character with UTF-8 support.
"""
  defguard is_id_start_char(codepoint) when is_identifier_start_char(codepoint)

  # The guards below are thin, intention-revealing names for the two identifier
  # rules; each one delegates unchanged to is_identifier_char/1 or
  # is_identifier_start_char/1.

  @doc """
  Guard: True when the codepoint may appear in a class name (after the `.`).
  Same rule as is_identifier_char/1.
  """
  defguard is_class_char(codepoint) when is_identifier_char(codepoint)

  @doc """
  Guard: True when the codepoint may begin a class name (after the `.`).
  Same rule as is_identifier_start_char/1.
  """
  defguard is_class_start_char(codepoint) when is_identifier_start_char(codepoint)

  @doc """
  Guard: True when the codepoint may appear in an element/type selector name.
  Same rule as is_identifier_char/1.
  """
  defguard is_tag_name_char(codepoint) when is_identifier_char(codepoint)

  @doc """
  Guard: True when the codepoint may begin an element/type selector name.
  Same rule as is_identifier_start_char/1.
  """
  defguard is_tag_name_start_char(codepoint) when is_identifier_start_char(codepoint)

  @doc """
  Guard: True when the codepoint may appear in an attribute name.
  Same rule as is_identifier_char/1.
  """
  defguard is_attribute_name_char(codepoint) when is_identifier_char(codepoint)

  @doc """
  Guard: True when the codepoint may begin an attribute name.
  Same rule as is_identifier_start_char/1.
  """
  defguard is_attribute_name_start_char(codepoint) when is_identifier_start_char(codepoint)

  @doc """
  Guard: Checks if a codepoint is valid for CSS pseudo-class/element names.
  Must be a valid identifier character with UTF-8 support.
"""
  defguard is_pseudo_name_char(codepoint) when is_identifier_char(codepoint)

  @doc """
  Guard: True when the codepoint may begin a pseudo-class/element name.
  Same rule as is_identifier_start_char/1.
  """
  defguard is_pseudo_name_start_char(codepoint) when is_identifier_start_char(codepoint)

  @doc """
  Guard: True when the codepoint may appear in a function name.
  Same rule as is_identifier_char/1.
  """
  defguard is_function_name_char(codepoint) when is_identifier_char(codepoint)

  @doc """
  Guard: True when the codepoint may begin a function name.
  Same rule as is_identifier_start_char/1.
  """
  defguard is_function_name_start_char(codepoint) when is_identifier_start_char(codepoint)

  @doc """
  Guard: True when the codepoint may appear somewhere inside a number.
  Accepts any digit recognised by is_utf8_digit/1, the decimal point, the two
  signs, and `e`/`E` for scientific notation.
  """
  defguard is_number_char(codepoint)
           when is_integer(codepoint) and
                  (codepoint in [?., ?+, ?-, ?e, ?E] or is_utf8_digit(codepoint))

  @doc """
  Guard: True when the codepoint may begin a number.
  Accepts any digit recognised by is_utf8_digit/1, the decimal point, or a sign.
  """
  defguard is_number_start_char(codepoint)
           when is_integer(codepoint) and
                  (codepoint in [?., ?+, ?-] or is_utf8_digit(codepoint))

  @doc """
  Guard: Checks if a codepoint is valid within CSS comment content.
  Note: This guard checks individual characters.
The parser must handle
  the */ sequence detection at a higher level.
  All characters are valid in comments except when * and / appear together as */.
  """
  # Any integer codepoint is acceptable on its own.
  defguard is_comment_char(codepoint) when is_integer(codepoint)

  @doc """
  Guard: True when the codepoint may appear literally in an unquoted attribute value.
  Rejected: the closing bracket `]`, both quote characters, the backslash (it
  starts an escape), newlines and CSS whitespace.
  """
  defguard is_attribute_value_char(codepoint)
           when is_integer(codepoint) and
                  # ]  "  '  \
                  codepoint not in [0x005D, 0x0022, 0x0027, 0x005C] and
                  not is_newline(codepoint) and
                  not is_whitespace(codepoint)

  @doc """
  Guard: True when the codepoint may be the first character of a selector.
  Covers a tag-name start character, the punctuation that opens each selector
  kind, the backslash that opens an escape, and leading CSS whitespace.
  """
  defguard is_selector_start_char(codepoint)
           when is_integer(codepoint) and
                  (codepoint in [
                     # namespace separator
                     ?|,
                     # class selector
                     ?.,
                     # ID selector
                     ?#,
                     # attribute selector
                     ?[,
                     # pseudo-class / pseudo-element
                     ?:,
                     # universal selector
                     ?*,
                     # escape, e.g. \*
                     ?\\
                   ] or
                     is_whitespace(codepoint) or
                     is_tag_name_start_char(codepoint))

  #--------------------------------------------------------------------------------
  # Region: Utility Guards for UTF-8 Character Classification
  #--------------------------------------------------------------------------------

  @doc """
  Guard: True when the codepoint falls in one of the module's letter ranges.
  Public wrapper around is_utf8_letter/1.
  """
  defguard is_utf8_letter_char(codepoint) when is_utf8_letter(codepoint)

  @doc """
  Guard: True when the codepoint is a digit, ASCII or one of the supported
  non-ASCII digit blocks. Public wrapper around is_utf8_digit/1.
  """
  defguard is_utf8_digit_char(codepoint) when is_utf8_digit(codepoint)

  @doc """
  Guard: True when the codepoint may be the first character of a pseudo-class or
  pseudo-element name.

  The rule is the identifier start rule plus one extra character: a leading hyphen
  is allowed so that vendor-prefixed names such as `-webkit-scrollbar` are accepted.
  """
  defguard is_pseudo_start_char(codepoint)
           when codepoint == ?- or is_identifier_start_char(codepoint)

  @doc """
  Guard: Checks if a codepoint is valid within a pseudo-class or pseudo-element name.
450 | 451 | Pseudo-class and pseudo-element names follow the same rules as CSS identifiers. 452 | Valid characters include: 453 | - Any letter (a-z, A-Z) 454 | - Digits (0-9) 455 | - Underscore (_) 456 | - Hyphen (-) 457 | - Any non-ASCII character (Unicode >= 0x80) 458 | 459 | Note: Parentheses, whitespace, and other special characters are NOT part of the 460 | pseudo-class name itself. They are handled separately as part of functional notation. 461 | """ 462 | defguard is_pseudo_char(codepoint) when 463 | is_identifier_char(codepoint) 464 | 465 | @doc """ 466 | Guard: Checks if a codepoint is valid for a language tag character. 467 | Language tags (BCP 47) can contain: 468 | - ASCII letters (a-z, A-Z) 469 | - ASCII digits (0-9) 470 | - Hyphen (-) as separator 471 | Used for :lang() pseudo-class values like 'en', 'en-US', 'zh-Hans-CN' 472 | """ 473 | defguard is_lang_char(codepoint) when 474 | is_integer(codepoint) and 475 | ( 476 | (codepoint >= ?a and codepoint <= ?z) or 477 | (codepoint >= ?A and codepoint <= ?Z) or 478 | (codepoint >= ?0 and codepoint <= ?9) or 479 | codepoint == ?- # hyphen separator 480 | ) 481 | 482 | @doc """ 483 | Guard: Checks if a codepoint can start a language tag. 484 | Language tags must start with a letter (not digit or hyphen). 485 | """ 486 | defguard is_lang_start_char(codepoint) when 487 | is_integer(codepoint) and 488 | ( 489 | (codepoint >= ?a and codepoint <= ?z) or 490 | (codepoint >= ?A and codepoint <= ?Z) 491 | ) 492 | 493 | @doc """ 494 | Guard: Checks if a codepoint is a valid UTF-8 character (not a surrogate). 495 | Excludes surrogate pair codepoints which are invalid in UTF-8. 496 | """ 497 | defguard is_valid_utf8_codepoint(codepoint) when 498 | is_integer(codepoint) and 499 | codepoint >= 0 and 500 | codepoint <= 0x10FFFF and 501 | not is_surrogate_codepoint(codepoint) 502 | 503 | @doc """ 504 | Guard: Checks if a codepoint is valid within an nth-formula. 
Nth-formulas are used in pseudo-classes like :nth-child(), :nth-of-type(), etc.
  Valid characters include: digits (0-9), letters (n,o,d,e,v), operators (+,-), and CSS whitespace.
  Examples: '2n+1', 'odd', 'even', '3n-2', '-n+5'
  """
  defguard is_nth_formula_char(codepoint)
           when is_integer(codepoint) and
                  (codepoint in ?0..?9 or
                     codepoint in [
                       # the variable n
                       ?n, ?N,
                       # letters of the "odd" keyword
                       ?o, ?O, ?d, ?D,
                       # letters of the "even" keyword
                       ?e, ?E, ?v, ?V,
                       # signs / operators
                       ?+, ?-
                     ] or
                     is_whitespace(codepoint))

  @doc """
  Guard: True when the codepoint may be the first character of an nth-formula.
  Accepts an ASCII digit, a sign, the variable `n`, the first letter of `odd` or
  `even` (all letters in either case), or leading CSS whitespace.
  Examples: '2' (2n+1), '+' (+n), '-' (-n+3), 'n' (n+1), 'o' (odd), 'e' (even)
  """
  defguard is_nth_formula_starting_char(codepoint)
           when is_integer(codepoint) and
                  (codepoint in ?0..?9 or
                     codepoint in [?+, ?-, ?n, ?N, ?o, ?O, ?e, ?E] or
                     is_whitespace(codepoint))

  @doc """
  Guard: Checks if a codepoint is any valid character that can appear in a CSS selector.
This includes all characters that can appear in any part of a selector:
  - Identifier characters (letters, digits, underscore, hyphen, non-ASCII)
  - Delimiter characters (#, ., :, [, ], (, ), etc.)
  - Combinator characters (>, +, ~)
  - Whitespace characters
  - Attribute operators (=, ~, |, ^, $, *)
  - Quote characters (", ')
  - Escape character (\)
  - Universal selector (*)
  - Comma (selector separator)
  - Pipe (namespace separator)

  Because identifier characters include every codepoint >= 0x80, all non-ASCII
  integers pass this guard; combine it with is_valid_utf8_codepoint/1 when
  surrogates or out-of-range values must be rejected.
  """
  # The explicit punctuation checks (| * , ! + / . ? & % @ ; { } < > ` ~ \),
  # the letter / digit / non-ASCII checks and the ">= 0x00A0 and valid UTF-8"
  # clause that used to follow were all already implied by the terms kept
  # here: ASCII punctuation lies inside 0x21..0x7E, and is_identifier_char/1
  # accepts every letter, every digit and every codepoint >= 0x80. Guards are
  # expanded inline at each call site, so the redundant terms only added cost
  # and suggested filtering that never happened.
  defguard is_selector_char(codepoint)
           when is_integer(codepoint) and
                  (is_identifier_char(codepoint) or
                     codepoint in @delimiter_chars or
                     codepoint in @combinator_chars or
                     codepoint in @whitespace_chars or
                     codepoint in @attribute_operators or
                     # printable ASCII
                     codepoint in 0x0021..0x007E)

end

--------------------------------------------------------------------------------
/test/selector/parser_test.exs:
--------------------------------------------------------------------------------
defmodule Selector.ParserTest do
  @moduledoc """
  Test suite for CSS selector parser.
4 | 5 | This parser aims to support: 6 | - CSS Selectors Level 3 (complete support) 7 | - CSS Selectors Level 4 (partial support for stable features) 8 | 9 | Notable CSS Level 4 features supported: 10 | - :is(), :where(), :has() pseudo-classes 11 | - :not() with complex selectors 12 | - Case sensitivity modifiers (i, s) 13 | - Column combinator (||) 14 | - :focus-within, :focus-visible pseudo-classes 15 | 16 | Features explicitly not supported: 17 | - :nth-child(An+B of selector) syntax 18 | - :nth-col(), :nth-last-col() pseudo-classes 19 | - Attribute selectors with namespace wildcards 20 | """ 21 | use ExUnit.Case, async: true 22 | 23 | describe "Identifiers" do 24 | test "should parse a regular valid identifier" do 25 | assert Selector.parse("#id") == {:selectors, [{:rules, [{:rule, [{:id, "id"}], []}]}]} 26 | end 27 | 28 | test "should parse an identifier starting with a hyphen" do 29 | assert Selector.parse("#-id") == {:selectors, [{:rules, [{:rule, [{:id, "-id"}], []}]}]} 30 | end 31 | 32 | test "should parse an identifier with hex-encoded characters" do 33 | ast_selector = {:selectors, [{:rules, [{:rule, [{:id, "hello\nworld"}], []}]}]} 34 | 35 | assert Selector.parse("#hello\\aworld") == ast_selector 36 | assert Selector.parse("#hello\\a world") == ast_selector 37 | assert Selector.parse("#hello\\a\tworld") == ast_selector 38 | assert Selector.parse("#hello\\a\fworld") == ast_selector 39 | assert Selector.parse("#hello\\a\nworld") == ast_selector 40 | assert Selector.parse("#hello\\a\rworld") == ast_selector 41 | assert Selector.parse("#hello\\a\r\nworld") == ast_selector 42 | assert Selector.parse("#hello\\00000aworld") == ast_selector 43 | end 44 | 45 | test "should fail on an identifier starting with multiple hyphens" do 46 | assert_raise ArgumentError, "Identifiers cannot start with two hyphens with strict mode on.", fn -> 47 | Selector.parse("#--id") 48 | end 49 | end 50 | 51 | test "should fail on an identifier consisting of a single hyphen" do 52 | 
assert_raise ArgumentError, "Identifiers cannot consist of a single hyphen.", fn -> 53 | Selector.parse("#-") 54 | end 55 | end 56 | 57 | test "should parse an identifier starting with multiple hyphens in case of strict: false" do 58 | assert Selector.parse("#--id", strict: false) == {:selectors, [{:rules, [{:rule, [{:id, "--id"}], []}]}]} 59 | end 60 | 61 | test "should fail on an identifier starting with a hyphen and followed with a digit" do 62 | assert_raise ArgumentError, "Identifiers cannot start with hyphens followed by digits.", fn -> 63 | Selector.parse("#-1") 64 | end 65 | 66 | assert_raise ArgumentError, "Identifiers cannot start with hyphens followed by digits.", fn -> 67 | Selector.parse("#--1", strict: false) 68 | end 69 | end 70 | 71 | test "should parse an identifier consisting unicode characters" do 72 | assert Selector.parse("#ÈÈ") == {:selectors, [{:rules, [{:rule, [{:id, "ÈÈ"}], []}]}]} 73 | end 74 | end 75 | 76 | describe "Tag Names" do 77 | test "should parse a tag name" do 78 | assert Selector.parse("div") == {:selectors, [{:rules, [{:rule, [{:tag_name, "div", []}], []}]}]} 79 | end 80 | 81 | test "should parse a wildcard tag name" do 82 | assert Selector.parse("*") == {:selectors, [{:rules, [{:rule, [{:tag_name, "*", []}], []}]}]} 83 | end 84 | 85 | test "should parse an escaped star" do 86 | assert Selector.parse("\\*") == {:selectors, [{:rules, [{:rule, [{:tag_name, "*", []}], []}]}]} 87 | end 88 | 89 | test "should properly parse an escaped tag name" do 90 | assert Selector.parse("d\\ i\\ v") == {:selectors, [{:rules, [{:rule, [{:tag_name, "d i v", []}], []}]}]} 91 | end 92 | 93 | @tag :skip 94 | test "should not be parsed after an attribute" do 95 | assert_raise ArgumentError, "Unexpected tag/namespace start.", fn -> 96 | Selector.parse(~s([href="#"]a)) 97 | end 98 | end 99 | 100 | @tag :skip 101 | test "should not be parsed after a pseudo-class" do 102 | assert_raise ArgumentError, "Unexpected tag/namespace start.", fn -> 103 | 
Selector.parse(":nth-child(2n)a") 104 | end 105 | end 106 | 107 | @tag :skip 108 | test "should not be parsed after a pseudo-element" do 109 | assert_raise ArgumentError, "Unexpected tag/namespace start.", fn -> 110 | Selector.parse(":unknown(hello)a") 111 | end 112 | end 113 | end 114 | 115 | describe "Namespaces" do 116 | test "should parse a namespace name" do 117 | assert Selector.parse("ns|div") == {:selectors, [{:rules, [{:rule, [{:tag_name, "div", namespace: "ns"}], []}]}]} 118 | end 119 | 120 | test "should parse no namespace" do 121 | assert Selector.parse("|div") == {:selectors, [{:rules, [{:rule, [{:tag_name, "div", namespace: ""}], []}]}]} 122 | end 123 | 124 | test "should parse wildcard namespace" do 125 | assert Selector.parse("*|div") == {:selectors, [{:rules, [{:rule, [{:tag_name, "div", namespace: "*"}], []}]}]} 126 | end 127 | 128 | test "should parse a wildcard namespace with a wildcard tag name" do 129 | assert Selector.parse("*|*") == {:selectors, [{:rules, [{:rule, [{:tag_name, "*", namespace: "*"}], []}]}]} 130 | end 131 | 132 | test "should parse an escaped star" do 133 | assert Selector.parse("\\*|*") == {:selectors, [{:rules, [{:rule, [{:tag_name, "*", namespace: "*"}], []}]}]} 134 | end 135 | 136 | test "should parse an escaped pipe" do 137 | assert Selector.parse("\\|div") == {:selectors, [{:rules, [{:rule, [{:tag_name, "|div", []}], []}]}]} 138 | end 139 | 140 | test "should parse two escaped stars" do 141 | assert Selector.parse("\\*|\\*") == {:selectors, [{:rules, [{:rule, [{:tag_name, "*", namespace: "*"}], []}]}]} 142 | end 143 | 144 | test "should properly parse an escaped namespace name" do 145 | assert Selector.parse("n\\ a\\ m|d\\ i\\ v") == {:selectors, [{:rules, [{:rule, [{:tag_name, "d i v", namespace: "n a m"}], []}]}]} 146 | end 147 | 148 | @tag :skip 149 | test "should not be parsed after an attribute" do 150 | assert_raise ArgumentError, "Unexpected tag/namespace start.", fn -> 151 | Selector.parse(~s([href="#"]a|b)) 152 
| end 153 | 154 | assert_raise ArgumentError, "Unexpected tag/namespace start.", fn -> 155 | Selector.parse(~s([href="#"]|b)) 156 | end 157 | end 158 | 159 | @tag :skip 160 | test "should not accept a single hyphen" do 161 | assert_raise ArgumentError, fn -> 162 | Selector.parse("a - b") 163 | end 164 | end 165 | 166 | @tag :skip 167 | test "should not be parsed after a pseudo-class" do 168 | assert_raise ArgumentError, "Unexpected tag/namespace start.", fn -> 169 | Selector.parse(":nth-child(2n)a|b") 170 | end 171 | 172 | assert_raise ArgumentError, "Unexpected tag/namespace start.", fn -> 173 | Selector.parse(":nth-child(2n)|b") 174 | end 175 | end 176 | 177 | @tag :skip 178 | test "should not be parsed after a pseudo-element" do 179 | assert_raise ArgumentError, "Unexpected tag/namespace start.", fn -> 180 | Selector.parse(":unknown(hello)a|b") 181 | end 182 | 183 | assert_raise ArgumentError, "Unexpected tag/namespace start.", fn -> 184 | Selector.parse(":unknown(hello)|b") 185 | end 186 | end 187 | end 188 | 189 | describe "Class Names" do 190 | test "should parse a single class name" do 191 | assert Selector.parse(".class") == {:selectors, [{:rules, [{:rule, [{:class, "class"}], []}]}]} 192 | end 193 | 194 | test "should parse multiple class names" do 195 | assert Selector.parse(".class1.class2") == {:selectors, [{:rules, [ 196 | {:rule, [{:class, "class1"}, {:class, "class2"}], []} 197 | ]}]} 198 | end 199 | 200 | test "should properly parse class names" do 201 | assert Selector.parse(".cla\\ ss\\.name") == {:selectors, [{:rules, [{:rule, [{:class, "cla ss.name"}], []}]}]} 202 | end 203 | 204 | test "should parse after tag names" do 205 | assert Selector.parse("div.class") == {:selectors, [{:rules, [ 206 | {:rule, [{:tag_name, "div", []}, {:class, "class"}], []} 207 | ]}]} 208 | end 209 | 210 | test "should parse after IDs" do 211 | assert Selector.parse("#id.class") == {:selectors, [{:rules, [ 212 | {:rule, [{:id, "id"}, {:class, "class"}], []} 213 | ]}]} 
214 | end 215 | 216 | test "should parse after an attribute" do 217 | assert Selector.parse("[href].class") == {:selectors, [{:rules, [ 218 | {:rule, [{:attribute, {:exists, "href", nil, []}}, {:class, "class"}], []} 219 | ]}]} 220 | end 221 | 222 | test "should parse after a pseudo-class" do 223 | assert Selector.parse(":link.class") == {:selectors, [{:rules, [ 224 | {:rule, [{:pseudo_class, {"link", []}}, {:class, "class"}], []} 225 | ]}]} 226 | end 227 | 228 | test "should parse after a pseudo-element" do 229 | assert Selector.parse("::before.class") == {:selectors, [{:rules, [ 230 | {:rule, [{:pseudo_element, {"before", []}}, {:class, "class"}], []} 231 | ]}]} 232 | end 233 | 234 | test "should fail on empty class name" do 235 | assert_raise ArgumentError, "Expected class name.", fn -> 236 | Selector.parse(".") 237 | end 238 | 239 | assert_raise ArgumentError, "Expected class name.", fn -> 240 | Selector.parse(".1") 241 | end 242 | end 243 | 244 | test "should fail on a single hyphen" do 245 | assert_raise ArgumentError, "Expected class name.", fn -> 246 | Selector.parse(".-") 247 | end 248 | end 249 | end 250 | 251 | describe "IDs" do 252 | test "should parse a single ID" do 253 | assert Selector.parse("#id") == {:selectors, [{:rules, [{:rule, [{:id, "id"}], []}]}]} 254 | end 255 | 256 | test "should parse multiple IDs" do 257 | assert Selector.parse("#id1#id2") == {:selectors, [{:rules, [ 258 | {:rule, [{:id, "id1"}, {:id, "id2"}], []} 259 | ]}]} 260 | end 261 | 262 | test "should properly parse IDs" do 263 | assert Selector.parse("#id\\ name\\#\\ with\\ escapes") == {:selectors, [{:rules, [ 264 | {:rule, [{:id, "id name# with escapes"}], []} 265 | ]}]} 266 | end 267 | 268 | test "should parse after a tag name" do 269 | assert Selector.parse("div#id") == {:selectors, [{:rules, [ 270 | {:rule, [{:tag_name, "div", []}, {:id, "id"}], []} 271 | ]}]} 272 | end 273 | 274 | test "should parse after a class name" do 275 | assert Selector.parse(".class#id") == 
{:selectors, [{:rules, [ 276 | {:rule, [{:class, "class"}, {:id, "id"}], []} 277 | ]}]} 278 | end 279 | 280 | test "should parse mix of classes and ids" do 281 | assert Selector.parse(".class1#id1.class2#id2") == {:selectors, [{:rules, [ 282 | {:rule, [ 283 | {:class, "class1"}, 284 | {:id, "id1"}, 285 | {:class, "class2"}, 286 | {:id, "id2"} 287 | ], []} 288 | ]}]} 289 | end 290 | 291 | test "should parse after an attribute" do 292 | assert Selector.parse("[href]#id") == {:selectors, [{:rules, [ 293 | {:rule, [{:attribute, {:exists, "href", nil, []}}, {:id, "id"}], []} 294 | ]}]} 295 | end 296 | 297 | test "should parse after a pseudo-class" do 298 | assert Selector.parse(":link#id") == {:selectors, [{:rules, [ 299 | {:rule, [{:pseudo_class, {"link", []}}, {:id, "id"}], []} 300 | ]}]} 301 | end 302 | 303 | test "should parse after a pseudo-element" do 304 | assert Selector.parse("::before#id") == {:selectors, [{:rules, [ 305 | {:rule, [{:pseudo_element, {"before", []}}, {:id, "id"}], []} 306 | ]}]} 307 | end 308 | 309 | test "should fail on empty ID" do 310 | assert_raise ArgumentError, "Expected identifier.", fn -> 311 | Selector.parse("#") 312 | end 313 | end 314 | end 315 | 316 | describe "Attributes" do 317 | test "should parse a attribute" do 318 | assert Selector.parse("[attr]") == {:selectors, [{:rules, [ 319 | {:rule, [{:attribute, {:exists, "attr", nil, []}}], []} 320 | ]}]} 321 | end 322 | 323 | test "should parse a attribute with comparison" do 324 | assert Selector.parse("[attr=val]") == {:selectors, [{:rules, [ 325 | {:rule, [{:attribute, {:equal, "attr", "val", []}}], []} 326 | ]}]} 327 | end 328 | 329 | test "should parse a attribute with multibyte comparison" do 330 | assert Selector.parse("[attr|=val]") == {:selectors, [{:rules, [ 331 | {:rule, [{:attribute, {:dash_match, "attr", "val", []}}], []} 332 | ]}]} 333 | end 334 | 335 | test "should parse multiple attributes" do 336 | assert Selector.parse("[attr1][attr2]") == {:selectors, [{:rules, [ 
337 | {:rule, [ 338 | {:attribute, {:exists, "attr1", nil, []}}, 339 | {:attribute, {:exists, "attr2", nil, []}} 340 | ], []} 341 | ]}]} 342 | end 343 | 344 | test "should properly parse attribute names" do 345 | assert Selector.parse("[attr\\ \\.name]") == {:selectors, [{:rules, [ 346 | {:rule, [{:attribute, {:exists, "attr .name", nil, []}}], []} 347 | ]}]} 348 | end 349 | 350 | test "should properly parse attribute values" do 351 | assert Selector.parse("[attr=val\\ \\ue]") == {:selectors, [{:rules, [ 352 | {:rule, [{:attribute, {:equal, "attr", "val ue", []}}], []} 353 | ]}]} 354 | end 355 | 356 | test "should properly parse case sensitivity modifiers" do 357 | assert Selector.parse("[attr=value \\i]") == {:selectors, [{:rules, [ 358 | {:rule, [{:attribute, {:equal, "attr", "value", case_sensitive: false}}], []} 359 | ]}]} 360 | end 361 | 362 | test "should properly handle whitespace" do 363 | assert Selector.parse("[ attr = value i ]") == {:selectors, [{:rules, [ 364 | {:rule, [{:attribute, {:equal, "attr", "value", case_sensitive: false}}], []} 365 | ]}]} 366 | end 367 | 368 | test "should properly parse double quotes" do 369 | # Testing escaped quote and literal backslashes (not escape sequences) 370 | assert Selector.parse(~s([ attr = "val\\"\\\\ue\\\\20" i ])) == {:selectors, [{:rules, [ 371 | {:rule, [{:attribute, {:equal, "attr", "val\"\\ue\\20", case_sensitive: false}}], []} 372 | ]}]} 373 | end 374 | 375 | test "should properly parse escapes" do 376 | ast_selector = {:selectors, [{:rules, [{:rule, [{:attribute, {:equal, "attr", "hello\nworld", []}}], []}]}]} 377 | 378 | assert Selector.parse(~s([attr="hello\\aworld"])) == ast_selector 379 | assert Selector.parse(~s([attr="hell\\o\\aworld"])) == ast_selector 380 | assert Selector.parse(~s([attr="hell\\\no\\aworld"])) == ast_selector 381 | assert Selector.parse(~s([attr="hello\\a world"])) == ast_selector 382 | assert Selector.parse(~s([attr="hello\\a\tworld"])) == ast_selector 383 | assert 
Selector.parse(~s([attr="hello\\a\fworld"])) == ast_selector 384 | assert Selector.parse(~s([attr="hello\\a\nworld"])) == ast_selector 385 | assert Selector.parse(~s([attr="hello\\a\rworld"])) == ast_selector 386 | assert Selector.parse(~s([attr="hello\\a\r\nworld"])) == ast_selector 387 | assert Selector.parse(~s([attr="hello\\00000aworld"])) == ast_selector 388 | end 389 | 390 | test "should properly parse single quotes" do 391 | assert Selector.parse("[ attr = 'val\\'\\ue\\20' i ]") == {:selectors, [{:rules, [ 392 | {:rule, [{:attribute, {:equal, "attr", "val'ue ", case_sensitive: false}}], []} 393 | ]}]} 394 | end 395 | 396 | test "should fail if attribute name is empty" do 397 | assert_raise ArgumentError, "Expected attribute name.", fn -> 398 | Selector.parse("[=a1]") 399 | end 400 | 401 | assert_raise ArgumentError, "Expected attribute name.", fn -> 402 | Selector.parse("[1=a1]") 403 | end 404 | end 405 | 406 | test "should fail if attribute value is empty" do 407 | assert_raise ArgumentError, "Expected attribute value.", fn -> 408 | Selector.parse("[a=]") 409 | end 410 | end 411 | 412 | test "should parse empty attribute values in quotes" do 413 | assert Selector.parse(~s([attr=""])) == {:selectors, [{:rules, [ 414 | {:rule, [{:attribute, {:equal, "attr", "", []}}], []} 415 | ]}]} 416 | assert Selector.parse("[attr='']") == {:selectors, [{:rules, [ 417 | {:rule, [{:attribute, {:equal, "attr", "", []}}], []} 418 | ]}]} 419 | end 420 | 421 | test "should parse case sensitivity modifier s" do 422 | assert Selector.parse("[attr=value s]") == {:selectors, [{:rules, [ 423 | {:rule, [{:attribute, {:equal, "attr", "value", case_sensitive: true}}], []} 424 | ]}]} 425 | end 426 | 427 | test "should parse after tag names" do 428 | assert Selector.parse("div[attr]") == {:selectors, [{:rules, [ 429 | {:rule, [{:tag_name, "div", []}, {:attribute, {:exists, "attr", nil, []}}], []} 430 | ]}]} 431 | end 432 | 433 | test "should parse after IDs" do 434 | assert 
Selector.parse("#id[attr]") == {:selectors, [{:rules, [ 435 | {:rule, [{:id, "id"}, {:attribute, {:exists, "attr", nil, []}}], []} 436 | ]}]} 437 | end 438 | 439 | test "should parse after classes" do 440 | assert Selector.parse(".class[attr]") == {:selectors, [{:rules, [ 441 | {:rule, [{:class, "class"}, {:attribute, {:exists, "attr", nil, []}}], []} 442 | ]}]} 443 | end 444 | 445 | test "should parse after a pseudo-class" do 446 | assert Selector.parse(":link[attr]") == {:selectors, [{:rules, [ 447 | {:rule, [{:pseudo_class, {"link", []}}, {:attribute, {:exists, "attr", nil, []}}], []} 448 | ]}]} 449 | end 450 | 451 | test "should parse after a pseudo-element" do 452 | assert Selector.parse("::before[attr]") == {:selectors, [{:rules, [ 453 | {:rule, [{:pseudo_element, {"before", []}}, {:attribute, {:exists, "attr", nil, []}}], []} 454 | ]}]} 455 | end 456 | 457 | test "should parse a named namespace" do 458 | assert Selector.parse("[ns|href]") == {:selectors, [{:rules, [ 459 | {:rule, [{:attribute, {:exists, "href", nil, namespace: "ns"}}], []} 460 | ]}]} 461 | 462 | assert Selector.parse("[ns|href=value]") == {:selectors, [{:rules, [ 463 | {:rule, [{:attribute, {:equal, "href", "value", namespace: "ns"}}], []} 464 | ]}]} 465 | end 466 | 467 | test "should parse a wildcard namespace" do 468 | assert Selector.parse("[*|href]") == {:selectors, [{:rules, [ 469 | {:rule, [{:attribute, {:exists, "href", nil, namespace: "*"}}], []} 470 | ]}]} 471 | 472 | assert Selector.parse("[*|href=value]") == {:selectors, [{:rules, [ 473 | {:rule, [{:attribute, {:equal, "href", "value", namespace: "*"}}], []} 474 | ]}]} 475 | end 476 | 477 | test "should parse an empty namespace" do 478 | assert Selector.parse("[|href]") == {:selectors, [{:rules, [ 479 | {:rule, [{:attribute, {:exists, "href", nil, namespace: ""}}], []} 480 | ]}]} 481 | 482 | assert Selector.parse("[|href=value]") == {:selectors, [{:rules, [ 483 | {:rule, [{:attribute, {:equal, "href", "value", namespace: ""}}], 
[]} 484 | ]}]} 485 | end 486 | 487 | test "should fail on bracket mismatch" do 488 | assert_raise ArgumentError, "Expected closing bracket.", fn -> 489 | Selector.parse("[attr") 490 | end 491 | end 492 | 493 | test "should parse starting with match" do 494 | assert Selector.parse("[attr^=value]") == {:selectors, [{:rules, [ 495 | {:rule, [{:attribute, {:prefix, "attr", "value", []}}], []} 496 | ]}]} 497 | end 498 | 499 | test "should parse ending with match" do 500 | assert Selector.parse("[attr$=value]") == {:selectors, [{:rules, [ 501 | {:rule, [{:attribute, {:suffix, "attr", "value", []}}], []} 502 | ]}]} 503 | end 504 | 505 | test "should parse containing match" do 506 | assert Selector.parse("[attr*=value]") == {:selectors, [{:rules, [ 507 | {:rule, [{:attribute, {:substring, "attr", "value", []}}], []} 508 | ]}]} 509 | end 510 | 511 | test "should parse includes match" do 512 | assert Selector.parse("[attr~=value]") == {:selectors, [{:rules, [ 513 | {:rule, [{:attribute, {:includes, "attr", "value", []}}], []} 514 | ]}]} 515 | end 516 | end 517 | 518 | describe "Pseudo Classes" do 519 | test "should parse a pseudo-class" do 520 | assert Selector.parse(":link") == {:selectors, [{:rules, [ 521 | {:rule, [{:pseudo_class, {"link", []}}], []} 522 | ]}]} 523 | end 524 | 525 | test "should parse multiple pseudo classes" do 526 | assert Selector.parse(":link:visited") == {:selectors, [{:rules, [ 527 | {:rule, [ 528 | {:pseudo_class, {"link", []}}, 529 | {:pseudo_class, {"visited", []}} 530 | ], []} 531 | ]}]} 532 | end 533 | 534 | @tag :skip 535 | test "should properly parse pseudo classes" do 536 | assert Selector.parse(":\\l\\69\\n\\6b") == {:selectors, [{:rules, [ 537 | {:rule, [{:pseudo_class, {"link", []}}], []} 538 | ]}]} 539 | end 540 | 541 | test "should properly parse with 0n" do 542 | for formula <- [":nth-child(0n+5)", ":nth-child( 0n + 5 )", ":nth-child( 0n+5 )", 543 | ":nth-child(5)", ":nth-child( 5 )", ":nth-child( +5 )"] do 544 | assert 
Selector.parse(formula) == {:selectors, [{:rules, [ 545 | {:rule, [{:pseudo_class, {"nth-child", [[a: 0, b: 5]]}}], []} 546 | ]}]} 547 | end 548 | end 549 | 550 | test "should properly parse with 0n and negative B" do 551 | for formula <- [":nth-child(0n-5)", ":nth-child( 0n - 5 )", ":nth-child( 0n-5 )", 552 | ":nth-child(-5)", ":nth-child( -5 )"] do 553 | assert Selector.parse(formula) == {:selectors, [{:rules, [ 554 | {:rule, [{:pseudo_class, {"nth-child", [[a: 0, b: -5]]}}], []} 555 | ]}]} 556 | end 557 | end 558 | 559 | test "should properly parse with 0 B" do 560 | for formula <- [":nth-child(3n+0)", ":nth-child( 3\\n + 0 )", ":nth-child( 3\\6e+0 )", 561 | ":nth-child(3n)", ":nth-child( 3n )", ":nth-child( +3n )"] do 562 | assert Selector.parse(formula) == {:selectors, [{:rules, [ 563 | {:rule, [{:pseudo_class, {"nth-child", [[a: 3, b: 0]]}}], []} 564 | ]}]} 565 | end 566 | end 567 | 568 | test "should properly parse even" do 569 | for formula <- [":nth-child(even)", ":nth-child( even )", ":nth-child( 2n )"] do 570 | assert Selector.parse(formula) == {:selectors, [{:rules, [ 571 | {:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 0]]}}], []} 572 | ]}]} 573 | end 574 | end 575 | 576 | test "should properly parse odd" do 577 | for formula <- [":nth-child( 2n + 1 )", ":nth-child( odd )"] do 578 | assert Selector.parse(formula) == {:selectors, [{:rules, [ 579 | {:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 1]]}}], []} 580 | ]}]} 581 | end 582 | end 583 | 584 | test "should properly handle whitespace" do 585 | assert Selector.parse(":lang( en )") == {:selectors, [{:rules, [ 586 | {:rule, [{:pseudo_class, {"lang", ["en"]}}], []} 587 | ]}]} 588 | end 589 | 590 | test "should parse after tag names" do 591 | assert Selector.parse("div:link") == {:selectors, [{:rules, [ 592 | {:rule, [{:tag_name, "div", []}, {:pseudo_class, {"link", []}}], []} 593 | ]}]} 594 | end 595 | 596 | test "should parse after IDs" do 597 | assert Selector.parse("#id:link") == {:selectors, 
[{:rules, [ 598 | {:rule, [{:id, "id"}, {:pseudo_class, {"link", []}}], []} 599 | ]}]} 600 | end 601 | 602 | test "should parse after classes" do 603 | assert Selector.parse(".class:link") == {:selectors, [{:rules, [ 604 | {:rule, [{:class, "class"}, {:pseudo_class, {"link", []}}], []} 605 | ]}]} 606 | end 607 | 608 | test "should parse nested selectors" do 609 | assert Selector.parse(":is(:lang(en), div)") == {:selectors, [{:rules, [ 610 | {:rule, [{:pseudo_class, {"is", [ 611 | [ 612 | {:rules, [{:rule, [{:pseudo_class, {"lang", ["en"]}}], []}]}, 613 | {:rules, [{:rule, [{:tag_name, "div", []}], []}]} 614 | ] 615 | ]}}], []} 616 | ]}]} 617 | end 618 | 619 | test "should parse after an attribute" do 620 | assert Selector.parse("[href]:link") == {:selectors, [{:rules, [ 621 | {:rule, [{:attribute, {:exists, "href", nil, []}}, {:pseudo_class, {"link", []}}], []} 622 | ]}]} 623 | end 624 | 625 | test "should parse after a pseudo-element" do 626 | assert Selector.parse("::before:hover") == {:selectors, [{:rules, [ 627 | {:rule, [{:pseudo_element, {"before", []}}, {:pseudo_class, {"hover", []}}], []} 628 | ]}]} 629 | end 630 | 631 | test "should fail on a single hyphen" do 632 | assert_raise ArgumentError, "Identifiers cannot consist of a single hyphen.", fn -> 633 | Selector.parse(":-") 634 | end 635 | end 636 | 637 | test "should fail if argument required but not provided" do 638 | assert_raise ArgumentError, "Argument is required for pseudo-class \"not\".", fn -> 639 | Selector.parse(":not") 640 | end 641 | end 642 | 643 | test "should parse :nth functions" do 644 | assert Selector.parse(":nth-child(2n+1)") == {:selectors, [{:rules, [ 645 | {:rule, [{:pseudo_class, {"nth-child", [[a: 2, b: 1]]}}], []} 646 | ]}]} 647 | end 648 | 649 | test "should parse :nth-of-type functions" do 650 | assert Selector.parse(":nth-of-type(2n)") == {:selectors, [{:rules, [ 651 | {:rule, [{:pseudo_class, {"nth-of-type", [[a: 2, b: 0]]}}], []} 652 | ]}]} 653 | end 654 | 655 | test 
"should parse :nth-last-child functions" do 656 | assert Selector.parse(":nth-last-child(2n+1)") == {:selectors, [{:rules, [ 657 | {:rule, [{:pseudo_class, {"nth-last-child", [[a: 2, b: 1]]}}], []} 658 | ]}]} 659 | end 660 | 661 | test "should parse :nth-last-of-type functions" do 662 | assert Selector.parse(":nth-last-of-type(2n)") == {:selectors, [{:rules, [ 663 | {:rule, [{:pseudo_class, {"nth-last-of-type", [[a: 2, b: 0]]}}], []} 664 | ]}]} 665 | end 666 | 667 | test "should parse :not function with complex selectors" do 668 | assert Selector.parse(":not(div.class)") == {:selectors, [{:rules, [ 669 | {:rule, [{:pseudo_class, {"not", [ 670 | [{:rules, [{:rule, [{:tag_name, "div", []}, {:class, "class"}], []}]}] 671 | ]}}], []} 672 | ]}]} 673 | end 674 | 675 | test "should parse :is function" do 676 | assert Selector.parse(":is(div, .class)") == {:selectors, [{:rules, [ 677 | {:rule, [{:pseudo_class, {"is", [ 678 | [ 679 | {:rules, [{:rule, [{:tag_name, "div", []}], []}]}, 680 | {:rules, [{:rule, [{:class, "class"}], []}]} 681 | ] 682 | ]}}], []} 683 | ]}]} 684 | end 685 | 686 | test "should parse :where function" do 687 | assert Selector.parse(":where(div, .class)") == {:selectors, [{:rules, [ 688 | {:rule, [{:pseudo_class, {"where", [ 689 | [ 690 | {:rules, [{:rule, [{:tag_name, "div", []}], []}]}, 691 | {:rules, [{:rule, [{:class, "class"}], []}]} 692 | ] 693 | ]}}], []} 694 | ]}]} 695 | end 696 | 697 | test "should parse :has function" do 698 | assert Selector.parse(":has(> div)") == {:selectors, [{:rules, [ 699 | {:rule, [{:pseudo_class, {"has", [ 700 | [{:rules, [{:rule, [{:tag_name, "div", []}], combinator: ">"}]}] 701 | ]}}], []} 702 | ]}]} 703 | end 704 | 705 | test "should parse :matches function" do 706 | assert Selector.parse(":matches(div, .class)") == {:selectors, [{:rules, [ 707 | {:rule, [{:pseudo_class, {"matches", [ 708 | [ 709 | {:rules, [{:rule, [{:tag_name, "div", []}], []}]}, 710 | {:rules, [{:rule, [{:class, "class"}], []}]} 711 | ] 712 | 
]}}], []} 713 | ]}]} 714 | end 715 | 716 | test "should parse language pseudo-class" do 717 | assert Selector.parse(":lang(en-US)") == {:selectors, [{:rules, [ 718 | {:rule, [{:pseudo_class, {"lang", ["en-US"]}}], []} 719 | ]}]} 720 | end 721 | 722 | test "should parse structural pseudo-classes" do 723 | assert Selector.parse(":first-child") == {:selectors, [{:rules, [ 724 | {:rule, [{:pseudo_class, {"first-child", []}}], []} 725 | ]}]} 726 | assert Selector.parse(":last-child") == {:selectors, [{:rules, [ 727 | {:rule, [{:pseudo_class, {"last-child", []}}], []} 728 | ]}]} 729 | assert Selector.parse(":only-child") == {:selectors, [{:rules, [ 730 | {:rule, [{:pseudo_class, {"only-child", []}}], []} 731 | ]}]} 732 | assert Selector.parse(":first-of-type") == {:selectors, [{:rules, [ 733 | {:rule, [{:pseudo_class, {"first-of-type", []}}], []} 734 | ]}]} 735 | assert Selector.parse(":last-of-type") == {:selectors, [{:rules, [ 736 | {:rule, [{:pseudo_class, {"last-of-type", []}}], []} 737 | ]}]} 738 | assert Selector.parse(":only-of-type") == {:selectors, [{:rules, [ 739 | {:rule, [{:pseudo_class, {"only-of-type", []}}], []} 740 | ]}]} 741 | end 742 | 743 | test "should parse tree-structural pseudo-classes" do 744 | assert Selector.parse(":root") == {:selectors, [{:rules, [ 745 | {:rule, [{:pseudo_class, {"root", []}}], []} 746 | ]}]} 747 | assert Selector.parse(":empty") == {:selectors, [{:rules, [ 748 | {:rule, [{:pseudo_class, {"empty", []}}], []} 749 | ]}]} 750 | end 751 | 752 | test "should parse UI state pseudo-classes" do 753 | assert Selector.parse(":checked") == {:selectors, [{:rules, [ 754 | {:rule, [{:pseudo_class, {"checked", []}}], []} 755 | ]}]} 756 | assert Selector.parse(":enabled") == {:selectors, [{:rules, [ 757 | {:rule, [{:pseudo_class, {"enabled", []}}], []} 758 | ]}]} 759 | assert Selector.parse(":disabled") == {:selectors, [{:rules, [ 760 | {:rule, [{:pseudo_class, {"disabled", []}}], []} 761 | ]}]} 762 | assert Selector.parse(":required") == 
{:selectors, [{:rules, [ 763 | {:rule, [{:pseudo_class, {"required", []}}], []} 764 | ]}]} 765 | assert Selector.parse(":optional") == {:selectors, [{:rules, [ 766 | {:rule, [{:pseudo_class, {"optional", []}}], []} 767 | ]}]} 768 | assert Selector.parse(":read-only") == {:selectors, [{:rules, [ 769 | {:rule, [{:pseudo_class, {"read-only", []}}], []} 770 | ]}]} 771 | assert Selector.parse(":read-write") == {:selectors, [{:rules, [ 772 | {:rule, [{:pseudo_class, {"read-write", []}}], []} 773 | ]}]} 774 | assert Selector.parse(":valid") == {:selectors, [{:rules, [ 775 | {:rule, [{:pseudo_class, {"valid", []}}], []} 776 | ]}]} 777 | assert Selector.parse(":invalid") == {:selectors, [{:rules, [ 778 | {:rule, [{:pseudo_class, {"invalid", []}}], []} 779 | ]}]} 780 | assert Selector.parse(":in-range") == {:selectors, [{:rules, [ 781 | {:rule, [{:pseudo_class, {"in-range", []}}], []} 782 | ]}]} 783 | assert Selector.parse(":out-of-range") == {:selectors, [{:rules, [ 784 | {:rule, [{:pseudo_class, {"out-of-range", []}}], []} 785 | ]}]} 786 | end 787 | 788 | test "should parse target and link pseudo-classes" do 789 | assert Selector.parse(":target") == {:selectors, [{:rules, [ 790 | {:rule, [{:pseudo_class, {"target", []}}], []} 791 | ]}]} 792 | assert Selector.parse(":link") == {:selectors, [{:rules, [ 793 | {:rule, [{:pseudo_class, {"link", []}}], []} 794 | ]}]} 795 | assert Selector.parse(":visited") == {:selectors, [{:rules, [ 796 | {:rule, [{:pseudo_class, {"visited", []}}], []} 797 | ]}]} 798 | assert Selector.parse(":hover") == {:selectors, [{:rules, [ 799 | {:rule, [{:pseudo_class, {"hover", []}}], []} 800 | ]}]} 801 | assert Selector.parse(":active") == {:selectors, [{:rules, [ 802 | {:rule, [{:pseudo_class, {"active", []}}], []} 803 | ]}]} 804 | assert Selector.parse(":focus") == {:selectors, [{:rules, [ 805 | {:rule, [{:pseudo_class, {"focus", []}}], []} 806 | ]}]} 807 | end 808 | 809 | test "should parse CSS Level 4 pseudo-classes" do 810 | assert 
Selector.parse(":any-link") == {:selectors, [{:rules, [ 811 | {:rule, [{:pseudo_class, {"any-link", []}}], []} 812 | ]}]} 813 | assert Selector.parse(":focus-within") == {:selectors, [{:rules, [ 814 | {:rule, [{:pseudo_class, {"focus-within", []}}], []} 815 | ]}]} 816 | assert Selector.parse(":focus-visible") == {:selectors, [{:rules, [ 817 | {:rule, [{:pseudo_class, {"focus-visible", []}}], []} 818 | ]}]} 819 | end 820 | end 821 | 822 | describe "Pseudo Elements" do 823 | test "should parse a pseudo-element" do 824 | assert Selector.parse("::before") == {:selectors, [{:rules, [ 825 | {:rule, [{:pseudo_element, {"before", []}}], []} 826 | ]}]} 827 | end 828 | 829 | test "should parse a parametrized pseudo-element" do 830 | assert Selector.parse("::slotted(span)") == {:selectors, [{:rules, [ 831 | {:rule, [{:pseudo_element, {"slotted", [[{:rules, [{:rule, [{:tag_name, "span", []}], []}]}]]}}], []} 832 | ]}]} 833 | end 834 | 835 | test "should parse pseudo-elements with content" do 836 | assert Selector.parse("::after") == {:selectors, [{:rules, [ 837 | {:rule, [{:pseudo_element, {"after", []}}], []} 838 | ]}]} 839 | end 840 | 841 | test "should parse ::before and ::after" do 842 | assert Selector.parse("::before") == {:selectors, [{:rules, [ 843 | {:rule, [{:pseudo_element, {"before", []}}], []} 844 | ]}]} 845 | 846 | assert Selector.parse("::after") == {:selectors, [{:rules, [ 847 | {:rule, [{:pseudo_element, {"after", []}}], []} 848 | ]}]} 849 | end 850 | 851 | test "should parse ::first-line and ::first-letter" do 852 | assert Selector.parse("::first-line") == {:selectors, [{:rules, [ 853 | {:rule, [{:pseudo_element, {"first-line", []}}], []} 854 | ]}]} 855 | 856 | assert Selector.parse("::first-letter") == {:selectors, [{:rules, [ 857 | {:rule, [{:pseudo_element, {"first-letter", []}}], []} 858 | ]}]} 859 | end 860 | 861 | test "should parse modern double-colon syntax" do 862 | assert Selector.parse("::selection") == {:selectors, [{:rules, [ 863 | {:rule,
[{:pseudo_element, {"selection", []}}], []} 864 | ]}]} 865 | end 866 | 867 | test "should parse legacy single-colon syntax" do 868 | assert Selector.parse(":before") == {:selectors, [{:rules, [ 869 | {:rule, [{:pseudo_element, {"before", []}}], []} 870 | ]}]} 871 | end 872 | 873 | test "should parse pseudo-elements with tag names" do 874 | assert Selector.parse("div::before") == {:selectors, [{:rules, [ 875 | {:rule, [{:tag_name, "div", []}, {:pseudo_element, {"before", []}}], []} 876 | ]}]} 877 | end 878 | 879 | test "should parse pseudo-elements with class names" do 880 | assert Selector.parse(".class::before") == {:selectors, [{:rules, [ 881 | {:rule, [{:class, "class"}, {:pseudo_element, {"before", []}}], []} 882 | ]}]} 883 | end 884 | 885 | test "should parse pseudo-elements with IDs" do 886 | assert Selector.parse("#id::before") == {:selectors, [{:rules, [ 887 | {:rule, [{:id, "id"}, {:pseudo_element, {"before", []}}], []} 888 | ]}]} 889 | end 890 | 891 | test "should parse pseudo-elements with attributes" do 892 | assert Selector.parse("[attr]::before") == {:selectors, [{:rules, [ 893 | {:rule, [{:attribute, {:exists, "attr", nil, []}}, {:pseudo_element, {"before", []}}], []} 894 | ]}]} 895 | end 896 | 897 | test "should fail on invalid pseudo-element syntax" do 898 | assert_raise ArgumentError, "Invalid pseudo-element syntax.", fn -> 899 | Selector.parse("::invalid-element") 900 | end 901 | end 902 | 903 | test "should handle vendor-specific pseudo-elements" do 904 | assert Selector.parse("::-webkit-input-placeholder") == {:selectors, [{:rules, [ 905 | {:rule, [{:pseudo_element, {"-webkit-input-placeholder", []}}], []} 906 | ]}]} 907 | end 908 | 909 | test "should parse CSS Level 4 pseudo-elements" do 910 | assert Selector.parse("::placeholder") == {:selectors, [{:rules, [ 911 | {:rule, [{:pseudo_element, {"placeholder", []}}], []} 912 | ]}]} 913 | assert Selector.parse("::backdrop") == {:selectors, [{:rules, [ 914 | {:rule, [{:pseudo_element, {"backdrop", 
[]}}], []} 915 | ]}]} 916 | assert Selector.parse("::marker") == {:selectors, [{:rules, [ 917 | {:rule, [{:pseudo_element, {"marker", []}}], []} 918 | ]}]} 919 | assert Selector.parse("::cue") == {:selectors, [{:rules, [ 920 | {:rule, [{:pseudo_element, {"cue", []}}], []} 921 | ]}]} 922 | end 923 | 924 | # Note: While CSS3 specifies pseudo-elements should be at the end, 925 | # this parser allows selectors after pseudo-elements for flexibility 926 | # and future compatibility with CSS4 where some pseudo-elements 927 | # can be followed by pseudo-classes 928 | end 929 | 930 | describe "Multiple rules" do 931 | test "should parse multiple rules" do 932 | assert Selector.parse("div,.class") == {:selectors, [ 933 | {:rules, [{:rule, [{:tag_name, "div", []}], []}]}, 934 | {:rules, [{:rule, [{:class, "class"}], []}]} 935 | ]} 936 | end 937 | 938 | test "should parse comma-separated selectors" do 939 | assert Selector.parse(" div , .class ") == {:selectors, [ 940 | {:rules, [{:rule, [{:tag_name, "div", []}], []}]}, 941 | {:rules, [{:rule, [{:class, "class"}], []}]} 942 | ]} 943 | end 944 | 945 | test "should handle whitespace in multiple rules" do 946 | assert Selector.parse("div, .class, #id") == {:selectors, [ 947 | {:rules, [{:rule, [{:tag_name, "div", []}], []}]}, 948 | {:rules, [{:rule, [{:class, "class"}], []}]}, 949 | {:rules, [{:rule, [{:id, "id"}], []}]} 950 | ]} 951 | end 952 | 953 | test "should fail on malformed multiple rules" do 954 | assert_raise ArgumentError, "Expected selector but end of input reached.", fn -> 955 | Selector.parse("div, .class,") 956 | end 957 | 958 | assert_raise ArgumentError, "Cannot parse: $", fn -> 959 | Selector.parse("div, .class, $") 960 | end 961 | end 962 | end 963 | 964 | describe "Complex selectors" do 965 | test "should parse selectors with all features combined" do 966 | assert Selector.parse("ns|tag#id.class1.class2[attr=value]:hover::before") == {:selectors, [{:rules, [ 967 | {:rule, [ 968 | {:tag_name, "tag",
namespace: "ns"}, 969 | {:id, "id"}, 970 | {:class, "class1"}, 971 | {:class, "class2"}, 972 | {:attribute, {:equal, "attr", "value", []}}, 973 | {:pseudo_class, {"hover", []}}, 974 | {:pseudo_element, {"before", []}} 975 | ], []} 976 | ]}]} 977 | end 978 | 979 | test "should parse complex selectors with multiple attributes" do 980 | assert Selector.parse("div[id][class~=test][data-value^=prefix]") == {:selectors, [{:rules, [ 981 | {:rule, [ 982 | {:tag_name, "div", []}, 983 | {:attribute, {:exists, "id", nil, []}}, 984 | {:attribute, {:includes, "class", "test", []}}, 985 | {:attribute, {:prefix, "data-value", "prefix", []}} 986 | ], []} 987 | ]}]} 988 | end 989 | end 990 | 991 | describe "Nested rules" do 992 | test "should parse nested rules" do 993 | assert Selector.parse("div .class") == {:selectors, [{:rules, [ 994 | {:rule, [{:tag_name, "div", []}], []}, 995 | {:rule, [{:class, "class"}], []} 996 | ]}]} 997 | end 998 | 999 | test "should parse child combinators surrounded by whitespace" do 1000 | assert Selector.parse(" div > .class ") == {:selectors, [{:rules, [ 1001 | {:rule, [{:tag_name, "div", []}], []}, 1002 | {:rule, [{:class, "class"}], combinator: ">"} 1003 | ]}]} 1004 | end 1005 | 1006 | test "should parse child combinators" do 1007 | assert Selector.parse("div>.class") == {:selectors, [{:rules, [ 1008 | {:rule, [{:tag_name, "div", []}], []}, 1009 | {:rule, [{:class, "class"}], combinator: ">"} 1010 | ]}]} 1011 | end 1012 | 1013 | test "should parse sibling combinators" do 1014 | assert Selector.parse("div~.class") == {:selectors, [{:rules, [ 1015 | {:rule, [{:tag_name, "div", []}], []}, 1016 | {:rule, [{:class, "class"}], combinator: "~"} 1017 | ]}]} 1018 | end 1019 | 1020 | test "should parse adjacent sibling combinators" do 1021 | assert Selector.parse("div+.class") == {:selectors, [{:rules, [ 1022 | {:rule, [{:tag_name, "div", []}], []}, 1023 | {:rule, [{:class, "class"}], combinator: "+"} 1024 | ]}]} 1025 | end 1026 | 1027 | test "should handle complex nesting
patterns" do 1028 | assert Selector.parse("div||.class") == {:selectors, [{:rules, [ 1029 | {:rule, [{:tag_name, "div", []}], []}, 1030 | {:rule, [{:class, "class"}], combinator: "||"} 1031 | ]}]} 1032 | 1033 | assert Selector.parse(" div || .class ") == {:selectors, [{:rules, [ 1034 | {:rule, [{:tag_name, "div", []}], []}, 1035 | {:rule, [{:class, "class"}], combinator: "||"} 1036 | ]}]} 1037 | end 1038 | end 1039 | 1040 | describe "Edge cases and error handling" do 1041 | test "should handle various Unicode whitespace" do 1042 | # A non-breaking space (U+00A0) is not CSS whitespace, so it is NOT a descendant combinator; 1043 | # it is kept as part of the tag name instead 1044 | assert Selector.parse("div\u00A0.class") == {:selectors, [{:rules, [ 1045 | {:rule, [{:tag_name, "div\u00A0", []}, {:class, "class"}], []} 1046 | ]}]} 1047 | end 1048 | 1049 | test "should validate combinator placement" do 1050 | assert_raise ArgumentError, fn -> 1051 | Selector.parse("div > > span") 1052 | end 1053 | 1054 | assert_raise ArgumentError, fn -> 1055 | Selector.parse("> div") 1056 | end 1057 | end 1058 | 1059 | test "should handle deeply nested selectors" do 1060 | nested = ":not(:not(:not(:not(:not(.class)))))" 1061 | assert Selector.parse(nested) == {:selectors, [{:rules, [ 1062 | {:rule, [{:pseudo_class, {"not", [ 1063 | [{:rules, [{:rule, [{:pseudo_class, {"not", [ 1064 | [{:rules, [{:rule, [{:pseudo_class, {"not", [ 1065 | [{:rules, [{:rule, [{:pseudo_class, {"not", [ 1066 | [{:rules, [{:rule, [{:pseudo_class, {"not", [ 1067 | [{:rules, [{:rule, [{:class, "class"}], []}]}] 1068 | ]}}], []}]}] 1069 | ]}}], []}]}] 1070 | ]}}], []}]}] 1071 | ]}}], []}]}] 1072 | ]}}], []} 1073 | ]}]} 1074 | end 1075 | 1076 | test "should handle extremely long identifiers" do 1077 | # Identifiers are not truncated: the full 1000-character ID must be returned unchanged 1078 | id = String.duplicate("a", 1000) 1079 | assert Selector.parse("##{id}") == {:selectors, [{:rules, [ 1080 | {:rule, [{:id, id}], []} 1081 | ]}]} 1082 | end 1083 | 1084 | test "should parse nth-child with
negative coefficients" do 1085 | assert Selector.parse(":nth-child(-n+3)") == {:selectors, [{:rules, [ 1086 | {:rule, [{:pseudo_class, {"nth-child", [[a: -1, b: 3]]}}], []} 1087 | ]}]} 1088 | end 1089 | 1090 | test "should handle escape sequences in different contexts" do 1091 | # Escaped characters in ID 1092 | assert Selector.parse("#\\31 23") == {:selectors, [{:rules, [ 1093 | {:rule, [{:id, "123"}], []} 1094 | ]}]} 1095 | 1096 | # Escaped characters in class 1097 | assert Selector.parse(".\\@media") == {:selectors, [{:rules, [ 1098 | {:rule, [{:class, "@media"}], []} 1099 | ]}]} 1100 | 1101 | # Escaped characters in attribute 1102 | assert Selector.parse("[data-\\@attr]") == {:selectors, [{:rules, [ 1103 | {:rule, [{:attribute, {:exists, "data-@attr", nil, []}}], []} 1104 | ]}]} 1105 | end 1106 | end 1107 | end 1108 | -------------------------------------------------------------------------------- /test/selector/parser/guards_test.exs: -------------------------------------------------------------------------------- 1 | defmodule Selector.Parser.GuardsTest do 2 | use ExUnit.Case, async: true 3 | import Selector.Parser.Guards 4 | 5 | describe "is_whitespace/1" do 6 | test "recognizes CSS whitespace characters" do 7 | assert is_whitespace(0x0009) # Tab 8 | assert is_whitespace(0x000A) # Line Feed 9 | assert is_whitespace(0x000C) # Form Feed 10 | assert is_whitespace(0x000D) # Carriage Return 11 | assert is_whitespace(0x0020) # Space 12 | end 13 | 14 | test "rejects non-whitespace characters" do 15 | refute is_whitespace(?a) 16 | refute is_whitespace(?1) 17 | refute is_whitespace(?.) 
18 | refute is_whitespace(0x00A0) # Non-breaking space (not CSS whitespace) 19 | end 20 | end 21 | 22 | describe "is_identifier_start_char/1" do 23 | test "accepts ASCII letters" do 24 | assert is_identifier_start_char(?a) 25 | assert is_identifier_start_char(?z) 26 | assert is_identifier_start_char(?A) 27 | assert is_identifier_start_char(?Z) 28 | end 29 | 30 | test "accepts underscore" do 31 | assert is_identifier_start_char(?_) 32 | end 33 | 34 | test "accepts non-ASCII characters" do 35 | assert is_identifier_start_char(0x00C0) # À 36 | assert is_identifier_start_char(0x4E2D) # 中 (Chinese character) 37 | end 38 | 39 | test "accepts UTF-8 letters from various scripts" do 40 | assert is_identifier_start_char(0x00E9) # é (Latin-1 Supplement) 41 | assert is_identifier_start_char(0x0391) # Α (Greek) 42 | assert is_identifier_start_char(0x0410) # А (Cyrillic) 43 | assert is_identifier_start_char(0x05D0) # א (Hebrew) 44 | assert is_identifier_start_char(0x0627) # ا (Arabic) 45 | assert is_identifier_start_char(0x3042) # あ (Hiragana) 46 | assert is_identifier_start_char(0x30A2) # ア (Katakana) 47 | assert is_identifier_start_char(0xAC00) # 가 (Hangul) 48 | end 49 | 50 | test "rejects digits" do 51 | refute is_identifier_start_char(?0) 52 | refute is_identifier_start_char(?9) 53 | refute is_identifier_start_char(0x0660) # Arabic-Indic digit 54 | end 55 | 56 | test "rejects hyphens" do 57 | refute is_identifier_start_char(?-) 58 | end 59 | 60 | test "rejects ASCII control characters" do 61 | refute is_identifier_start_char(0x001F) 62 | end 63 | end 64 | 65 | describe "is_identifier_char/1" do 66 | test "accepts identifier start characters" do 67 | assert is_identifier_char(?a) 68 | assert is_identifier_char(?_) 69 | assert is_identifier_char(0x00C0) 70 | end 71 | 72 | test "accepts UTF-8 digits" do 73 | assert is_identifier_char(?0) 74 | assert is_identifier_char(?5) 75 | assert is_identifier_char(?9) 76 | assert is_identifier_char(0x0660) # Arabic-Indic digit 77 | assert 
is_identifier_char(0x06F0) # Extended Arabic-Indic digit 78 | assert is_identifier_char(0x0966) # Devanagari digit 79 | assert is_identifier_char(0xFF10) # Fullwidth digit 80 | end 81 | 82 | test "accepts hyphens" do 83 | assert is_identifier_char(?-) 84 | end 85 | 86 | test "rejects special characters" do 87 | refute is_identifier_char(?.) 88 | refute is_identifier_char(?#) 89 | refute is_identifier_char(?@) 90 | end 91 | end 92 | 93 | describe "is_string_char/1" do 94 | test "accepts regular characters" do 95 | assert is_string_char(?a) 96 | assert is_string_char(?1) 97 | assert is_string_char(?!) 98 | assert is_string_char(0x00C0) 99 | end 100 | 101 | test "accepts UTF-8 characters" do 102 | assert is_string_char(0x4E2D) # 中 (Chinese) 103 | assert is_string_char(0x0391) # Α (Greek) 104 | assert is_string_char(0x0627) # ا (Arabic) 105 | end 106 | 107 | test "rejects quote characters" do 108 | refute is_string_char(0x0022) # Double quote 109 | refute is_string_char(0x0027) # Single quote 110 | end 111 | 112 | test "rejects backslash" do 113 | refute is_string_char(0x005C) 114 | end 115 | 116 | test "rejects newlines including Unicode newlines" do 117 | refute is_string_char(0x000A) # Line Feed 118 | refute is_string_char(0x000C) # Form Feed 119 | refute is_string_char(0x000D) # Carriage Return 120 | refute is_string_char(0x0085) # Next Line (NEL) 121 | refute is_string_char(0x2028) # Line Separator 122 | refute is_string_char(0x2029) # Paragraph Separator 123 | end 124 | end 125 | 126 | describe "is_combinator_char/1" do 127 | test "recognizes single-character combinator characters" do 128 | assert is_combinator_char(0x003E) # > (child combinator) 129 | assert is_combinator_char(0x002B) # + (adjacent sibling combinator) 130 | assert is_combinator_char(0x007E) # ~ (general sibling combinator) 131 | end 132 | 133 | test "rejects non-combinator characters" do 134 | refute is_combinator_char(?a) 135 | refute is_combinator_char(?1) 136 | refute 
is_combinator_char(0x0020) # Space (descendant combinator handled separately) 137 | refute is_combinator_char(?|) # Pipe (column combinator needs two ||) 138 | end 139 | 140 | test "rejects other special characters" do 141 | refute is_combinator_char(?.) 142 | refute is_combinator_char(?#) 143 | refute is_combinator_char(?:) 144 | refute is_combinator_char(?[) 145 | refute is_combinator_char(?]) 146 | refute is_combinator_char(?=) 147 | end 148 | end 149 | 150 | describe "is_combinator/1 (backward compatibility)" do 151 | test "recognizes combinator characters" do 152 | assert is_combinator(0x003E) # > 153 | assert is_combinator(0x002B) # + 154 | assert is_combinator(0x007E) # ~ 155 | end 156 | 157 | test "rejects non-combinator characters" do 158 | refute is_combinator(?a) 159 | refute is_combinator(?1) 160 | refute is_combinator(0x0020) # Space (descendant combinator handled separately) 161 | end 162 | end 163 | 164 | describe "is_delimiter/1" do 165 | test "recognizes delimiter characters" do 166 | assert is_delimiter(0x0023) # # 167 | assert is_delimiter(0x002E) # . 
168 | assert is_delimiter(0x003A) # : 169 | assert is_delimiter(0x005B) # [ 170 | assert is_delimiter(0x005D) # ] 171 | assert is_delimiter(0x0028) # ( 172 | assert is_delimiter(0x0029) # ) 173 | assert is_delimiter(0x002C) # , 174 | assert is_delimiter(0x0022) # " 175 | assert is_delimiter(0x0027) # ' 176 | assert is_delimiter(0x005C) # \ 177 | end 178 | 179 | test "rejects non-delimiter characters" do 180 | refute is_delimiter(?a) 181 | refute is_delimiter(?1) 182 | refute is_delimiter(?=) 183 | end 184 | end 185 | 186 | describe "is_attribute_operator_char/1" do 187 | test "recognizes attribute operator characters" do 188 | assert is_attribute_operator_char(0x003D) # = 189 | assert is_attribute_operator_char(0x007E) # ~ 190 | assert is_attribute_operator_char(0x007C) # | 191 | assert is_attribute_operator_char(0x005E) # ^ 192 | assert is_attribute_operator_char(0x0024) # $ 193 | assert is_attribute_operator_char(0x002A) # * 194 | end 195 | 196 | test "rejects non-operator characters" do 197 | refute is_attribute_operator_char(?a) 198 | refute is_attribute_operator_char(?1) 199 | refute is_attribute_operator_char(?!) 200 | end 201 | end 202 | 203 | describe "is_hex_digit/1" do 204 | test "recognizes ASCII hexadecimal digits" do 205 | assert is_hex_digit(?0) 206 | assert is_hex_digit(?9) 207 | assert is_hex_digit(?a) 208 | assert is_hex_digit(?f) 209 | assert is_hex_digit(?A) 210 | assert is_hex_digit(?F) 211 | end 212 | 213 | test "rejects fullwidth hexadecimal digits" do 214 | refute is_hex_digit(0xFF10) # Fullwidth 0 215 | refute is_hex_digit(0xFF19) # Fullwidth 9 216 | refute is_hex_digit(0xFF21) # Fullwidth A 217 | refute is_hex_digit(0xFF26) # Fullwidth F 218 | refute is_hex_digit(0xFF41) # Fullwidth a 219 | refute is_hex_digit(0xFF46) # Fullwidth f 220 | end 221 | 222 | test "rejects non-hex characters" do 223 | refute is_hex_digit(?g) 224 | refute is_hex_digit(?G) 225 | refute is_hex_digit(?!) 
226 | end 227 | end 228 | 229 | describe "is_escapable_char/1" do 230 | test "accepts most characters" do 231 | assert is_escapable_char(?a) 232 | assert is_escapable_char(?1) 233 | assert is_escapable_char(?!) 234 | assert is_escapable_char(?#) 235 | assert is_escapable_char(0x0020) # Space 236 | end 237 | 238 | test "accepts UTF-8 characters" do 239 | assert is_escapable_char(0x4E2D) # 中 (Chinese) 240 | assert is_escapable_char(0x0391) # Α (Greek) 241 | end 242 | 243 | test "rejects newlines including Unicode newlines" do 244 | refute is_escapable_char(0x000A) # Line Feed 245 | refute is_escapable_char(0x000C) # Form Feed 246 | refute is_escapable_char(0x000D) # Carriage Return 247 | refute is_escapable_char(0x0085) # Next Line (NEL) 248 | refute is_escapable_char(0x2028) # Line Separator 249 | refute is_escapable_char(0x2029) # Paragraph Separator 250 | end 251 | end 252 | 253 | describe "id selector guards" do 254 | test "is_id_start_char/1 follows identifier start rules" do 255 | assert is_id_start_char(?a) 256 | assert is_id_start_char(?_) 257 | assert is_id_start_char(0x00C0) 258 | assert is_id_start_char(0x4E2D) # 中 (Chinese) 259 | refute is_id_start_char(?1) 260 | refute is_id_start_char(?-) 261 | end 262 | 263 | test "is_id_char/1 follows identifier rules" do 264 | assert is_id_char(?a) 265 | assert is_id_char(?1) 266 | assert is_id_char(?-) 267 | assert is_id_char(?_) 268 | assert is_id_char(0x4E2D) # 中 (Chinese) 269 | assert is_id_char(0x0660) # Arabic-Indic digit 270 | refute is_id_char(?.) 
271 | refute is_id_char(?#) 272 | end 273 | end 274 | 275 | describe "class selector guards" do 276 | test "is_class_start_char/1 follows identifier start rules" do 277 | assert is_class_start_char(?a) 278 | assert is_class_start_char(?_) 279 | assert is_class_start_char(0x0391) # Α (Greek) 280 | refute is_class_start_char(?1) 281 | refute is_class_start_char(?-) 282 | end 283 | 284 | test "is_class_char/1 follows identifier rules" do 285 | assert is_class_char(?a) 286 | assert is_class_char(?1) 287 | assert is_class_char(?-) 288 | assert is_class_char(0x3042) # あ (Hiragana) 289 | refute is_class_char(?.) 290 | end 291 | end 292 | 293 | describe "element selector guards" do 294 | test "is_tag_name_start_char/1 follows identifier start rules" do 295 | assert is_tag_name_start_char(?d) # div 296 | assert is_tag_name_start_char(?s) # span 297 | assert is_tag_name_start_char(0x30A2) # ア (Katakana) 298 | refute is_tag_name_start_char(?1) 299 | end 300 | 301 | test "is_tag_name_char/1 follows identifier rules" do 302 | assert is_tag_name_char(?d) 303 | assert is_tag_name_char(?1) 304 | assert is_tag_name_char(?-) 305 | assert is_tag_name_char(0xAC00) # 가 (Hangul) 306 | end 307 | end 308 | 309 | describe "attribute selector guards" do 310 | test "is_attribute_name_start_char/1 follows identifier start rules" do 311 | assert is_attribute_name_start_char(?c) # class 312 | assert is_attribute_name_start_char(?d) # data-* 313 | assert is_attribute_name_start_char(0x0627) # ا (Arabic) 314 | refute is_attribute_name_start_char(?1) 315 | end 316 | 317 | test "is_attribute_name_char/1 follows identifier rules" do 318 | assert is_attribute_name_char(?c) 319 | assert is_attribute_name_char(?1) 320 | assert is_attribute_name_char(?-) # data-attribute 321 | assert is_attribute_name_char(0x05D0) # א (Hebrew) 322 | end 323 | end 324 | 325 | describe "pseudo selector guards" do 326 | test "is_pseudo_name_start_char/1 follows identifier start rules" do 327 | assert 
is_pseudo_name_start_char(?h) # hover 328 | assert is_pseudo_name_start_char(?f) # first-child 329 | assert is_pseudo_name_start_char(0x0410) # А (Cyrillic) 330 | refute is_pseudo_name_start_char(?1) 331 | end 332 | 333 | test "is_pseudo_name_char/1 follows identifier rules" do 334 | assert is_pseudo_name_char(?h) 335 | assert is_pseudo_name_char(?1) 336 | assert is_pseudo_name_char(?-) # first-child 337 | assert is_pseudo_name_char(0x4E2D) # 中 (Chinese) 338 | end 339 | end 340 | 341 | describe "function guards" do 342 | test "is_function_name_start_char/1 follows identifier start rules" do 343 | assert is_function_name_start_char(?n) # nth-child 344 | assert is_function_name_start_char(?u) # url 345 | assert is_function_name_start_char(0x00E9) # é 346 | refute is_function_name_start_char(?1) 347 | end 348 | 349 | test "is_function_name_char/1 follows identifier rules" do 350 | assert is_function_name_char(?n) 351 | assert is_function_name_char(?1) 352 | assert is_function_name_char(?-) # nth-child 353 | assert is_function_name_char(0x0391) # Α (Greek) 354 | end 355 | end 356 | 357 | describe "is_number_char/1" do 358 | test "accepts UTF-8 digits" do 359 | assert is_number_char(?0) 360 | assert is_number_char(?5) 361 | assert is_number_char(?9) 362 | assert is_number_char(0x0660) # Arabic-Indic digit 363 | assert is_number_char(0x0966) # Devanagari digit 364 | assert is_number_char(0xFF10) # Fullwidth digit 365 | end 366 | 367 | test "accepts decimal point" do 368 | assert is_number_char(?.) 369 | end 370 | 371 | test "accepts signs" do 372 | assert is_number_char(?+) 373 | assert is_number_char(?-) 374 | end 375 | 376 | test "accepts scientific notation" do 377 | assert is_number_char(?e) 378 | assert is_number_char(?E) 379 | end 380 | 381 | test "rejects other characters" do 382 | refute is_number_char(?a) 383 | refute is_number_char(?!) 
384 | end 385 | end 386 | 387 | describe "is_number_start_char/1" do 388 | test "accepts UTF-8 digits" do 389 | assert is_number_start_char(?0) 390 | assert is_number_start_char(?9) 391 | assert is_number_start_char(0x0660) # Arabic-Indic digit 392 | assert is_number_start_char(0xFF10) # Fullwidth digit 393 | end 394 | 395 | test "accepts decimal point" do 396 | assert is_number_start_char(?.) 397 | end 398 | 399 | test "accepts signs" do 400 | assert is_number_start_char(?+) 401 | assert is_number_start_char(?-) 402 | end 403 | 404 | test "rejects scientific notation at start" do 405 | refute is_number_start_char(?e) 406 | refute is_number_start_char(?E) 407 | end 408 | end 409 | 410 | describe "is_comment_char/1" do 411 | test "accepts most characters" do 412 | assert is_comment_char(?a) 413 | assert is_comment_char(?1) 414 | assert is_comment_char(?!) 415 | assert is_comment_char(0x0020) 416 | end 417 | 418 | test "accepts UTF-8 characters" do 419 | assert is_comment_char(0x4E2D) # 中 (Chinese) 420 | assert is_comment_char(0x0391) # Α (Greek) 421 | end 422 | 423 | test "accepts all characters (sequence detection happens at parser level)" do 424 | assert is_comment_char(0x002A) # * (valid individually) 425 | assert is_comment_char(0x002F) # / (valid individually) 426 | # Note: The parser must handle */ sequence detection 427 | end 428 | end 429 | 430 | describe "is_attribute_value_char/1" do 431 | test "accepts regular characters" do 432 | assert is_attribute_value_char(?a) 433 | assert is_attribute_value_char(?z) 434 | assert is_attribute_value_char(?A) 435 | assert is_attribute_value_char(?Z) 436 | assert is_attribute_value_char(?0) 437 | assert is_attribute_value_char(?9) 438 | end 439 | 440 | test "accepts special characters commonly used in attribute values" do 441 | assert is_attribute_value_char(?!) 
442 | assert is_attribute_value_char(?@) 443 | assert is_attribute_value_char(?#) 444 | assert is_attribute_value_char(?$) 445 | assert is_attribute_value_char(?%) 446 | assert is_attribute_value_char(?^) 447 | assert is_attribute_value_char(?&) 448 | assert is_attribute_value_char(?*) 449 | assert is_attribute_value_char(?() 450 | assert is_attribute_value_char(?)) 451 | assert is_attribute_value_char(?-) 452 | assert is_attribute_value_char(?_) 453 | assert is_attribute_value_char(?=) 454 | assert is_attribute_value_char(?+) 455 | assert is_attribute_value_char(?{) 456 | assert is_attribute_value_char(?}) 457 | assert is_attribute_value_char(?[) 458 | assert is_attribute_value_char(?|) 459 | assert is_attribute_value_char(?;) 460 | assert is_attribute_value_char(?:) 461 | assert is_attribute_value_char(?<) 462 | assert is_attribute_value_char(?>) 463 | assert is_attribute_value_char(?.) 464 | assert is_attribute_value_char(?,) 465 | assert is_attribute_value_char(?/) 466 | assert is_attribute_value_char(??) 
467 | assert is_attribute_value_char(?`) 468 | assert is_attribute_value_char(?~) 469 | end 470 | 471 | test "accepts UTF-8 characters" do 472 | assert is_attribute_value_char(0x00C0) # À 473 | assert is_attribute_value_char(0x4E2D) # 中 (Chinese) 474 | assert is_attribute_value_char(0x0391) # Α (Greek) 475 | assert is_attribute_value_char(0x0410) # А (Cyrillic) 476 | assert is_attribute_value_char(0x05D0) # א (Hebrew) 477 | assert is_attribute_value_char(0x0627) # ا (Arabic) 478 | assert is_attribute_value_char(0x3042) # あ (Hiragana) 479 | assert is_attribute_value_char(0x30A2) # ア (Katakana) 480 | assert is_attribute_value_char(0xAC00) # 가 (Hangul) 481 | end 482 | 483 | test "rejects attribute selector end bracket" do 484 | refute is_attribute_value_char(0x005D) # ] 485 | end 486 | 487 | test "rejects quote characters" do 488 | refute is_attribute_value_char(0x0022) # " (double quote) 489 | refute is_attribute_value_char(0x0027) # ' (single quote) 490 | end 491 | 492 | test "rejects backslash (needs escaping)" do 493 | refute is_attribute_value_char(0x005C) # \ 494 | end 495 | 496 | test "rejects newlines including Unicode newlines" do 497 | refute is_attribute_value_char(0x000A) # Line Feed 498 | refute is_attribute_value_char(0x000C) # Form Feed 499 | refute is_attribute_value_char(0x000D) # Carriage Return 500 | refute is_attribute_value_char(0x0085) # Next Line (NEL) 501 | refute is_attribute_value_char(0x2028) # Line Separator 502 | refute is_attribute_value_char(0x2029) # Paragraph Separator 503 | end 504 | 505 | test "rejects whitespace (for unquoted values)" do 506 | refute is_attribute_value_char(0x0009) # Tab 507 | refute is_attribute_value_char(0x000A) # Line Feed 508 | refute is_attribute_value_char(0x000C) # Form Feed 509 | refute is_attribute_value_char(0x000D) # Carriage Return 510 | refute is_attribute_value_char(0x0020) # Space 511 | end 512 | end 513 | 514 | describe "is_selector_start_char/1" do 515 | test "accepts element name start characters" 
do 516 | assert is_selector_start_char(?d) # div 517 | assert is_selector_start_char(?s) # span 518 | assert is_selector_start_char(?_) # custom elements 519 | assert is_selector_start_char(?|) # ns 520 | assert is_selector_start_char(0x4E2D) # 中 (Chinese element name) 521 | end 522 | 523 | test "accepts selector prefix characters" do 524 | assert is_selector_start_char(?.) # .class 525 | assert is_selector_start_char(?#) # #id 526 | assert is_selector_start_char(?[) # [attr] 527 | assert is_selector_start_char(?:) # :pseudo (including :is(), :not(), etc.) 528 | assert is_selector_start_char(?*) # * (universal selector) 529 | end 530 | 531 | test "accepts colon for pseudo-class selectors" do 532 | # This is a specific test to ensure : works for selectors like :is(div) 533 | assert is_selector_start_char(?:) 534 | end 535 | 536 | test "accepts whitespace" do 537 | assert is_selector_start_char(0x0020) # Space 538 | assert is_selector_start_char(0x0009) # Tab 539 | assert is_selector_start_char(0x000A) # Line Feed 540 | end 541 | 542 | test "rejects invalid start characters" do 543 | refute is_selector_start_char(?1) # Numbers can't start selectors 544 | refute is_selector_start_char(?-) # Hyphens can't start selectors 545 | refute is_selector_start_char(?!) # Invalid characters 546 | refute is_selector_start_char(?=) # Operators 547 | end 548 | end 549 | 550 | # UTF-8 utility guards are not implemented in the original code 551 | # describe "UTF-8 utility guards" do 552 | # # These tests are commented out as the corresponding guards are not implemented 553 | # # in the original code. If you need this functionality, you'll need to implement 554 | # # the guards in Selector.Parser.Guards. 
555 | # end 556 | 557 | describe "integration tests" do 558 | test "can validate common CSS selector patterns" do 559 | # Element selector: "div" 560 | assert is_selector_start_char(?d) 561 | assert is_tag_name_char(?i) 562 | assert is_tag_name_char(?v) 563 | 564 | # Class selector: ".my-class" 565 | assert is_selector_start_char(?.) 566 | assert is_class_start_char(?m) 567 | assert is_class_char(?y) 568 | assert is_class_char(?-) 569 | assert is_class_char(?c) 570 | 571 | # ID selector: "#user_123" 572 | assert is_selector_start_char(?#) 573 | assert is_id_start_char(?u) 574 | assert is_id_char(?s) 575 | assert is_id_char(?e) 576 | assert is_id_char(?r) 577 | assert is_id_char(?_) 578 | assert is_id_char(?1) 579 | 580 | # Pseudo-class: ":nth-child" 581 | assert is_selector_start_char(?:) 582 | assert is_pseudo_name_start_char(?n) 583 | assert is_pseudo_name_char(?t) 584 | assert is_pseudo_name_char(?h) 585 | assert is_pseudo_name_char(?-) 586 | assert is_pseudo_name_char(?c) 587 | 588 | # Attribute selector: "[data-value='test']" 589 | assert is_selector_start_char(?[) 590 | assert is_attribute_name_start_char(?d) 591 | assert is_attribute_name_char(?a) 592 | assert is_attribute_name_char(?t) 593 | assert is_attribute_name_char(?a) 594 | assert is_attribute_name_char(?-) 595 | assert is_attribute_operator_char(?=) 596 | assert is_string_char(?t) 597 | assert is_string_char(?e) 598 | assert is_string_char(?s) 599 | assert is_string_char(?t) 600 | end 601 | 602 | test "can validate international CSS selector patterns" do 603 | # Chinese element selector: "标题" 604 | assert is_selector_start_char(0x6807) # 标 605 | assert is_tag_name_char(0x9898) # 题 606 | 607 | # Greek class selector: ".Αλφα" 608 | assert is_selector_start_char(?.) 
609 | assert is_class_start_char(0x0391) # Α 610 | assert is_class_char(0x03BB) # λ 611 | assert is_class_char(0x03C6) # φ 612 | assert is_class_char(0x03B1) # α 613 | 614 | # Arabic ID selector: "#مثال" 615 | assert is_selector_start_char(?#) 616 | assert is_id_start_char(0x0645) # م 617 | assert is_id_char(0x062B) # ث 618 | assert is_id_char(0x0627) # ا 619 | assert is_id_char(0x0644) # ل 620 | 621 | # Japanese attribute with Arabic-Indic numbers: "[データ-値='١٢٣']" 622 | assert is_selector_start_char(?[) 623 | assert is_attribute_name_start_char(0x30C7) # デ 624 | assert is_attribute_name_char(0x30FC) # ー 625 | assert is_attribute_name_char(0x30BF) # タ 626 | assert is_attribute_name_char(?-) 627 | assert is_attribute_name_char(0x5024) # 値 628 | assert is_attribute_operator_char(?=) 629 | assert is_string_char(0x0661) # ١ (Arabic-Indic digit 1) 630 | assert is_string_char(0x0662) # ٢ (Arabic-Indic digit 2) 631 | assert is_string_char(0x0663) # ٣ (Arabic-Indic digit 3) 632 | end 633 | end 634 | 635 | describe "is_pseudo_start_char/1" do 636 | test "accepts ASCII letters as first character" do 637 | assert is_pseudo_start_char(?a) 638 | assert is_pseudo_start_char(?z) 639 | assert is_pseudo_start_char(?A) 640 | assert is_pseudo_start_char(?Z) 641 | end 642 | 643 | test "accepts underscore as first character" do 644 | assert is_pseudo_start_char(?_) 645 | end 646 | 647 | test "accepts hyphen as first character (for vendor prefixes)" do 648 | assert is_pseudo_start_char(?-) # -webkit-scrollbar, -moz-placeholder, etc. 
649 | end 650 | 651 | test "accepts non-ASCII characters as first character" do 652 | assert is_pseudo_start_char(0x00C0) # À 653 | assert is_pseudo_start_char(0x4E2D) # 中 (Chinese character) 654 | assert is_pseudo_start_char(0x30D2) # ヒ (Katakana) 655 | end 656 | 657 | test "rejects digits as first character" do 658 | refute is_pseudo_start_char(?0) 659 | refute is_pseudo_start_char(?9) 660 | end 661 | 662 | test "rejects other special characters as first character" do 663 | refute is_pseudo_start_char(?() 664 | refute is_pseudo_start_char(?)) 665 | refute is_pseudo_start_char(?\s) 666 | refute is_pseudo_start_char(?.) 667 | refute is_pseudo_start_char(?@) 668 | end 669 | end 670 | 671 | describe "is_pseudo_char/1" do 672 | test "accepts all valid start characters" do 673 | assert is_pseudo_char(?a) 674 | assert is_pseudo_char(?Z) 675 | assert is_pseudo_char(?-) 676 | assert is_pseudo_char(?_) 677 | assert is_pseudo_char(0x4E2D) # 中 (Chinese character) 678 | end 679 | 680 | test "additionally accepts digits" do 681 | assert is_pseudo_char(?0) 682 | assert is_pseudo_char(?9) 683 | end 684 | 685 | test "rejects parentheses (not part of pseudo-class name)" do 686 | refute is_pseudo_char(?() 687 | refute is_pseudo_char(?)) 688 | end 689 | 690 | test "rejects whitespace (not part of pseudo-class name)" do 691 | refute is_pseudo_char(?\s) 692 | refute is_pseudo_char(0x0009) # Tab 693 | end 694 | 695 | test "rejects invalid characters" do 696 | refute is_pseudo_char(?@) 697 | refute is_pseudo_char(?[) 698 | refute is_pseudo_char(?]) 699 | refute is_pseudo_char(?{) 700 | refute is_pseudo_char(?}) 701 | refute is_pseudo_char(?=) 702 | refute is_pseudo_char(?~) 703 | refute is_pseudo_char(?+) # Plus sign not part of name 704 | end 705 | end 706 | 707 | describe "pseudo-class examples" do 708 | test ":hover example" do 709 | assert is_pseudo_start_char(?h) 710 | assert is_pseudo_char(?o) 711 | assert is_pseudo_char(?v) 712 | assert is_pseudo_char(?e) 713 | assert 
is_pseudo_char(?r) 714 | end 715 | 716 | test ":nth-child(2n+1) example" do 717 | assert is_pseudo_start_char(?n) 718 | assert is_pseudo_char(?t) 719 | assert is_pseudo_char(?h) 720 | assert is_pseudo_char(?-) 721 | assert is_pseudo_char(?c) 722 | assert is_pseudo_char(?h) 723 | assert is_pseudo_char(?i) 724 | assert is_pseudo_char(?l) 725 | assert is_pseudo_char(?d) 726 | # Note: The parentheses and content are NOT part of the pseudo-class name 727 | # They would be parsed separately as functional notation 728 | refute is_pseudo_char(?() 729 | # The following would be parsed as part of the argument, not the name 730 | refute is_pseudo_char(?+) 731 | refute is_pseudo_char(?)) 732 | end 733 | 734 | test ":lang(fr) example" do 735 | assert is_pseudo_start_char(?l) 736 | assert is_pseudo_char(?a) 737 | assert is_pseudo_char(?n) 738 | assert is_pseudo_char(?g) 739 | # Parentheses are not part of the pseudo-class name 740 | refute is_pseudo_char(?() 741 | refute is_pseudo_char(?)) 742 | end 743 | end 744 | 745 | describe "is_lang_char/1" do 746 | test "accepts ASCII letters" do 747 | assert is_lang_char(?a) 748 | assert is_lang_char(?z) 749 | assert is_lang_char(?A) 750 | assert is_lang_char(?Z) 751 | end 752 | 753 | test "accepts ASCII digits" do 754 | assert is_lang_char(?0) 755 | assert is_lang_char(?9) 756 | end 757 | 758 | test "accepts hyphen as separator" do 759 | assert is_lang_char(?-) 760 | end 761 | 762 | test "rejects non-ASCII letters" do 763 | refute is_lang_char(0x00E9) # é 764 | refute is_lang_char(0x4E2D) # 中 765 | refute is_lang_char(0x0391) # Α (Greek) 766 | end 767 | 768 | test "rejects non-ASCII digits" do 769 | refute is_lang_char(0x0660) # Arabic-Indic digit 770 | refute is_lang_char(0xFF10) # Fullwidth digit 771 | end 772 | 773 | test "rejects other characters" do 774 | refute is_lang_char(?_) 775 | refute is_lang_char(?.) 776 | refute is_lang_char(?@) 777 | refute is_lang_char(?!) 
778 | refute is_lang_char(?\s) 779 | end 780 | end 781 | 782 | describe "is_lang_start_char/1" do 783 | test "accepts ASCII letters" do 784 | assert is_lang_start_char(?a) 785 | assert is_lang_start_char(?z) 786 | assert is_lang_start_char(?A) 787 | assert is_lang_start_char(?Z) 788 | end 789 | 790 | test "rejects digits" do 791 | refute is_lang_start_char(?0) 792 | refute is_lang_start_char(?9) 793 | end 794 | 795 | test "rejects hyphen" do 796 | refute is_lang_start_char(?-) 797 | end 798 | 799 | test "rejects non-ASCII characters" do 800 | refute is_lang_start_char(0x00E9) # é 801 | refute is_lang_start_char(0x4E2D) # 中 802 | end 803 | end 804 | 805 | describe "language tag examples" do 806 | test "simple language codes" do 807 | # "en" 808 | assert is_lang_start_char(?e) 809 | assert is_lang_char(?n) 810 | 811 | # "fr" 812 | assert is_lang_start_char(?f) 813 | assert is_lang_char(?r) 814 | end 815 | 816 | test "language with region codes" do 817 | # "en-US" 818 | assert is_lang_start_char(?e) 819 | assert is_lang_char(?n) 820 | assert is_lang_char(?-) 821 | assert is_lang_char(?U) 822 | assert is_lang_char(?S) 823 | 824 | # "pt-BR" 825 | assert is_lang_start_char(?p) 826 | assert is_lang_char(?t) 827 | assert is_lang_char(?-) 828 | assert is_lang_char(?B) 829 | assert is_lang_char(?R) 830 | end 831 | 832 | test "complex language tags" do 833 | # "zh-Hans-CN" (Chinese, Simplified script, China) 834 | for char <- String.to_charlist("zh-Hans-CN") do 835 | assert is_lang_char(char) 836 | end 837 | 838 | # "en-GB-oed" (English, Great Britain, Oxford English Dictionary spelling) 839 | for char <- String.to_charlist("en-GB-oed") do 840 | assert is_lang_char(char) 841 | end 842 | end 843 | end 844 | 845 | describe "is_selector_char/1" do 846 | test "accepts all identifier characters" do 847 | assert is_selector_char(?a) 848 | assert is_selector_char(?Z) 849 | assert is_selector_char(?0) 850 | assert is_selector_char(?9) 851 | assert is_selector_char(?_) 852 | assert 
is_selector_char(?-) 853 | assert is_selector_char(0x4E2D) # 中 (Chinese) 854 | assert is_selector_char(0x0391) # Α (Greek) 855 | end 856 | 857 | test "accepts all delimiter characters" do 858 | assert is_selector_char(?#) # ID selector 859 | assert is_selector_char(?.) # Class selector 860 | assert is_selector_char(?:) # Pseudo-class/element 861 | assert is_selector_char(?[) # Attribute start 862 | assert is_selector_char(?]) # Attribute end 863 | assert is_selector_char(?() # Function start 864 | assert is_selector_char(?)) # Function end 865 | assert is_selector_char(?,) # Selector separator 866 | assert is_selector_char(?") # String delimiter 867 | assert is_selector_char(?') # String delimiter 868 | assert is_selector_char(?\\) # Escape character 869 | end 870 | 871 | test "accepts all combinator characters" do 872 | assert is_selector_char(?>) # Child combinator 873 | assert is_selector_char(?+) # Adjacent sibling 874 | assert is_selector_char(?~) # General sibling 875 | end 876 | 877 | test "accepts whitespace characters" do 878 | assert is_selector_char(0x0009) # Tab 879 | assert is_selector_char(0x000A) # Line Feed 880 | assert is_selector_char(0x000C) # Form Feed 881 | assert is_selector_char(0x000D) # Carriage Return 882 | assert is_selector_char(0x0020) # Space 883 | end 884 | 885 | test "accepts attribute operator characters" do 886 | assert is_selector_char(?=) # Equal 887 | assert is_selector_char(?~) # Includes (~=) 888 | assert is_selector_char(?|) # Dash match (|=) 889 | assert is_selector_char(?^) # Prefix (^=) 890 | assert is_selector_char(?$) # Suffix ($=) 891 | assert is_selector_char(?*) # Substring (*=) 892 | end 893 | 894 | test "accepts special selector characters" do 895 | assert is_selector_char(?*) # Universal selector 896 | assert is_selector_char(?|) # Namespace separator 897 | assert is_selector_char(?!) 
# For :not() 898 | end 899 | 900 | test "accepts common punctuation for attribute values and strings" do 901 | assert is_selector_char(?/) 902 | assert is_selector_char(??) 903 | assert is_selector_char(?&) 904 | assert is_selector_char(?%) 905 | assert is_selector_char(?@) 906 | assert is_selector_char(?;) 907 | assert is_selector_char(?{) 908 | assert is_selector_char(?}) 909 | assert is_selector_char(?<) 910 | assert is_selector_char(?>) 911 | assert is_selector_char(?`) 912 | end 913 | 914 | test "accepts UTF-8 characters from various scripts" do 915 | assert is_selector_char(0x00E9) # é (Latin-1 Supplement) 916 | assert is_selector_char(0x0410) # А (Cyrillic) 917 | assert is_selector_char(0x05D0) # א (Hebrew) 918 | assert is_selector_char(0x0627) # ا (Arabic) 919 | assert is_selector_char(0x3042) # あ (Hiragana) 920 | assert is_selector_char(0x30A2) # ア (Katakana) 921 | assert is_selector_char(0xAC00) # 가 (Hangul) 922 | assert is_selector_char(0x0660) # ٠ (Arabic-Indic digit) 923 | assert is_selector_char(0xFF10) # 0 (Fullwidth digit) 924 | end 925 | 926 | test "accepts printable ASCII characters" do 927 | for codepoint <- 0x0021..0x007E do 928 | assert is_selector_char(codepoint), "Failed for codepoint #{codepoint} (#{<<codepoint::utf8>>})" 929 | end 930 | end 931 | 932 | test "rejects null character" do 933 | refute is_selector_char(0x0000) 934 | end 935 | 936 | test "rejects control characters below space (except whitespace)" do 937 | refute is_selector_char(0x0001) 938 | refute is_selector_char(0x0002) 939 | refute is_selector_char(0x0007) # Bell 940 | refute is_selector_char(0x0008) # Backspace 941 | refute is_selector_char(0x000B) # Vertical Tab (not CSS whitespace) 942 | refute is_selector_char(0x000E) 943 | refute is_selector_char(0x000F) 944 | refute is_selector_char(0x001F) 945 | end 946 | 947 | test "rejects DEL character" do 948 | refute is_selector_char(0x007F) 949 | end 950 | 951 | test "rejects surrogate codepoints" do 952 | refute is_selector_char(0xD800) 953 |
refute is_selector_char(0xDBFF) 954 | refute is_selector_char(0xDC00) 955 | refute is_selector_char(0xDFFF) 956 | end 957 | 958 | test "accepts non-breaking space and other Unicode spaces" do 959 | assert is_selector_char(0x00A0) # Non-breaking space 960 | assert is_selector_char(0x2000) # En quad 961 | assert is_selector_char(0x3000) # Ideographic space 962 | end 963 | 964 | test "is_nth_formula_char/1 accepts ASCII digits" do 965 | assert is_nth_formula_char(?0) 966 | assert is_nth_formula_char(?5) 967 | assert is_nth_formula_char(?9) 968 | end 969 | 970 | test "is_nth_formula_char/1 accepts variable n (case-insensitive)" do 971 | assert is_nth_formula_char(?n) 972 | assert is_nth_formula_char(?N) 973 | end 974 | 975 | test "is_nth_formula_char/1 accepts letters for odd/even keywords (case-insensitive)" do 976 | assert is_nth_formula_char(?o) # odd 977 | assert is_nth_formula_char(?O) 978 | assert is_nth_formula_char(?d) # odd 979 | assert is_nth_formula_char(?D) 980 | assert is_nth_formula_char(?e) # even 981 | assert is_nth_formula_char(?E) 982 | assert is_nth_formula_char(?v) # even 983 | assert is_nth_formula_char(?V) 984 | end 985 | 986 | test "is_nth_formula_char/1 accepts operators and signs" do 987 | assert is_nth_formula_char(?+) 988 | assert is_nth_formula_char(?-) 989 | end 990 | 991 | test "is_nth_formula_char/1 accepts CSS whitespace" do 992 | assert is_nth_formula_char(0x0020) # Space 993 | assert is_nth_formula_char(0x0009) # Tab 994 | assert is_nth_formula_char(0x000A) # Line Feed 995 | assert is_nth_formula_char(0x000C) # Form Feed 996 | assert is_nth_formula_char(0x000D) # Carriage Return 997 | end 998 | 999 | test "is_nth_formula_char/1 rejects other letters" do 1000 | refute is_nth_formula_char(?a) 1001 | refute is_nth_formula_char(?z) 1002 | refute is_nth_formula_char(?A) 1003 | refute is_nth_formula_char(?Z) 1004 | refute is_nth_formula_char(?m) 1005 | refute is_nth_formula_char(?x) 1006 | end 1007 | 1008 | test "is_nth_formula_char/1 
rejects special characters not in nth-formulas" do 1009 | refute is_nth_formula_char(?.) 1010 | refute is_nth_formula_char(?#) 1011 | refute is_nth_formula_char(?*) 1012 | refute is_nth_formula_char(?/) 1013 | refute is_nth_formula_char(?=) 1014 | refute is_nth_formula_char(?!) 1015 | refute is_nth_formula_char(?() 1016 | refute is_nth_formula_char(?)) 1017 | end 1018 | 1019 | test "is_nth_formula_char/1 rejects non-ASCII digits" do 1020 | refute is_nth_formula_char(0x0660) # Arabic-Indic digit 1021 | refute is_nth_formula_char(0xFF10) # Fullwidth digit 1022 | end 1023 | 1024 | test "is_nth_formula_char/1 rejects Unicode letters" do 1025 | refute is_nth_formula_char(0x00E9) # é 1026 | refute is_nth_formula_char(0x4E2D) # 中 1027 | end 1028 | 1029 | test "is_nth_formula_starting_char/1 accepts ASCII digits as starting characters" do 1030 | assert is_nth_formula_starting_char(?0) 1031 | assert is_nth_formula_starting_char(?1) 1032 | assert is_nth_formula_starting_char(?9) 1033 | end 1034 | 1035 | test "is_nth_formula_starting_char/1 accepts signs as starting characters" do 1036 | assert is_nth_formula_starting_char(?+) # +2n+1, +n 1037 | assert is_nth_formula_starting_char(?-) # -n+3, -2n 1038 | end 1039 | 1040 | test "is_nth_formula_starting_char/1 accepts variable n as starting character (case-insensitive)" do 1041 | assert is_nth_formula_starting_char(?n) # n+1, n 1042 | assert is_nth_formula_starting_char(?N) 1043 | end 1044 | 1045 | test "is_nth_formula_starting_char/1 accepts keyword starting letters (case-insensitive)" do 1046 | assert is_nth_formula_starting_char(?o) # odd 1047 | assert is_nth_formula_starting_char(?O) 1048 | assert is_nth_formula_starting_char(?e) # even 1049 | assert is_nth_formula_starting_char(?E) 1050 | end 1051 | 1052 | test "is_nth_formula_starting_char/1 accepts leading CSS whitespace" do 1053 | assert is_nth_formula_starting_char(0x0020) # Space 1054 | assert is_nth_formula_starting_char(0x0009) # Tab 1055 | assert 
is_nth_formula_starting_char(0x000A) # Line Feed 1056 | assert is_nth_formula_starting_char(0x000C) # Form Feed 1057 | assert is_nth_formula_starting_char(0x000D) # Carriage Return 1058 | end 1059 | 1060 | test "is_nth_formula_starting_char/1 rejects letters that cannot start nth-formulas" do 1061 | refute is_nth_formula_starting_char(?d) # 'd' can appear in "odd" but not start 1062 | refute is_nth_formula_starting_char(?v) # 'v' can appear in "even" but not start 1063 | refute is_nth_formula_starting_char(?a) 1064 | refute is_nth_formula_starting_char(?z) 1065 | refute is_nth_formula_starting_char(?m) 1066 | end 1067 | 1068 | test "is_nth_formula_starting_char/1 rejects special characters" do 1069 | refute is_nth_formula_starting_char(?.) 1070 | refute is_nth_formula_starting_char(?#) 1071 | refute is_nth_formula_starting_char(?*) 1072 | refute is_nth_formula_starting_char(?() 1073 | refute is_nth_formula_starting_char(?)) 1074 | refute is_nth_formula_starting_char(?=) 1075 | end 1076 | 1077 | test "is_nth_formula_starting_char/1 rejects non-ASCII digits" do 1078 | refute is_nth_formula_starting_char(0x0660) # Arabic-Indic digit 1079 | refute is_nth_formula_starting_char(0xFF10) # Fullwidth digit 1080 | end 1081 | 1082 | test "nth-formula validates simple integer formulas" do 1083 | # "5" 1084 | assert is_nth_formula_starting_char(?5) 1085 | 1086 | # "0" 1087 | assert is_nth_formula_starting_char(?0) 1088 | end 1089 | 1090 | test "nth-formula validates keyword formulas" do 1091 | # "odd" 1092 | assert is_nth_formula_starting_char(?o) 1093 | assert is_nth_formula_char(?d) 1094 | assert is_nth_formula_char(?d) 1095 | 1096 | # "even" 1097 | assert is_nth_formula_starting_char(?e) 1098 | assert is_nth_formula_char(?v) 1099 | assert is_nth_formula_char(?e) 1100 | assert is_nth_formula_char(?n) 1101 | end 1102 | 1103 | test "nth-formula validates An+B formulas" do 1104 | # "2n+1" 1105 | assert is_nth_formula_starting_char(?2) 1106 | assert is_nth_formula_char(?n) 1107 | 
assert is_nth_formula_char(?+) 1108 | assert is_nth_formula_char(?1) 1109 | 1110 | # "-n+3" 1111 | assert is_nth_formula_starting_char(?-) 1112 | assert is_nth_formula_char(?n) 1113 | assert is_nth_formula_char(?+) 1114 | assert is_nth_formula_char(?3) 1115 | 1116 | # "3n-2" 1117 | assert is_nth_formula_starting_char(?3) 1118 | assert is_nth_formula_char(?n) 1119 | assert is_nth_formula_char(?-) 1120 | assert is_nth_formula_char(?2) 1121 | end 1122 | 1123 | test "nth-formula validates formulas with whitespace" do 1124 | # " 2n + 1 " (with spaces) 1125 | assert is_nth_formula_starting_char(0x0020) # Leading space 1126 | assert is_nth_formula_char(?2) 1127 | assert is_nth_formula_char(?n) 1128 | assert is_nth_formula_char(0x0020) # Space before + 1129 | assert is_nth_formula_char(?+) 1130 | assert is_nth_formula_char(0x0020) # Space after + 1131 | assert is_nth_formula_char(?1) 1132 | assert is_nth_formula_char(0x0020) # Trailing space 1133 | end 1134 | 1135 | test "nth-formula validates n-only formulas" do 1136 | # "n" 1137 | assert is_nth_formula_starting_char(?n) 1138 | 1139 | # "+n" 1140 | assert is_nth_formula_starting_char(?+) 1141 | assert is_nth_formula_char(?n) 1142 | 1143 | # "-n" 1144 | assert is_nth_formula_starting_char(?-) 1145 | assert is_nth_formula_char(?n) 1146 | end 1147 | 1148 | test "nth-formula validates coefficient-only formulas" do 1149 | # "2n" 1150 | assert is_nth_formula_starting_char(?2) 1151 | assert is_nth_formula_char(?n) 1152 | 1153 | # "-3n" 1154 | assert is_nth_formula_starting_char(?-) 1155 | assert is_nth_formula_char(?3) 1156 | assert is_nth_formula_char(?n) 1157 | end 1158 | 1159 | test "comprehensive selector examples" do 1160 | # Simple selector: div.class#id 1161 | for char <- String.to_charlist("div.class#id") do 1162 | assert is_selector_char(char) 1163 | end 1164 | 1165 | # Complex selector: [data-value~="test"]:nth-child(2n+1) 1166 | for char <- String.to_charlist("[data-value~=\"test\"]:nth-child(2n+1)") do 1167 | assert 
is_selector_char(char) 1168 | end 1169 | 1170 | # International selector: .クラス#标识符[атрибут="القيمة"] 1171 | for char <- String.to_charlist(".クラス#标识符[атрибут=\"القيمة\"]") do 1172 | assert is_selector_char(char) 1173 | end 1174 | 1175 | # Namespace and combinators: ns|element > .class + #id ~ [attr] 1176 | for char <- String.to_charlist("ns|element > .class + #id ~ [attr]") do 1177 | assert is_selector_char(char) 1178 | end 1179 | end 1180 | end 1181 | end 1182 | --------------------------------------------------------------------------------